"""Phase 1: Essential Dividend Metrics Implementation (DEPRECATED) ⚠️ DEPRECATED: This DOM-scraping based approach has been replaced by phase1_api_scraper.py which uses Schwab's REST APIs directly. The API approach is more reliable, complete, and maintainable than DOM scraping. This module is kept for reference only. New code should use phase1_api_scraper.py. Old approach extracts from DOM: - Quote/Price Data (symbol bar) - Enhanced Dividend Information (forward-looking dates) - Core Earnings Metrics (EPS, forecasts) - Basic Valuation Ratios (P/E, Forward P/E, PEG) - Calculated Metrics (payout ratio) """ from typing import Dict, Any, Optional import re import logging from ...core import QuoteData, EnhancedDividends, EarningsData, CalculatedMetrics, EquityPhase1Data logger = logging.getLogger(__name__) def _parse_float(value: Any) -> Optional[float]: """Safely parse a value to float, handling $ and % symbols.""" if value is None: return None try: # Remove common formatting characters clean = str(value).strip().replace('$', '').replace(',', '').replace('%', '') if clean and clean != '--' and clean.lower() != 'n/a': return float(clean) except (ValueError, AttributeError): pass return None def _parse_int(value: Any) -> Optional[int]: """Safely parse a value to int.""" if value is None: return None try: clean = str(value).strip().replace(',', '') if clean and clean != '--' and clean.lower() != 'n/a': return int(float(clean)) except (ValueError, AttributeError): pass return None def _parse_volume(volume_str: str) -> Optional[int]: """Parse volume string like '8M', '22.4M', '1.2B' to integer.""" if not volume_str: return None try: volume_str = volume_str.strip().upper() multiplier = 1 if volume_str.endswith('K'): multiplier = 1_000 volume_str = volume_str[:-1] elif volume_str.endswith('M'): multiplier = 1_000_000 volume_str = volume_str[:-1] elif volume_str.endswith('B'): multiplier = 1_000_000_000 volume_str = volume_str[:-1] value = float(volume_str) return int(value * multiplier) except (ValueError, AttributeError): return None def _parse_revenue(revenue_str: str) -> Optional[float]: """Parse revenue string like '$92.15B', '$1.5M' to dollar value.""" if not revenue_str: return None try: revenue_str = revenue_str.strip().upper().replace('$', '').replace(',', '') multiplier = 1 if revenue_str.endswith('K'): multiplier = 1_000 revenue_str = revenue_str[:-1] elif revenue_str.endswith('M'): multiplier = 1_000_000 revenue_str = revenue_str[:-1] elif revenue_str.endswith('B'): multiplier = 1_000_000_000 revenue_str = revenue_str[:-1] elif revenue_str.endswith('T'): multiplier = 1_000_000_000_000 revenue_str = revenue_str[:-1] value = float(revenue_str) return value * multiplier except (ValueError, AttributeError): return None async def extract_quote_data(page, ticker: str = "", debug: bool = False) -> QuoteData: """Extract quote/price data from symbol bar. Args: page: Playwright page object ticker: Stock ticker symbol (for pattern matching) debug: Enable debug logging Returns: QuoteData object with extracted fields """ quote = QuoteData() try: if debug: logger.debug("Starting quote data extraction...") # Wait for symbol bar content (look for key labels) try: await page.wait_for_selector('#app-symbol-bar-component, text=Previous close', state='attached', timeout=15000) except Exception: if debug: logger.debug("Timeout waiting for symbol bar selector, attempting to parse whatever is there") # Extract symbol bar text content (fallback to body if specific component not found) symbol_bar_text = await page.evaluate(''' () => { const symbolBar = document.querySelector('#app-symbol-bar-component'); if (symbolBar && symbolBar.textContent && symbolBar.textContent.includes('Previous close')) return symbolBar.textContent; // If specific component not found, try to find the container with market data // Look for container with "Previous close" const labels = Array.from(document.querySelectorAll('span, div, p')); const prevCloseLabel = labels.find(el => el.textContent && el.textContent.includes('Previous close')); if (prevCloseLabel) { // Return the parent's text content (go up a few levels to capture all data) let parent = prevCloseLabel.parentElement; let count = 0; while (parent && count < 8) { if (parent.textContent.length > 300) return parent.textContent; parent = parent.parentElement; count++; } } return document.body.textContent || ''; } ''') if debug: logger.debug(f"Symbol bar text (first 500 chars): {symbol_bar_text[:500]}") # Extract structured data quote_data = await page.evaluate(r''' (ticker) => { const data = {}; // Helper to get text content from page const getText = () => { const symbolBar = document.querySelector('#app-symbol-bar-component'); // Verify it looks like the right component by checking for "Previous close" if (symbolBar && symbolBar.textContent && symbolBar.textContent.includes('Previous close')) { return symbolBar.textContent; } // Fallback logic const labels = Array.from(document.querySelectorAll('span, div, p')); const prevCloseLabel = labels.find(el => el.textContent && el.textContent.includes('Previous close')); if (prevCloseLabel) { let parent = prevCloseLabel.parentElement; let count = 0; while (parent && count < 8) { if (parent.textContent.length > 300) return parent.textContent; parent = parent.parentElement; count++; } } // Last resort: body text return document.body.textContent || ''; }; const fullText = getText(); // Try to find price in quote container first for accuracy const priceElement = document.querySelector('.symbol-quote-container, [data-testid="quote-price"]'); if (priceElement) { const priceText = priceElement.textContent || ''; const priceMatch = priceText.match(/\$([0-9,]+\.[0-9]+)/); if (priceMatch) data.price = priceMatch[1].replace(',', ''); } else { // Fallback regex for price if element not found // Look for price near top or just regex const priceMatch = fullText.match(/\$([0-9,]+\.[0-9]{2})(\s|[+-]|$)/); if (priceMatch) data.price = priceMatch[1].replace(',', ''); } // After hours (using \s* for robustness) const afterHoursMatch = fullText.match(/After hours:?\s*\$([0-9,.]+)/i); if (afterHoursMatch) data.after_hours_price = afterHoursMatch[1].replace(',', ''); const afterHoursChangeMatch = fullText.match(/After hours:.*?([+-]\$[0-9,.]+)\s*\(([+-][0-9.]+)%\)/i); if (afterHoursChangeMatch) { data.after_hours_change = afterHoursChangeMatch[1].replace('$', '').replace(',', ''); data.after_hours_change_percent = afterHoursChangeMatch[2]; } // Bid/Ask (using \s* for robustness) const bidMatch = fullText.match(/Bid\s*\$([0-9,.]+)/i); if (bidMatch) data.bid = bidMatch[1].replace(',', ''); const askMatch = fullText.match(/Ask\s*\$([0-9,.]+)/i); if (askMatch) data.ask = askMatch[1].replace(',', ''); const bidAskSizeMatch = fullText.match(/Bid\/Ask Size\s*([0-9]+\/[0-9]+)/i); if (bidAskSizeMatch) data.bid_ask_size = bidAskSizeMatch[1]; // Previous close and open (using \s* instead of \s+) const prevCloseMatch = fullText.match(/Previous close\s*\$([0-9,.]+)/i); if (prevCloseMatch) data.previous_close = prevCloseMatch[1].replace(',', ''); const openMatch = fullText.match(/Today's open\s*\$([0-9,.]+)/i); if (openMatch) data.open = openMatch[1].replace(',', ''); // Volume (using \s*) const volumeMatch = fullText.match(/Today's volume\s*([0-9.]+[KMB]?)/i); if (volumeMatch) data.volume = volumeMatch[1]; const volumeVsAvgMatch = fullText.match(/Today's volume\s*[0-9.]+[KMB]?\s*(Above Avg\.|Below Avg\.|Average)/i); if (volumeVsAvgMatch) data.volume_vs_avg = volumeVsAvgMatch[1]; // Day range // Pattern: "Today's range low $200.81 Today's range high $203.45" or similar // We'll look for "low $X" and "high $Y" appearing after "Today's range" const dayRangeMatch = fullText.match(/Today's range.*?low\s*\$([0-9,.]+).*?high\s*\$([0-9,.]+)/i); if (dayRangeMatch) { data.day_range_low = dayRangeMatch[1].replace(',', ''); data.day_range_high = dayRangeMatch[2].replace(',', ''); } // 52-week range const weekRangeMatch = fullText.match(/52-week range.*?low\s*\$([0-9,.]+).*?high\s*\$([0-9,.]+)/i); if (weekRangeMatch) { data.week_52_low = weekRangeMatch[1].replace(',', ''); data.week_52_high = weekRangeMatch[2].replace(',', ''); } // Market cap (may be in Share Profile section) const marketCapMatch = fullText.match(/Market Cap\s*\$([0-9.]+[KMBT])/i); if (marketCapMatch) data.market_cap = marketCapMatch[1]; // Change and change percent // Try specific formatted pattern first: TICKER $PRICE CHANGE CHANGE% // e.g. "JNJ $201.95 -1.03 -0.51%" const standardPattern = fullText.match(/\$([0-9,.]+)\s*([+-]?[0-9,.]+)\s*([+-]?[0-9.]+)%/); if (standardPattern) { if (!data.price) data.price = standardPattern[1].replace(',', ''); data.change = standardPattern[2]; data.change_percent = standardPattern[3]; } let percentMatch = null; if (ticker && !data.change_percent) { // Match: TICKER$digits.digits{2}percent% const tickerPattern = new RegExp(ticker + '\\\\.?[\\s]*\\$([0-9,]+\\\\.[0-9]{2})[\\s]*([0-9.]+)%', 'i'); percentMatch = fullText.match(tickerPattern); if (percentMatch) { data.change_percent = percentMatch[2]; } } if (!data.change_percent) { // Fallback: match any price+percent pattern with space const fallbackMatch = fullText.match(/\$[0-9,.]+\s*([+-]?[0-9.]+)%/); if (fallbackMatch) { data.change_percent = fallbackMatch[1]; } } // Pattern 2: "+$1.23 (+0.45%)" or "-$1.23 (-0.45%)" let changeMatch = fullText.match(/([+-]\$[0-9,.]+)\s*\(([+-][0-9.]+)%\)/); // Pattern 3: "$193.08 +1.23 +0.64%" (price followed by change) if (!changeMatch) { changeMatch = fullText.match(/\$[0-9,.]+\s*([+-][0-9,.]+)\s*([+-][0-9.]+)%/); } // Pattern 4: "Change: +1.23 (+0.64%)" if (!changeMatch) { changeMatch = fullText.match(/Change:?\s*([+-][0-9,.]+)\s*\(([+-][0-9.]+)%\)/i); } if (changeMatch) { data.change = changeMatch[1].replace('$', '').replace(',', ''); if (!data.change_percent) { data.change_percent = changeMatch[2].replace(/[+]/g, ''); } } // Exchange - look for NYSE, NASDAQ, etc. const exchangeMatch = fullText.match(/\b(NYSE|NASDAQ|AMEX|OTC|BATS)\b/i); if (exchangeMatch) data.exchange = exchangeMatch[1].toUpperCase(); return data; } ''', ticker) # Parse and assign values quote.price = _parse_float(quote_data.get('price')) quote.change = _parse_float(quote_data.get('change')) quote.change_percent = _parse_float(quote_data.get('change_percent')) quote.after_hours_price = _parse_float(quote_data.get('after_hours_price')) quote.after_hours_change = _parse_float(quote_data.get('after_hours_change')) quote.after_hours_change_percent = _parse_float(quote_data.get('after_hours_change_percent')) quote.bid = _parse_float(quote_data.get('bid')) quote.ask = _parse_float(quote_data.get('ask')) quote.bid_ask_size = quote_data.get('bid_ask_size') quote.previous_close = _parse_float(quote_data.get('previous_close')) quote.open = _parse_float(quote_data.get('open')) quote.volume = _parse_volume(quote_data.get('volume', '')) quote.volume_vs_avg = quote_data.get('volume_vs_avg') quote.day_range_low = _parse_float(quote_data.get('day_range_low')) quote.day_range_high = _parse_float(quote_data.get('day_range_high')) quote.week_52_low = _parse_float(quote_data.get('week_52_low')) quote.week_52_high = _parse_float(quote_data.get('week_52_high')) quote.market_cap = quote_data.get('market_cap') # Try to extract sector and exchange from page header header_data = await page.evaluate(r''' () => { const data = {}; // Look for sector near company name const sectorElement = document.querySelector('[data-testid="sector"], .sector'); if (sectorElement) { data.sector = sectorElement.textContent.replace('Sector', '').trim(); } else { // Manual search for text containing "Sector" const spans = Array.from(document.querySelectorAll('span')); const sectorSpan = spans.find(el => el.textContent && el.textContent.includes('Sector')); if (sectorSpan) { data.sector = sectorSpan.textContent.replace('Sector', '').replace(':', '').trim(); } } // Look for exchange near ticker const exchangeElement = document.querySelector('[data-testid="exchange"], .exchange'); if (exchangeElement) { data.exchange = exchangeElement.textContent.trim(); } // Fallback: parse from page text const pageText = document.body.textContent || ''; if (!data.sector) { const sectorMatch = pageText.match(/Sector[:\s]+([A-Za-z\s&]+)/); if (sectorMatch) data.sector = sectorMatch[1].trim(); } if (!data.exchange) { const exchangeMatch = pageText.match(/(NYSE|NASDAQ|AMEX|OTC)/i); if (exchangeMatch) data.exchange = exchangeMatch[1].toUpperCase(); } return data; } ''') quote.sector = header_data.get('sector') quote.exchange = header_data.get('exchange') if debug: logger.debug(f"Extracted quote data: price={quote.price}, volume={quote.volume}, " f"52w_range={quote.week_52_low}-{quote.week_52_high}") except Exception as e: if debug: logger.debug(f"Error extracting quote data: {e}") return quote async def extract_enhanced_dividends(page, debug: bool = False) -> EnhancedDividends: """Extract enhanced dividend data including next payment dates. Args: page: Playwright page object debug: Enable debug logging Returns: EnhancedDividends object with extracted fields """ dividends = EnhancedDividends() try: if debug: logger.debug("Starting enhanced dividend extraction...") # Wait for dividends panel to load await page.wait_for_selector('#dividends', timeout=15000) # Scroll to dividends panel await page.evaluate(''' () => { const dividendsPanel = document.querySelector('#dividends'); if (dividendsPanel) { dividendsPanel.scrollIntoView({ behavior: 'smooth', block: 'center' }); } } ''') await page.wait_for_timeout(1000) # CRITICAL: Click on the panel header to trigger content loading # Schwab's panels don't auto-load - they need to be clicked if debug: logger.debug("Clicking dividends panel header to trigger content load...") try: dividends_header = await page.query_selector('#dividends h2, #dividends .sdps-panel__title, #dividends-togglechevron-button') if dividends_header: await dividends_header.click() await page.wait_for_timeout(2000) if debug: logger.debug("Clicked dividends panel header successfully") except Exception as e: if debug: logger.debug(f"Could not click dividends header: {e}") # Wait for content to load after click await page.wait_for_timeout(1000) # Extract dividend data dividend_data = await page.evaluate(''' () => { const data = {}; const dividendsPanel = document.querySelector('#dividends'); if (!dividendsPanel) return data; const fullText = dividendsPanel.textContent || ''; // DEBUG: Return sample of text for debugging data._debug_text_sample = fullText.substring(0, 800); // Next dividend payment const nextPaymentMatch = fullText.match(/Next Dividend Payment\\s*\\$([0-9.]+)/i); if (nextPaymentMatch) data.next_payment = nextPaymentMatch[1]; // Next pay date const nextPayDateMatch = fullText.match(/Next Pay Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i); if (nextPayDateMatch) data.next_pay_date = nextPayDateMatch[1]; // Next ex-date const nextExDateMatch = fullText.match(/Next Ex-Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i); if (nextExDateMatch) data.next_ex_date = nextExDateMatch[1]; // Previous dividend payment const prevPaymentMatch = fullText.match(/Previous Dividend Payment\\s*\\$([0-9.]+)/i); if (prevPaymentMatch) data.previous_payment = prevPaymentMatch[1]; // Previous pay date const prevPayDateMatch = fullText.match(/Previous Pay Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i); if (prevPayDateMatch) data.previous_pay_date = prevPayDateMatch[1]; // Previous ex-date const prevExDateMatch = fullText.match(/Previous Ex-Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i); if (prevExDateMatch) data.previous_ex_date = prevExDateMatch[1]; // Frequency const frequencyMatch = fullText.match(/Frequency\\s*(Quarterly|Monthly|Annual|Semi-Annual)/i); if (frequencyMatch) data.frequency = frequencyMatch[1]; // Annual Dividend Rate (IAD) const annualRateMatch = fullText.match(/Annual Dividend Rate.*?\\$([0-9.]+)/i); if (annualRateMatch) data.annual_rate = annualRateMatch[1]; // Annual Dividend Yield - appears after "Annual Dividend Yield" text // Text pattern: "Annual Dividend Yield...2.71%" const yieldMatch = fullText.match(/Annual Dividend Yield[\\s\\S]{0,300}?([0-9]+\\.[0-9]+)%/i); if (yieldMatch) data.annual_yield = yieldMatch[1]; return data; } ''') if debug and dividend_data.get('_debug_text_sample'): logger.debug(f"Dividend panel text sample: {dividend_data['_debug_text_sample']}") # Parse and assign values dividends.next_payment = _parse_float(dividend_data.get('next_payment')) dividends.next_pay_date = dividend_data.get('next_pay_date') dividends.next_ex_date = dividend_data.get('next_ex_date') dividends.previous_payment = _parse_float(dividend_data.get('previous_payment')) dividends.previous_pay_date = dividend_data.get('previous_pay_date') dividends.previous_ex_date = dividend_data.get('previous_ex_date') dividends.frequency = dividend_data.get('frequency') dividends.annual_rate = _parse_float(dividend_data.get('annual_rate')) dividends.annual_yield = _parse_float(dividend_data.get('annual_yield')) if debug: logger.debug(f"Extracted dividend data: next_payment={dividends.next_payment}, " f"next_pay_date={dividends.next_pay_date}, annual_rate={dividends.annual_rate}") except Exception as e: if debug: logger.debug(f"Error extracting dividend data: {e}") return dividends async def extract_earnings_data(page, debug: bool = False) -> EarningsData: """Extract earnings metrics and forecasts. Args: page: Playwright page object debug: Enable debug logging Returns: EarningsData object with extracted fields """ earnings = EarningsData() try: if debug: logger.debug("Starting earnings data extraction...") # Wait for earnings panel to load await page.wait_for_selector('#expected-earnings', timeout=15000) # Scroll to earnings panel await page.evaluate(''' () => { const earningsPanel = document.querySelector('#expected-earnings'); if (earningsPanel) { earningsPanel.scrollIntoView({ behavior: 'smooth', block: 'center' }); } } ''') await page.wait_for_timeout(1000) # CRITICAL: Click on the panel header to trigger content loading # Schwab's panels don't auto-load - they need to be clicked if debug: logger.debug("Clicking earnings panel header to trigger content load...") try: earnings_header = await page.query_selector('#expected-earnings h2, #expected-earnings .sdps-panel__title, #expected-earnings-heading, #expected-earnings-togglechevron-button') if earnings_header: await earnings_header.click() await page.wait_for_timeout(2000) if debug: logger.debug("Clicked earnings panel header successfully") except Exception as e: if debug: logger.debug(f"Could not click earnings header: {e}") # Wait for content to load after click await page.wait_for_timeout(1000) # Check for and click "Show More" if present try: # Use JS to find and click - most robust way clicked = await page.evaluate(''' () => { const panel = document.querySelector('#expected-earnings'); if (!panel) return false; // Find any element with "Show More" text const elements = Array.from(panel.querySelectorAll('a, button, span, div')); const showMore = elements.find(el => el.textContent.trim().toLowerCase() === "show more"); if (showMore) { showMore.click(); return true; } return false; } ''') if clicked: if debug: logger.debug("found and clicked 'Show More' via JS") await page.wait_for_timeout(2000) elif debug: logger.debug("'Show More' not found or not clickable") except Exception as e: if debug: logger.debug(f"Error checking for Show More: {e}") # Extract earnings data earnings_data = await page.evaluate(r''' (debug) => { const data = {}; // Helper to get text content including Shadow DOMs const getDeepText = (root) => { if (!root) return ''; if (root.nodeType === Node.TEXT_NODE) return root.textContent; if (root.nodeType === Node.ELEMENT_NODE && root.shadowRoot) { return getDeepText(root.shadowRoot); } let text = ''; const children = root.childNodes; for (let i = 0; i < children.length; i++) { text += getDeepText(children[i]); } return text; }; const earningsPanel = document.querySelector('#expected-earnings'); let fullText = ''; if (earningsPanel) { fullText = getDeepText(earningsPanel); } // Fallback to body deep text if panel seems empty if (fullText.length < 500 || !fullText.includes("Announcement")) { fullText = getDeepText(document.body); } // Next earnings announcement - robust regex checking for various patterns let nextAnnouncementMatch = fullText.match(/Next Earnings Announcement.*?([0-9]{2}\/[0-9]{2}\/[0-9]{4})/i); if (!nextAnnouncementMatch) { // Try alternate pattern: Announcement: 12/12/2025 nextAnnouncementMatch = fullText.match(/Announcement:?\s*([0-9]{2}\/[0-9]{2}\/[0-9]{4})/i); } if (nextAnnouncementMatch) data.next_announcement_date = nextAnnouncementMatch[1]; // Announcement timing const timingMatch = fullText.match(/(Before Market Open|After Market Close)/i); if (timingMatch) data.announcement_timing = timingMatch[1]; // Number of analysts const analystsMatch = fullText.match(/With ([0-9]+) analysts covering/i); if (analystsMatch) data.analysts_covering = analystsMatch[1]; // Consensus estimate const consensusMatch = fullText.match(/consensus.*?estimate is \\$([0-9.]+)/i); if (consensusMatch) data.consensus_estimate = consensusMatch[1]; // High/Low estimates const highLowMatch = fullText.match(/high and low estimates are \\$([0-9.]+) and \\$([0-9.]+)/i); if (highLowMatch) { data.estimate_high = highLowMatch[1]; data.estimate_low = highLowMatch[2]; } // EPS TTM (multiple patterns) let epsMatch = fullText.match(/EPS\s*\(TTM\)\s*(?:Value)?\s*\$?([0-9.-]+)/i); if (!epsMatch) epsMatch = fullText.match(/Earnings per Share\s*\(?TTM\)?\s*(?:Value)?\s*\$?([0-9.-]+)/i); if (!epsMatch) epsMatch = fullText.match(/EPS\s+(?:Value)?\s*([0-9.-]+)/i); if (epsMatch) data.eps_ttm = epsMatch[1]; // Revenue TTM let revenueMatch = fullText.match(/Revenue\s*\(TTM\)\s*(?:Value)?\s*\$([0-9.]+[KMBT]?)/i); if (!revenueMatch) revenueMatch = fullText.match(/Revenue\s+(?:Value)?\s*\$([0-9.]+[KMBT])/i); if (revenueMatch) data.revenue_ttm = revenueMatch[1]; // P/E TTM (multiple patterns) let peMatch = fullText.match(/Price[\/\s]*Earnings\s*\(TTM\)\s*(?:Value)?\s*([0-9.]+)/i); if (!peMatch) peMatch = fullText.match(/P[\/\s]*E\s*\(?TTM\)?\s*(?:Value)?\s*([0-9.]+)/i); if (!peMatch) peMatch = fullText.match(/PE Ratio\s*\(TTM\)\s*(?:Value)?\s*([0-9.]+)/i); if (peMatch) data.pe_ttm = peMatch[1]; // Forward P/E let forwardPeMatch = fullText.match(/Forward\s+P[\/\s]*E\s*(?:Value)?\s*([0-9.]+)/i); if (!forwardPeMatch) forwardPeMatch = fullText.match(/P[\/\s]*E\s*\(Forward\)\s*(?:Value)?\s*([0-9.]+)/i); if (forwardPeMatch) data.forward_pe = forwardPeMatch[1]; // PEG Ratio let pegMatch = fullText.match(/Price\s+to\s+Earnings[\/\s]*Growth\s*\(PEG\)\s*(?:Value)?\s*([0-9.]+)/i); if (!pegMatch) pegMatch = fullText.match(/PEG\s*Ratio?\s*(?:Value)?\s*([0-9.]+)/i); if (pegMatch) data.peg_ratio = pegMatch[1]; // Recent beats/misses (simplified - just extract beat amounts) const beatMatches = fullText.matchAll(/Beat.*?\$([0-9.]+)/gi); data.recent_beats = []; for (const match of beatMatches) { data.recent_beats.push(match[1]); } return data; } ''', debug) # Parse and assign values earnings.next_announcement_date = earnings_data.get('next_announcement_date') earnings.announcement_timing = earnings_data.get('announcement_timing') earnings.analysts_covering = _parse_int(earnings_data.get('analysts_covering')) earnings.consensus_estimate = _parse_float(earnings_data.get('consensus_estimate')) earnings.estimate_high = _parse_float(earnings_data.get('estimate_high')) earnings.estimate_low = _parse_float(earnings_data.get('estimate_low')) earnings.eps_ttm = _parse_float(earnings_data.get('eps_ttm')) earnings.revenue_ttm = _parse_revenue(earnings_data.get('revenue_ttm', '')) earnings.pe_ttm = _parse_float(earnings_data.get('pe_ttm')) earnings.forward_pe = _parse_float(earnings_data.get('forward_pe')) earnings.peg_ratio = _parse_float(earnings_data.get('peg_ratio')) # Store recent beats as list of dicts if earnings_data.get('recent_beats'): earnings.recent_beats = [ {'beat_amount': _parse_float(beat)} for beat in earnings_data.get('recent_beats', []) ] if debug: logger.debug(f"Extracted earnings data: eps_ttm={earnings.eps_ttm}, " f"pe_ttm={earnings.pe_ttm}, forward_pe={earnings.forward_pe}") except Exception as e: if debug: logger.debug(f"Error extracting earnings data: {e}") return earnings def calculate_payout_ratio(annual_dividend: Optional[float], eps_ttm: Optional[float]) -> Optional[float]: """Calculate dividend payout ratio. Formula: (Annual Dividend Rate / EPS TTM) × 100 Args: annual_dividend: Annual dividend rate per share eps_ttm: Earnings per share (trailing twelve months) Returns: Payout ratio as percentage, or None if cannot calculate """ if annual_dividend and eps_ttm and eps_ttm > 0: ratio = (annual_dividend / eps_ttm) * 100 return round(ratio, 2) return None async def extract_phase1_data(page, debug: bool = False) -> EquityPhase1Data: """Extract all Phase 1 data points. Args: page: Playwright page object debug: Enable debug output Returns: EquityPhase1Data object with all extracted data """ if debug: logger.debug("Starting Phase 1 data extraction...") # Wait for page to stabilize await page.wait_for_timeout(3000) # Extract ticker from page URL ticker = await page.evaluate(''' () => { const url = window.location.href; const match = url.match(/stocks\\/([A-Z]+)/i); return match ? match[1].toUpperCase() : ''; } ''') # Extract each section quote = await extract_quote_data(page, ticker=ticker, debug=debug) dividends = await extract_enhanced_dividends(page, debug=debug) earnings = await extract_earnings_data(page, debug=debug) # Calculate derived metrics calculated = CalculatedMetrics() if dividends.annual_rate and earnings.eps_ttm: calculated.payout_ratio = calculate_payout_ratio( dividends.annual_rate, earnings.eps_ttm ) # Create Phase 1 data object phase1_data = EquityPhase1Data( ticker=ticker, quote=quote, dividends=dividends, earnings=earnings, calculated_metrics=calculated ) if debug: logger.debug(f"Phase 1 extraction complete for {ticker}") return phase1_data