Fix build: Bundle schwab_scraper source and use local dependencies

2026-04-24 01:50:20 +00:00
parent 02ac293692
commit 650ea2d087
43 changed files with 10900 additions and 41 deletions
--- a/schwab_scraper/features/equity/phase1_scraper.py
+++ b/schwab_scraper/features/equity/phase1_scraper.py
@@ -0,0 +1,786 @@
+"""Phase 1: Essential Dividend Metrics Implementation (DEPRECATED)
+
+⚠️ DEPRECATED: This DOM-scraping based approach has been replaced by phase1_api_scraper.py
+which uses Schwab's REST APIs directly. The API approach is more reliable, complete,
+and maintainable than DOM scraping.
+
+This module is kept for reference only. New code should use phase1_api_scraper.py.
+
+Old approach extracts from DOM:
+- Quote/Price Data (symbol bar)
+- Enhanced Dividend Information (forward-looking dates)
+- Core Earnings Metrics (EPS, forecasts)
+- Basic Valuation Ratios (P/E, Forward P/E, PEG)
+- Calculated Metrics (payout ratio)
+"""
+
+from typing import Dict, Any, Optional
+import re
+import logging
+
+from ...core import QuoteData, EnhancedDividends, EarningsData, CalculatedMetrics, EquityPhase1Data
+
+
+logger = logging.getLogger(__name__)
+
+
+def _parse_float(value: Any) -> Optional[float]:
+    """Convert a scraped display value into a float.
+
+    Dollar signs, thousands separators and percent signs are removed before
+    conversion. ``None``, blank text, the ``--`` placeholder, ``n/a`` (any
+    case) and anything ``float()`` rejects all yield ``None``.
+    """
+    if value is None:
+        return None
+    try:
+        text = str(value).strip()
+        for symbol in ('$', ',', '%'):
+            text = text.replace(symbol, '')
+        is_placeholder = text == '--' or text.lower() == 'n/a'
+        if not text or is_placeholder:
+            return None
+        return float(text)
+    except (ValueError, AttributeError):
+        return None
+
+
+def _parse_int(value: Any) -> Optional[int]:
+    """Safely parse a value to int.
+
+    Thousands separators are stripped. Plain integer text is converted
+    directly so large values keep full precision; any other numeric text
+    (e.g. ``"12.7"`` or ``"1e3"``) goes through ``float`` and is truncated
+    toward zero.
+
+    Args:
+        value: Raw value, typically text scraped from the page.
+
+    Returns:
+        The parsed integer, or ``None`` for ``None``, blank text, ``--``,
+        ``n/a`` (any case), non-numeric text, and non-finite values such as
+        ``inf`` or ``nan``.
+    """
+    if value is None:
+        return None
+    try:
+        clean = str(value).strip().replace(',', '')
+        if not clean or clean == '--' or clean.lower() == 'n/a':
+            return None
+        try:
+            # Direct conversion avoids the 2**53 precision loss of float.
+            return int(clean)
+        except ValueError:
+            # Not plain integer text; fall back to float and truncate.
+            # int(float('inf')) raises OverflowError, handled below.
+            return int(float(clean))
+    except (ValueError, AttributeError, OverflowError):
+        return None
+
+
+def _parse_volume(volume_str: str) -> Optional[int]:
+    """Parse volume string like '8M', '22.4M', '1.2B' to integer.
+
+    A trailing ``K``, ``M`` or ``B`` (case-insensitive) scales the number by
+    a thousand, a million or a billion. Thousands separators are ignored.
+    The scaled value is rounded to the nearest whole number instead of being
+    truncated, because binary floating point can land just below the
+    intended integer (``1.005 * 1000`` evaluates to ``1004.9999999999999``).
+
+    Args:
+        volume_str: Display text for the volume.
+
+    Returns:
+        The share count as an int, or ``None`` when the input is empty, not
+        a string, not numeric, or not finite.
+    """
+    if not volume_str:
+        return None
+
+    suffix_scale = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}
+    try:
+        text = volume_str.strip().upper().replace(',', '')
+        multiplier = suffix_scale.get(text[-1:], 1)
+        if multiplier != 1:
+            text = text[:-1]
+        # round() raises OverflowError for inf and ValueError for nan.
+        return round(float(text) * multiplier)
+    except (ValueError, AttributeError, OverflowError):
+        return None
+
+
+def _parse_revenue(revenue_str: str) -> Optional[float]:
+    """Turn abbreviated revenue text ('$92.15B', '$1.5M') into a dollar amount.
+
+    The dollar sign and thousands separators are dropped, and a trailing
+    K/M/B/T (case-insensitive) scales the number by 1e3/1e6/1e9/1e12.
+    Empty input, non-string input and non-numeric text yield ``None``.
+    """
+    if not revenue_str:
+        return None
+
+    scales = {
+        'K': 1_000,
+        'M': 1_000_000,
+        'B': 1_000_000_000,
+        'T': 1_000_000_000_000,
+    }
+    try:
+        text = revenue_str.strip().upper().replace('$', '').replace(',', '')
+        scale = scales.get(text[-1:], 1)
+        if scale != 1:
+            # Drop the magnitude suffix before numeric conversion.
+            text = text[:-1]
+        return float(text) * scale
+    except (ValueError, AttributeError):
+        return None
+
+
+async def extract_quote_data(page, ticker: str = "", debug: bool = False) -> QuoteData:
+    """Extract quote/price data from symbol bar.
+
+    Best-effort DOM scrape: the symbol-bar text (or, failing that, the
+    nearest large container around the "Previous close" label, or the whole
+    body) is matched against a series of regular expressions inside the
+    browser, and the captured strings are converted with the module's
+    parsing helpers. Any exception is swallowed (logged only when ``debug``
+    is set) and whatever was filled in so far is returned.
+
+    Args:
+        page: Playwright page object
+        ticker: Stock ticker symbol (for pattern matching)
+        debug: Enable debug logging
+
+    Returns:
+        QuoteData object with extracted fields; fields that could not be
+        located keep the defaults set by ``QuoteData()``.
+    """
+    quote = QuoteData()
+
+    try:
+        if debug:
+            logger.debug("Starting quote data extraction...")
+
+        # Wait for symbol bar content (look for key labels).
+        # The text alternative uses the CSS :text() pseudo-class: the
+        # "text=" selector engine cannot be one member of a comma-separated
+        # CSS list, so the previous selector was rejected immediately and
+        # the wait never actually happened.
+        try:
+            await page.wait_for_selector('#app-symbol-bar-component, :text("Previous close")', state='attached', timeout=15000)
+        except Exception:
+            if debug:
+                logger.debug("Timeout waiting for symbol bar selector, attempting to parse whatever is there")
+
+        if debug:
+            # The raw text is only used for this log line, so the extra
+            # browser round-trip is skipped unless debugging.
+            symbol_bar_text = await page.evaluate('''
+                () => {
+                    const symbolBar = document.querySelector('#app-symbol-bar-component');
+                    if (symbolBar && symbolBar.textContent && symbolBar.textContent.includes('Previous close')) return symbolBar.textContent;
+
+                    // If specific component not found, try to find the container with market data
+                    // Look for container with "Previous close"
+                    const labels = Array.from(document.querySelectorAll('span, div, p'));
+                    const prevCloseLabel = labels.find(el => el.textContent && el.textContent.includes('Previous close'));
+                    if (prevCloseLabel) {
+                        // Return the parent's text content (go up a few levels to capture all data)
+                        let parent = prevCloseLabel.parentElement;
+                        let count = 0;
+                        while (parent && count < 8) {
+                            if (parent.textContent.length > 300) return parent.textContent;
+                            parent = parent.parentElement;
+                            count++;
+                        }
+                    }
+
+                    return document.body.textContent || '';
+                }
+            ''')
+            logger.debug(f"Symbol bar text (first 500 chars): {symbol_bar_text[:500]}")
+
+        # Extract structured data.
+        # NOTE: this is a *raw* Python string, so every backslash reaches the
+        # browser unchanged. Regex literals use single backslashes; patterns
+        # built from JS strings (new RegExp) use exactly one doubling.
+        quote_data = await page.evaluate(r'''
+            (ticker) => {
+                const data = {};
+
+                // Helper to get text content from page
+                const getText = () => {
+                   const symbolBar = document.querySelector('#app-symbol-bar-component');
+                   // Verify it looks like the right component by checking for "Previous close"
+                   if (symbolBar && symbolBar.textContent && symbolBar.textContent.includes('Previous close')) {
+                       return symbolBar.textContent;
+                   }
+
+                   // Fallback logic
+                   const labels = Array.from(document.querySelectorAll('span, div, p'));
+                   const prevCloseLabel = labels.find(el => el.textContent && el.textContent.includes('Previous close'));
+                   if (prevCloseLabel) {
+                        let parent = prevCloseLabel.parentElement;
+                        let count = 0;
+                        while (parent && count < 8) {
+                            if (parent.textContent.length > 300) return parent.textContent;
+                            parent = parent.parentElement;
+                            count++;
+                        }
+                   }
+
+                   // Last resort: body text
+                   return document.body.textContent || '';
+                };
+
+                const fullText = getText();
+
+                // Try to find price in quote container first for accuracy
+                const priceElement = document.querySelector('.symbol-quote-container, [data-testid="quote-price"]');
+                if (priceElement) {
+                    const priceText = priceElement.textContent || '';
+                    const priceMatch = priceText.match(/\$([0-9,]+\.[0-9]+)/);
+                    if (priceMatch) data.price = priceMatch[1].replace(',', '');
+                } else {
+                    // Fallback regex for price if element not found
+                    // Look for price near top or just regex
+                    const priceMatch = fullText.match(/\$([0-9,]+\.[0-9]{2})(\s|[+-]|$)/);
+                    if (priceMatch) data.price = priceMatch[1].replace(',', '');
+                }
+
+                // After hours (using \s* for robustness)
+                const afterHoursMatch = fullText.match(/After hours:?\s*\$([0-9,.]+)/i);
+                if (afterHoursMatch) data.after_hours_price = afterHoursMatch[1].replace(',', '');
+
+                const afterHoursChangeMatch = fullText.match(/After hours:.*?([+-]\$[0-9,.]+)\s*\(([+-][0-9.]+)%\)/i);
+                if (afterHoursChangeMatch) {
+                    data.after_hours_change = afterHoursChangeMatch[1].replace('$', '').replace(',', '');
+                    data.after_hours_change_percent = afterHoursChangeMatch[2];
+                }
+
+                // Bid/Ask (using \s* for robustness)
+                const bidMatch = fullText.match(/Bid\s*\$([0-9,.]+)/i);
+                if (bidMatch) data.bid = bidMatch[1].replace(',', '');
+
+                const askMatch = fullText.match(/Ask\s*\$([0-9,.]+)/i);
+                if (askMatch) data.ask = askMatch[1].replace(',', '');
+
+                const bidAskSizeMatch = fullText.match(/Bid\/Ask Size\s*([0-9]+\/[0-9]+)/i);
+                if (bidAskSizeMatch) data.bid_ask_size = bidAskSizeMatch[1];
+
+                // Previous close and open (using \s* instead of \s+)
+                const prevCloseMatch = fullText.match(/Previous close\s*\$([0-9,.]+)/i);
+                if (prevCloseMatch) data.previous_close = prevCloseMatch[1].replace(',', '');
+
+                const openMatch = fullText.match(/Today's open\s*\$([0-9,.]+)/i);
+                if (openMatch) data.open = openMatch[1].replace(',', '');
+
+                // Volume (using \s*)
+                const volumeMatch = fullText.match(/Today's volume\s*([0-9.]+[KMB]?)/i);
+                if (volumeMatch) data.volume = volumeMatch[1];
+
+                const volumeVsAvgMatch = fullText.match(/Today's volume\s*[0-9.]+[KMB]?\s*(Above Avg\.|Below Avg\.|Average)/i);
+                if (volumeVsAvgMatch) data.volume_vs_avg = volumeVsAvgMatch[1];
+
+                // Day range
+                // Pattern: "Today's range low $200.81 Today's range high $203.45" or similar
+                // We'll look for "low $X" and "high $Y" appearing after "Today's range"
+                const dayRangeMatch = fullText.match(/Today's range.*?low\s*\$([0-9,.]+).*?high\s*\$([0-9,.]+)/i);
+                if (dayRangeMatch) {
+                    data.day_range_low = dayRangeMatch[1].replace(',', '');
+                    data.day_range_high = dayRangeMatch[2].replace(',', '');
+                }
+
+                // 52-week range
+                const weekRangeMatch = fullText.match(/52-week range.*?low\s*\$([0-9,.]+).*?high\s*\$([0-9,.]+)/i);
+                if (weekRangeMatch) {
+                    data.week_52_low = weekRangeMatch[1].replace(',', '');
+                    data.week_52_high = weekRangeMatch[2].replace(',', '');
+                }
+
+                // Market cap (may be in Share Profile section)
+                const marketCapMatch = fullText.match(/Market Cap\s*\$([0-9.]+[KMBT])/i);
+                if (marketCapMatch) data.market_cap = marketCapMatch[1];
+
+                // Change and change percent
+
+                // Try specific formatted pattern first: TICKER $PRICE CHANGE CHANGE%
+                // e.g. "JNJ $201.95 -1.03 -0.51%"
+                const standardPattern = fullText.match(/\$([0-9,.]+)\s*([+-]?[0-9,.]+)\s*([+-]?[0-9.]+)%/);
+                if (standardPattern) {
+                     if (!data.price) data.price = standardPattern[1].replace(',', '');
+                     data.change = standardPattern[2];
+                     data.change_percent = standardPattern[3];
+                }
+
+                let percentMatch = null;
+                if (ticker && !data.change_percent) {
+                    // Match: TICKER$digits.digits{2}percent%
+                    // Escape regex metacharacters in the ticker (e.g. "BRK.B").
+                    const safeTicker = ticker.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+                    // One level of doubling only: '\\.' in this JS string is the
+                    // regex \. (literal dot). The earlier four-backslash form
+                    // compiled to "literal backslash + any char" and never matched.
+                    const tickerPattern = new RegExp(safeTicker + '\\.?[\\s]*\\$([0-9,]+\\.[0-9]{2})[\\s]*([0-9.]+)%', 'i');
+                    percentMatch = fullText.match(tickerPattern);
+                    if (percentMatch) {
+                        data.change_percent = percentMatch[2];
+                    }
+                }
+
+                if (!data.change_percent) {
+                    // Fallback: match any price+percent pattern with space
+                    const fallbackMatch = fullText.match(/\$[0-9,.]+\s*([+-]?[0-9.]+)%/);
+                    if (fallbackMatch) {
+                        data.change_percent = fallbackMatch[1];
+                    }
+                }
+
+                // Pattern 2: "+$1.23 (+0.45%)" or "-$1.23 (-0.45%)"
+                let changeMatch = fullText.match(/([+-]\$[0-9,.]+)\s*\(([+-][0-9.]+)%\)/);
+                // Pattern 3: "$193.08 +1.23 +0.64%" (price followed by change)
+                if (!changeMatch) {
+                    changeMatch = fullText.match(/\$[0-9,.]+\s*([+-][0-9,.]+)\s*([+-][0-9.]+)%/);
+                }
+                // Pattern 4: "Change: +1.23 (+0.64%)"
+                if (!changeMatch) {
+                    changeMatch = fullText.match(/Change:?\s*([+-][0-9,.]+)\s*\(([+-][0-9.]+)%\)/i);
+                }
+                if (changeMatch) {
+                    data.change = changeMatch[1].replace('$', '').replace(',', '');
+                    if (!data.change_percent) {
+                        data.change_percent = changeMatch[2].replace(/[+]/g, '');
+                    }
+                }
+
+                // Exchange - look for NYSE, NASDAQ, etc.
+                const exchangeMatch = fullText.match(/\b(NYSE|NASDAQ|AMEX|OTC|BATS)\b/i);
+                if (exchangeMatch) data.exchange = exchangeMatch[1].toUpperCase();
+
+                return data;
+            }
+        ''', ticker)
+
+        # Parse and assign values. The JS side only strips the first comma of
+        # each capture; _parse_float removes any that remain.
+        quote.price = _parse_float(quote_data.get('price'))
+        quote.change = _parse_float(quote_data.get('change'))
+        quote.change_percent = _parse_float(quote_data.get('change_percent'))
+        quote.after_hours_price = _parse_float(quote_data.get('after_hours_price'))
+        quote.after_hours_change = _parse_float(quote_data.get('after_hours_change'))
+        quote.after_hours_change_percent = _parse_float(quote_data.get('after_hours_change_percent'))
+        quote.bid = _parse_float(quote_data.get('bid'))
+        quote.ask = _parse_float(quote_data.get('ask'))
+        quote.bid_ask_size = quote_data.get('bid_ask_size')
+        quote.previous_close = _parse_float(quote_data.get('previous_close'))
+        quote.open = _parse_float(quote_data.get('open'))
+        quote.volume = _parse_volume(quote_data.get('volume', ''))
+        quote.volume_vs_avg = quote_data.get('volume_vs_avg')
+        quote.day_range_low = _parse_float(quote_data.get('day_range_low'))
+        quote.day_range_high = _parse_float(quote_data.get('day_range_high'))
+        quote.week_52_low = _parse_float(quote_data.get('week_52_low'))
+        quote.week_52_high = _parse_float(quote_data.get('week_52_high'))
+        # Market cap is kept as the abbreviated display string (e.g. "485.2B").
+        quote.market_cap = quote_data.get('market_cap')
+
+        # Try to extract sector and exchange from page header
+        header_data = await page.evaluate(r'''
+            () => {
+                const data = {};
+
+                // Look for sector near company name
+                const sectorElement = document.querySelector('[data-testid="sector"], .sector');
+                if (sectorElement) {
+                    data.sector = sectorElement.textContent.replace('Sector', '').trim();
+                } else {
+                    // Manual search for text containing "Sector"
+                    const spans = Array.from(document.querySelectorAll('span'));
+                    const sectorSpan = spans.find(el => el.textContent && el.textContent.includes('Sector'));
+                    if (sectorSpan) {
+                         data.sector = sectorSpan.textContent.replace('Sector', '').replace(':', '').trim();
+                    }
+                }
+
+                // Look for exchange near ticker
+                const exchangeElement = document.querySelector('[data-testid="exchange"], .exchange');
+                if (exchangeElement) {
+                    data.exchange = exchangeElement.textContent.trim();
+                }
+
+                // Fallback: parse from page text
+                const pageText = document.body.textContent || '';
+                if (!data.sector) {
+                    const sectorMatch = pageText.match(/Sector[:\s]+([A-Za-z\s&]+)/);
+                    if (sectorMatch) data.sector = sectorMatch[1].trim();
+                }
+                if (!data.exchange) {
+                    const exchangeMatch = pageText.match(/(NYSE|NASDAQ|AMEX|OTC)/i);
+                    if (exchangeMatch) data.exchange = exchangeMatch[1].toUpperCase();
+                }
+
+                return data;
+            }
+        ''')
+
+        quote.sector = header_data.get('sector')
+        # The header scrape wins; the exchange found in the symbol-bar text
+        # (previously computed and then discarded) is used when it finds none.
+        quote.exchange = header_data.get('exchange') or quote_data.get('exchange')
+
+        if debug:
+            logger.debug(f"Extracted quote data: price={quote.price}, volume={quote.volume}, "
+                        f"52w_range={quote.week_52_low}-{quote.week_52_high}")
+
+    except Exception as e:
+        # Deliberate best-effort: a scrape failure yields a partially filled
+        # QuoteData rather than an exception for the caller.
+        if debug:
+            logger.debug(f"Error extracting quote data: {e}")
+
+    return quote
+
+
+async def extract_enhanced_dividends(page, debug: bool = False) -> EnhancedDividends:
+    """Scrape the ``#dividends`` panel into an ``EnhancedDividends`` record.
+
+    The panel is waited for, scrolled into view and its header clicked, then
+    the panel text is matched against a set of regular expressions inside
+    the browser. Any failure is swallowed (logged only when ``debug`` is
+    set) and the partially populated record is returned.
+
+    Args:
+        page: Playwright page object
+        debug: Enable debug logging
+
+    Returns:
+        EnhancedDividends object with extracted fields
+    """
+    result = EnhancedDividends()
+
+    # Target attribute -> converter, in assignment order. ``None`` means the
+    # captured string is stored unchanged.
+    field_plan = (
+        ('next_payment', _parse_float),
+        ('next_pay_date', None),
+        ('next_ex_date', None),
+        ('previous_payment', _parse_float),
+        ('previous_pay_date', None),
+        ('previous_ex_date', None),
+        ('frequency', None),
+        ('annual_rate', _parse_float),
+        ('annual_yield', _parse_float),
+    )
+    header_selector = ', '.join((
+        '#dividends h2',
+        '#dividends .sdps-panel__title',
+        '#dividends-togglechevron-button',
+    ))
+
+    try:
+        if debug:
+            logger.debug("Starting enhanced dividend extraction...")
+
+        # Block until the panel exists, then bring it into the viewport.
+        await page.wait_for_selector('#dividends', timeout=15000)
+        await page.evaluate('''
+            () => {
+                const dividendsPanel = document.querySelector('#dividends');
+                if (dividendsPanel) {
+                    dividendsPanel.scrollIntoView({ behavior: 'smooth', block: 'center' });
+                }
+            }
+        ''')
+        await page.wait_for_timeout(1000)
+
+        # The panel body is only populated after its header is clicked.
+        if debug:
+            logger.debug("Clicking dividends panel header to trigger content load...")
+        try:
+            header = await page.query_selector(header_selector)
+            if header:
+                await header.click()
+                await page.wait_for_timeout(2000)
+                if debug:
+                    logger.debug("Clicked dividends panel header successfully")
+        except Exception as click_error:
+            if debug:
+                logger.debug(f"Could not click dividends header: {click_error}")
+
+        # Extra settle time after the click attempt.
+        await page.wait_for_timeout(1000)
+
+        # Pull the raw strings out of the panel text in the browser.
+        scraped = await page.evaluate('''
+            () => {
+                const data = {};
+                const dividendsPanel = document.querySelector('#dividends');
+                if (!dividendsPanel) return data;
+                
+                const fullText = dividendsPanel.textContent || '';
+                
+                // DEBUG: Return sample of text for debugging
+                data._debug_text_sample = fullText.substring(0, 800);
+                
+                // Next dividend payment
+                const nextPaymentMatch = fullText.match(/Next Dividend Payment\\s*\\$([0-9.]+)/i);
+                if (nextPaymentMatch) data.next_payment = nextPaymentMatch[1];
+                
+                // Next pay date
+                const nextPayDateMatch = fullText.match(/Next Pay Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
+                if (nextPayDateMatch) data.next_pay_date = nextPayDateMatch[1];
+                
+                // Next ex-date
+                const nextExDateMatch = fullText.match(/Next Ex-Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
+                if (nextExDateMatch) data.next_ex_date = nextExDateMatch[1];
+                
+                // Previous dividend payment
+                const prevPaymentMatch = fullText.match(/Previous Dividend Payment\\s*\\$([0-9.]+)/i);
+                if (prevPaymentMatch) data.previous_payment = prevPaymentMatch[1];
+                
+                // Previous pay date
+                const prevPayDateMatch = fullText.match(/Previous Pay Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
+                if (prevPayDateMatch) data.previous_pay_date = prevPayDateMatch[1];
+                
+                // Previous ex-date
+                const prevExDateMatch = fullText.match(/Previous Ex-Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
+                if (prevExDateMatch) data.previous_ex_date = prevExDateMatch[1];
+                
+                // Frequency
+                const frequencyMatch = fullText.match(/Frequency\\s*(Quarterly|Monthly|Annual|Semi-Annual)/i);
+                if (frequencyMatch) data.frequency = frequencyMatch[1];
+                
+                // Annual Dividend Rate (IAD)
+                const annualRateMatch = fullText.match(/Annual Dividend Rate.*?\\$([0-9.]+)/i);
+                if (annualRateMatch) data.annual_rate = annualRateMatch[1];
+                
+                // Annual Dividend Yield - appears after "Annual Dividend Yield" text
+                // Text pattern: "Annual Dividend Yield...2.71%"
+                const yieldMatch = fullText.match(/Annual Dividend Yield[\\s\\S]{0,300}?([0-9]+\\.[0-9]+)%/i);
+                if (yieldMatch) data.annual_yield = yieldMatch[1];
+                
+                return data;
+            }
+        ''')
+
+        if debug and scraped.get('_debug_text_sample'):
+            logger.debug(f"Dividend panel text sample: {scraped['_debug_text_sample']}")
+
+        # Copy each capture onto the record, converting amounts to floats.
+        for attr_name, convert in field_plan:
+            raw = scraped.get(attr_name)
+            setattr(result, attr_name, raw if convert is None else convert(raw))
+
+        if debug:
+            logger.debug(f"Extracted dividend data: next_payment={result.next_payment}, "
+                        f"next_pay_date={result.next_pay_date}, annual_rate={result.annual_rate}")
+
+    except Exception as error:
+        if debug:
+            logger.debug(f"Error extracting dividend data: {error}")
+
+    return result
+
+
+async def extract_earnings_data(page, debug: bool = False) -> EarningsData:
+    """Extract earnings metrics and forecasts.
+    
+    Args:
+        page: Playwright page object
+        debug: Enable debug logging
+        
+    Returns:
+        EarningsData object with extracted fields
+    """
+    earnings = EarningsData()
+    
+    try:
+        if debug:
+            logger.debug("Starting earnings data extraction...")
+        
+        # Wait for earnings panel to load
+        await page.wait_for_selector('#expected-earnings', timeout=15000)
+        
+        # Scroll to earnings panel
+        await page.evaluate('''
+            () => {
+                const earningsPanel = document.querySelector('#expected-earnings');
+                if (earningsPanel) {
+                    earningsPanel.scrollIntoView({ behavior: 'smooth', block: 'center' });
+                }
+            }
+        ''')
+        await page.wait_for_timeout(1000)
+        
+        # CRITICAL: Click on the panel header to trigger content loading
+        # Schwab's panels don't auto-load - they need to be clicked
+        if debug:
+            logger.debug("Clicking earnings panel header to trigger content load...")
+        try:
+            earnings_header = await page.query_selector('#expected-earnings h2, #expected-earnings .sdps-panel__title, #expected-earnings-heading, #expected-earnings-togglechevron-button')
+            if earnings_header:
+                await earnings_header.click()
+                await page.wait_for_timeout(2000)
+                if debug:
+                    logger.debug("Clicked earnings panel header successfully")
+        except Exception as e:
+            if debug:
+                logger.debug(f"Could not click earnings header: {e}")
+        
+        # Wait for content to load after click
+        await page.wait_for_timeout(1000)
+
+        # Check for and click "Show More" if present
+        try:
+            # Use JS to find and click - most robust way
+            clicked = await page.evaluate('''
+                () => {
+                    const panel = document.querySelector('#expected-earnings');
+                    if (!panel) return false;
+                    
+                    // Find any element with "Show More" text
+                    const elements = Array.from(panel.querySelectorAll('a, button, span, div'));
+                    const showMore = elements.find(el => el.textContent.trim().toLowerCase() === "show more");
+                    
+                    if (showMore) {
+                        showMore.click();
+                        return true;
+                    }
+                    return false;
+                }
+            ''')
+            
+            if clicked:
+                if debug:
+                    logger.debug("found and clicked 'Show More' via JS")
+                await page.wait_for_timeout(2000)
+            elif debug:
+                logger.debug("'Show More' not found or not clickable")
+                
+        except Exception as e:
+            if debug:
+                logger.debug(f"Error checking for Show More: {e}")
+        
+        # Extract earnings data
+        earnings_data = await page.evaluate(r'''
+            (debug) => {
+                const data = {};
+                // Helper to get text content including Shadow DOMs
+                const getDeepText = (root) => {
+                    if (!root) return '';
+                    if (root.nodeType === Node.TEXT_NODE) return root.textContent;
+                    if (root.nodeType === Node.ELEMENT_NODE && root.shadowRoot) {
+                        return getDeepText(root.shadowRoot);
+                    }
+                    
+                    let text = '';
+                    const children = root.childNodes;
+                    for (let i = 0; i < children.length; i++) {
+                        text += getDeepText(children[i]);
+                    }
+                    return text;
+                };
+
+                const earningsPanel = document.querySelector('#expected-earnings');
+                let fullText = '';
+                
+                if (earningsPanel) {
+                     fullText = getDeepText(earningsPanel);
+                }
+                
+                // Fallback to body deep text if panel seems empty
+                if (fullText.length < 500 || !fullText.includes("Announcement")) {
+                    fullText = getDeepText(document.body);
+                }
+                
+                // Next earnings announcement - robust regex checking for various patterns
+                let nextAnnouncementMatch = fullText.match(/Next Earnings Announcement.*?([0-9]{2}\/[0-9]{2}\/[0-9]{4})/i);
+                if (!nextAnnouncementMatch) {
+                     // Try alternate pattern: Announcement: 12/12/2025
+                     nextAnnouncementMatch = fullText.match(/Announcement:?\s*([0-9]{2}\/[0-9]{2}\/[0-9]{4})/i);
+                }
+                if (nextAnnouncementMatch) data.next_announcement_date = nextAnnouncementMatch[1];
+                
+                // Announcement timing
+                const timingMatch = fullText.match(/(Before Market Open|After Market Close)/i);
+                if (timingMatch) data.announcement_timing = timingMatch[1];
+                
+                // Number of analysts
+                const analystsMatch = fullText.match(/With ([0-9]+) analysts covering/i);
+                if (analystsMatch) data.analysts_covering = analystsMatch[1];
+                
+                // Consensus estimate
+                const consensusMatch = fullText.match(/consensus.*?estimate is \\$([0-9.]+)/i);
+                if (consensusMatch) data.consensus_estimate = consensusMatch[1];
+                
+                // High/Low estimates
+                const highLowMatch = fullText.match(/high and low estimates are \\$([0-9.]+) and \\$([0-9.]+)/i);
+                if (highLowMatch) {
+                    data.estimate_high = highLowMatch[1];
+                    data.estimate_low = highLowMatch[2];
+                }
+                
+                // EPS TTM (multiple patterns)
+                let epsMatch = fullText.match(/EPS\s*\(TTM\)\s*(?:Value)?\s*\$?([0-9.-]+)/i);
+                if (!epsMatch) epsMatch = fullText.match(/Earnings per Share\s*\(?TTM\)?\s*(?:Value)?\s*\$?([0-9.-]+)/i);
+                if (!epsMatch) epsMatch = fullText.match(/EPS\s+(?:Value)?\s*([0-9.-]+)/i);
+                if (epsMatch) data.eps_ttm = epsMatch[1];
+                
+                // Revenue TTM
+                let revenueMatch = fullText.match(/Revenue\s*\(TTM\)\s*(?:Value)?\s*\$([0-9.]+[KMBT]?)/i);
+                if (!revenueMatch) revenueMatch = fullText.match(/Revenue\s+(?:Value)?\s*\$([0-9.]+[KMBT])/i);
+                if (revenueMatch) data.revenue_ttm = revenueMatch[1];
+                
+                // P/E TTM (multiple patterns)
+                let peMatch = fullText.match(/Price[\/\s]*Earnings\s*\(TTM\)\s*(?:Value)?\s*([0-9.]+)/i);
+                if (!peMatch) peMatch = fullText.match(/P[\/\s]*E\s*\(?TTM\)?\s*(?:Value)?\s*([0-9.]+)/i);
+                if (!peMatch) peMatch = fullText.match(/PE Ratio\s*\(TTM\)\s*(?:Value)?\s*([0-9.]+)/i);
+                if (peMatch) data.pe_ttm = peMatch[1];
+                
+                // Forward P/E
+                let forwardPeMatch = fullText.match(/Forward\s+P[\/\s]*E\s*(?:Value)?\s*([0-9.]+)/i);
+                if (!forwardPeMatch) forwardPeMatch = fullText.match(/P[\/\s]*E\s*\(Forward\)\s*(?:Value)?\s*([0-9.]+)/i);
+                if (forwardPeMatch) data.forward_pe = forwardPeMatch[1];
+                
+                // PEG Ratio
+                let pegMatch = fullText.match(/Price\s+to\s+Earnings[\/\s]*Growth\s*\(PEG\)\s*(?:Value)?\s*([0-9.]+)/i);
+                if (!pegMatch) pegMatch = fullText.match(/PEG\s*Ratio?\s*(?:Value)?\s*([0-9.]+)/i);
+                if (pegMatch) data.peg_ratio = pegMatch[1];
+                
+                // Recent beats/misses (simplified - just extract beat amounts)
+                const beatMatches = fullText.matchAll(/Beat.*?\$([0-9.]+)/gi);
+                data.recent_beats = [];
+                for (const match of beatMatches) {
+                    data.recent_beats.push(match[1]);
+                }
+                
+                return data;
+            }
+        ''', debug)
+        
+        # Parse and assign values
+        earnings.next_announcement_date = earnings_data.get('next_announcement_date')
+        earnings.announcement_timing = earnings_data.get('announcement_timing')
+        earnings.analysts_covering = _parse_int(earnings_data.get('analysts_covering'))
+        earnings.consensus_estimate = _parse_float(earnings_data.get('consensus_estimate'))
+        earnings.estimate_high = _parse_float(earnings_data.get('estimate_high'))
+        earnings.estimate_low = _parse_float(earnings_data.get('estimate_low'))
+        earnings.eps_ttm = _parse_float(earnings_data.get('eps_ttm'))
+        earnings.revenue_ttm = _parse_revenue(earnings_data.get('revenue_ttm', ''))
+        earnings.pe_ttm = _parse_float(earnings_data.get('pe_ttm'))
+        earnings.forward_pe = _parse_float(earnings_data.get('forward_pe'))
+        earnings.peg_ratio = _parse_float(earnings_data.get('peg_ratio'))
+        
+        # Store recent beats as list of dicts
+        if earnings_data.get('recent_beats'):
+            earnings.recent_beats = [
+                {'beat_amount': _parse_float(beat)} 
+                for beat in earnings_data.get('recent_beats', [])
+            ]
+        
+        if debug:
+            logger.debug(f"Extracted earnings data: eps_ttm={earnings.eps_ttm}, "
+                        f"pe_ttm={earnings.pe_ttm}, forward_pe={earnings.forward_pe}")
+    
+    except Exception as e:
+        if debug:
+            logger.debug(f"Error extracting earnings data: {e}")
+    
+    return earnings
+
+
+def calculate_payout_ratio(annual_dividend: Optional[float], eps_ttm: Optional[float]) -> Optional[float]:
+    """Return the dividend payout ratio as a percentage.
+
+    The ratio is ``annual_dividend / eps_ttm * 100``, rounded to two
+    decimal places.
+
+    Args:
+        annual_dividend: Annual dividend rate per share.
+        eps_ttm: Earnings per share over the trailing twelve months.
+
+    Returns:
+        The payout ratio in percent, or None when either input is missing
+        or zero, or when EPS is not positive.
+    """
+    # A missing or zero dividend yields no ratio.
+    if not annual_dividend:
+        return None
+    # EPS must be present and strictly positive.
+    if not eps_ttm or not (eps_ttm > 0):
+        return None
+    percentage = annual_dividend / eps_ttm * 100
+    return round(percentage, 2)
+
+
+async def extract_phase1_data(page, debug: bool = False) -> EquityPhase1Data:
+    """Collect every Phase 1 section from the current page into one object.
+
+    Steps, in order: pause for a fixed 3000 ms, derive the ticker from the
+    page URL, run the quote, dividend and earnings extractors, then compute
+    the payout ratio when both the annual dividend rate and EPS (TTM) are
+    available.
+
+    Args:
+        page: Playwright page object
+        debug: When true, emit debug log messages at the start and end of
+            the extraction and pass the flag on to the section extractors.
+
+    Returns:
+        EquityPhase1Data holding the ticker, quote, dividends, earnings and
+        calculated metrics.
+    """
+    if debug:
+        logger.debug("Starting Phase 1 data extraction...")
+
+    # Fixed pause so the page can settle before anything is read from it.
+    await page.wait_for_timeout(3000)
+
+    # Browser-side snippet: takes the letters following "stocks/" in the
+    # URL, upper-cased; yields an empty string when the URL does not match.
+    url_ticker_js = '''
+        () => {
+            const url = window.location.href;
+            const match = url.match(/stocks\\/([A-Z]+)/i);
+            return match ? match[1].toUpperCase() : '';
+        }
+    '''
+    ticker = await page.evaluate(url_ticker_js)
+
+    # Section extractors are awaited one after another, in this order.
+    quote_section = await extract_quote_data(page, ticker=ticker, debug=debug)
+    dividend_section = await extract_enhanced_dividends(page, debug=debug)
+    earnings_section = await extract_earnings_data(page, debug=debug)
+
+    # Derived metrics: the payout ratio is only set when both inputs are truthy.
+    derived = CalculatedMetrics()
+    if dividend_section.annual_rate and earnings_section.eps_ttm:
+        derived.payout_ratio = calculate_payout_ratio(
+            dividend_section.annual_rate,
+            earnings_section.eps_ttm,
+        )
+
+    result = EquityPhase1Data(
+        ticker=ticker,
+        quote=quote_section,
+        dividends=dividend_section,
+        earnings=earnings_section,
+        calculated_metrics=derived,
+    )
+
+    if debug:
+        logger.debug(f"Phase 1 extraction complete for {ticker}")
+
+    return result