# schwab-mcp-custom/schwab_scraper/features/equity/phase1_scraper.py

"""Phase 1: Essential Dividend Metrics Implementation (DEPRECATED)

⚠️ DEPRECATED: This DOM-scraping based approach has been replaced by phase1_api_scraper.py
which uses Schwab's REST APIs directly. The API approach is more reliable, complete,
and maintainable than DOM scraping.

This module is kept for reference only. New code should use phase1_api_scraper.py.

Old approach extracts from DOM:
- Quote/Price Data (symbol bar)
- Enhanced Dividend Information (forward-looking dates)
- Core Earnings Metrics (EPS, forecasts)
- Basic Valuation Ratios (P/E, Forward P/E, PEG)
- Calculated Metrics (payout ratio)
"""

from typing import Dict, Any, Optional
import re
import logging

from ...core import QuoteData, EnhancedDividends, EarningsData, CalculatedMetrics, EquityPhase1Data


logger = logging.getLogger(__name__)


def _parse_float(value: Any) -> Optional[float]:
    """Safely parse a value to float, handling $ and % symbols."""
    if value is None:
        return None
    try:
        # Remove common formatting characters
        clean = str(value).strip().replace('$', '').replace(',', '').replace('%', '')
        if clean and clean != '--' and clean.lower() != 'n/a':
            return float(clean)
    except (ValueError, AttributeError):
        pass
    return None


def _parse_int(value: Any) -> Optional[int]:
    """Safely parse a value to int."""
    if value is None:
        return None
    try:
        clean = str(value).strip().replace(',', '')
        if clean and clean != '--' and clean.lower() != 'n/a':
            return int(float(clean))
    except (ValueError, AttributeError):
        pass
    return None


def _parse_volume(volume_str: str) -> Optional[int]:
    """Parse volume string like '8M', '22.4M', '1.2B' to integer."""
    if not volume_str:
        return None

    try:
        volume_str = volume_str.strip().upper()
        multiplier = 1

        if volume_str.endswith('K'):
            multiplier = 1_000
            volume_str = volume_str[:-1]
        elif volume_str.endswith('M'):
            multiplier = 1_000_000
            volume_str = volume_str[:-1]
        elif volume_str.endswith('B'):
            multiplier = 1_000_000_000
            volume_str = volume_str[:-1]

        value = float(volume_str)
        return int(value * multiplier)
    except (ValueError, AttributeError):
        return None


def _parse_revenue(revenue_str: str) -> Optional[float]:
    """Parse revenue string like '$92.15B', '$1.5M' to dollar value."""
    if not revenue_str:
        return None

    try:
        revenue_str = revenue_str.strip().upper().replace('$', '').replace(',', '')
        multiplier = 1

        if revenue_str.endswith('K'):
            multiplier = 1_000
            revenue_str = revenue_str[:-1]
        elif revenue_str.endswith('M'):
            multiplier = 1_000_000
            revenue_str = revenue_str[:-1]
        elif revenue_str.endswith('B'):
            multiplier = 1_000_000_000
            revenue_str = revenue_str[:-1]
        elif revenue_str.endswith('T'):
            multiplier = 1_000_000_000_000
            revenue_str = revenue_str[:-1]

        value = float(revenue_str)
        return value * multiplier
    except (ValueError, AttributeError):
        return None


async def extract_quote_data(page, ticker: str = "", debug: bool = False) -> QuoteData:
    """Extract quote/price data from symbol bar.

    Extraction is best-effort: the page text is matched against a set of
    regular expressions inside the browser, and any field that cannot be
    found is simply left at its default on the returned object. Any exception
    raised while scraping is swallowed (and logged when ``debug`` is set) so
    that a partially populated ``QuoteData`` is still returned.

    Args:
        page: Playwright page object
        ticker: Stock ticker symbol (for pattern matching)
        debug: Enable debug logging

    Returns:
        QuoteData object with extracted fields
    """
    quote = QuoteData()

    try:
        if debug:
            logger.debug("Starting quote data extraction...")

        # Wait for symbol bar content (look for key labels).
        # The text alternative is written with the :text() pseudo-class
        # because a comma-separated selector list is parsed as CSS as a
        # whole; the "text=" engine prefix is not valid inside such a list
        # and would make the wait fail immediately instead of waiting.
        try:
            await page.wait_for_selector(
                '#app-symbol-bar-component, :text("Previous close")',
                state='attached',
                timeout=15000,
            )
        except Exception:
            if debug:
                logger.debug("Timeout waiting for symbol bar selector, attempting to parse whatever is there")

        # The raw symbol bar text is only used for diagnostics, so the extra
        # browser round trip is skipped unless debugging is enabled.
        if debug:
            symbol_bar_text = await page.evaluate('''
                () => {
                    const symbolBar = document.querySelector('#app-symbol-bar-component');
                    if (symbolBar && symbolBar.textContent && symbolBar.textContent.includes('Previous close')) return symbolBar.textContent;

                    // If specific component not found, try to find the container with market data
                    // Look for container with "Previous close"
                    const labels = Array.from(document.querySelectorAll('span, div, p'));
                    const prevCloseLabel = labels.find(el => el.textContent && el.textContent.includes('Previous close'));
                    if (prevCloseLabel) {
                        // Return the parent's text content (go up a few levels to capture all data)
                        let parent = prevCloseLabel.parentElement;
                        let count = 0;
                        while (parent && count < 8) {
                            if (parent.textContent.length > 300) return parent.textContent;
                            parent = parent.parentElement;
                            count++;
                        }
                    }

                    return document.body.textContent || '';
                }
            ''')
            logger.debug(f"Symbol bar text (first 500 chars): {symbol_bar_text[:500]}")

        # Extract structured data.
        # NOTE: this is a *raw* Python string, so every backslash below
        # reaches the browser exactly as written.
        quote_data = await page.evaluate(r'''
            (ticker) => {
                const data = {};

                // Helper to get text content from page
                const getText = () => {
                   const symbolBar = document.querySelector('#app-symbol-bar-component');
                   // Verify it looks like the right component by checking for "Previous close"
                   if (symbolBar && symbolBar.textContent && symbolBar.textContent.includes('Previous close')) {
                       return symbolBar.textContent;
                   }

                   // Fallback logic
                   const labels = Array.from(document.querySelectorAll('span, div, p'));
                   const prevCloseLabel = labels.find(el => el.textContent && el.textContent.includes('Previous close'));
                   if (prevCloseLabel) {
                        let parent = prevCloseLabel.parentElement;
                        let count = 0;
                        while (parent && count < 8) {
                            if (parent.textContent.length > 300) return parent.textContent;
                            parent = parent.parentElement;
                            count++;
                        }
                   }

                   // Last resort: body text
                   return document.body.textContent || '';
                };

                const fullText = getText();

                // Try to find price in quote container first for accuracy
                const priceElement = document.querySelector('.symbol-quote-container, [data-testid="quote-price"]');
                if (priceElement) {
                    const priceText = priceElement.textContent || '';
                    const priceMatch = priceText.match(/\$([0-9,]+\.[0-9]+)/);
                    if (priceMatch) data.price = priceMatch[1].replace(',', '');
                } else {
                    // Fallback regex for price if element not found
                    // Look for price near top or just regex
                    const priceMatch = fullText.match(/\$([0-9,]+\.[0-9]{2})(\s|[+-]|$)/);
                    if (priceMatch) data.price = priceMatch[1].replace(',', '');
                }

                // After hours (using \s* for robustness)
                const afterHoursMatch = fullText.match(/After hours:?\s*\$([0-9,.]+)/i);
                if (afterHoursMatch) data.after_hours_price = afterHoursMatch[1].replace(',', '');

                const afterHoursChangeMatch = fullText.match(/After hours:.*?([+-]\$[0-9,.]+)\s*\(([+-][0-9.]+)%\)/i);
                if (afterHoursChangeMatch) {
                    data.after_hours_change = afterHoursChangeMatch[1].replace('$', '').replace(',', '');
                    data.after_hours_change_percent = afterHoursChangeMatch[2];
                }

                // Bid/Ask (using \s* for robustness)
                const bidMatch = fullText.match(/Bid\s*\$([0-9,.]+)/i);
                if (bidMatch) data.bid = bidMatch[1].replace(',', '');

                const askMatch = fullText.match(/Ask\s*\$([0-9,.]+)/i);
                if (askMatch) data.ask = askMatch[1].replace(',', '');

                const bidAskSizeMatch = fullText.match(/Bid\/Ask Size\s*([0-9]+\/[0-9]+)/i);
                if (bidAskSizeMatch) data.bid_ask_size = bidAskSizeMatch[1];

                // Previous close and open (using \s* instead of \s+)
                const prevCloseMatch = fullText.match(/Previous close\s*\$([0-9,.]+)/i);
                if (prevCloseMatch) data.previous_close = prevCloseMatch[1].replace(',', '');

                const openMatch = fullText.match(/Today's open\s*\$([0-9,.]+)/i);
                if (openMatch) data.open = openMatch[1].replace(',', '');

                // Volume (using \s*)
                const volumeMatch = fullText.match(/Today's volume\s*([0-9.]+[KMB]?)/i);
                if (volumeMatch) data.volume = volumeMatch[1];

                const volumeVsAvgMatch = fullText.match(/Today's volume\s*[0-9.]+[KMB]?\s*(Above Avg\.|Below Avg\.|Average)/i);
                if (volumeVsAvgMatch) data.volume_vs_avg = volumeVsAvgMatch[1];

                // Day range
                // Pattern: "Today's range low $200.81 Today's range high $203.45" or similar
                // We'll look for "low $X" and "high $Y" appearing after "Today's range"
                const dayRangeMatch = fullText.match(/Today's range.*?low\s*\$([0-9,.]+).*?high\s*\$([0-9,.]+)/i);
                if (dayRangeMatch) {
                    data.day_range_low = dayRangeMatch[1].replace(',', '');
                    data.day_range_high = dayRangeMatch[2].replace(',', '');
                }

                // 52-week range
                const weekRangeMatch = fullText.match(/52-week range.*?low\s*\$([0-9,.]+).*?high\s*\$([0-9,.]+)/i);
                if (weekRangeMatch) {
                    data.week_52_low = weekRangeMatch[1].replace(',', '');
                    data.week_52_high = weekRangeMatch[2].replace(',', '');
                }

                // Market cap (may be in Share Profile section)
                const marketCapMatch = fullText.match(/Market Cap\s*\$([0-9.]+[KMBT])/i);
                if (marketCapMatch) data.market_cap = marketCapMatch[1];

                // Change and change percent

                // Try specific formatted pattern first: TICKER $PRICE CHANGE CHANGE%
                // e.g. "JNJ $201.95 -1.03 -0.51%"
                const standardPattern = fullText.match(/\$([0-9,.]+)\s*([+-]?[0-9,.]+)\s*([+-]?[0-9.]+)%/);
                if (standardPattern) {
                     if (!data.price) data.price = standardPattern[1].replace(',', '');
                     data.change = standardPattern[2];
                     data.change_percent = standardPattern[3];
                }

                let percentMatch = null;
                if (ticker && !data.change_percent) {
                    // Match: TICKER$digits.digits{2}percent%
                    // The ticker is escaped so regex metacharacters in it are taken literally.
                    // Each "\\" in the string literal yields ONE backslash in the pattern,
                    // i.e. \.? [\s]* \$ ... \. as intended.
                    const escapedTicker = ticker.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
                    const tickerPattern = new RegExp(escapedTicker + '\\.?[\\s]*\\$([0-9,]+\\.[0-9]{2})[\\s]*([0-9.]+)%', 'i');
                    percentMatch = fullText.match(tickerPattern);
                    if (percentMatch) {
                        data.change_percent = percentMatch[2];
                    }
                }

                if (!data.change_percent) {
                    // Fallback: match any price+percent pattern with space
                    const fallbackMatch = fullText.match(/\$[0-9,.]+\s*([+-]?[0-9.]+)%/);
                    if (fallbackMatch) {
                        data.change_percent = fallbackMatch[1];
                    }
                }

                // Pattern 2: "+$1.23 (+0.45%)" or "-$1.23 (-0.45%)"
                let changeMatch = fullText.match(/([+-]\$[0-9,.]+)\s*\(([+-][0-9.]+)%\)/);
                // Pattern 3: "$193.08 +1.23 +0.64%" (price followed by change)
                if (!changeMatch) {
                    changeMatch = fullText.match(/\$[0-9,.]+\s*([+-][0-9,.]+)\s*([+-][0-9.]+)%/);
                }
                // Pattern 4: "Change: +1.23 (+0.64%)"
                if (!changeMatch) {
                    changeMatch = fullText.match(/Change:?\s*([+-][0-9,.]+)\s*\(([+-][0-9.]+)%\)/i);
                }
                if (changeMatch) {
                    data.change = changeMatch[1].replace('$', '').replace(',', '');
                    if (!data.change_percent) {
                        data.change_percent = changeMatch[2].replace(/[+]/g, '');
                    }
                }

                // Exchange - look for NYSE, NASDAQ, etc.
                const exchangeMatch = fullText.match(/\b(NYSE|NASDAQ|AMEX|OTC|BATS)\b/i);
                if (exchangeMatch) data.exchange = exchangeMatch[1].toUpperCase();

                return data;
            }
        ''', ticker)

        # Parse and assign values. The JS side only strips the first comma of
        # each number; _parse_float removes any that remain.
        quote.price = _parse_float(quote_data.get('price'))
        quote.change = _parse_float(quote_data.get('change'))
        quote.change_percent = _parse_float(quote_data.get('change_percent'))
        quote.after_hours_price = _parse_float(quote_data.get('after_hours_price'))
        quote.after_hours_change = _parse_float(quote_data.get('after_hours_change'))
        quote.after_hours_change_percent = _parse_float(quote_data.get('after_hours_change_percent'))
        quote.bid = _parse_float(quote_data.get('bid'))
        quote.ask = _parse_float(quote_data.get('ask'))
        quote.bid_ask_size = quote_data.get('bid_ask_size')
        quote.previous_close = _parse_float(quote_data.get('previous_close'))
        quote.open = _parse_float(quote_data.get('open'))
        quote.volume = _parse_volume(quote_data.get('volume', ''))
        quote.volume_vs_avg = quote_data.get('volume_vs_avg')
        quote.day_range_low = _parse_float(quote_data.get('day_range_low'))
        quote.day_range_high = _parse_float(quote_data.get('day_range_high'))
        quote.week_52_low = _parse_float(quote_data.get('week_52_low'))
        quote.week_52_high = _parse_float(quote_data.get('week_52_high'))
        # Market cap is kept as the abbreviated display string (e.g. "486.2B").
        quote.market_cap = quote_data.get('market_cap')

        # Try to extract sector and exchange from page header
        header_data = await page.evaluate(r'''
            () => {
                const data = {};

                // Look for sector near company name
                const sectorElement = document.querySelector('[data-testid="sector"], .sector');
                if (sectorElement) {
                    data.sector = sectorElement.textContent.replace('Sector', '').trim();
                } else {
                    // Manual search for text containing "Sector"
                    const spans = Array.from(document.querySelectorAll('span'));
                    const sectorSpan = spans.find(el => el.textContent && el.textContent.includes('Sector'));
                    if (sectorSpan) {
                         data.sector = sectorSpan.textContent.replace('Sector', '').replace(':', '').trim();
                    }
                }

                // Look for exchange near ticker
                const exchangeElement = document.querySelector('[data-testid="exchange"], .exchange');
                if (exchangeElement) {
                    data.exchange = exchangeElement.textContent.trim();
                }

                // Fallback: parse from page text
                const pageText = document.body.textContent || '';
                if (!data.sector) {
                    const sectorMatch = pageText.match(/Sector[:\s]+([A-Za-z\s&]+)/);
                    if (sectorMatch) data.sector = sectorMatch[1].trim();
                }
                if (!data.exchange) {
                    const exchangeMatch = pageText.match(/(NYSE|NASDAQ|AMEX|OTC)/i);
                    if (exchangeMatch) data.exchange = exchangeMatch[1].toUpperCase();
                }

                return data;
            }
        ''')

        quote.sector = header_data.get('sector')
        # Prefer the header value; fall back to the exchange found in the
        # symbol bar text so that match is not silently thrown away.
        quote.exchange = header_data.get('exchange') or quote_data.get('exchange')

        if debug:
            logger.debug(f"Extracted quote data: price={quote.price}, volume={quote.volume}, "
                        f"52w_range={quote.week_52_low}-{quote.week_52_high}")

    except Exception as e:
        # Deliberately best-effort: return whatever was populated so far.
        if debug:
            logger.debug(f"Error extracting quote data: {e}")

    return quote


async def extract_enhanced_dividends(page, debug: bool = False) -> EnhancedDividends:
    """Scrape the ``#dividends`` panel for payment amounts, dates and yield.

    The panel is scrolled into view and its header clicked before the panel
    text is matched against regular expressions in the browser. Fields that
    are not found stay at their defaults. Any exception raised along the way
    is swallowed (logged only when ``debug`` is set) and the partially filled
    object is returned.

    Args:
        page: Playwright page object
        debug: Enable debug logging

    Returns:
        EnhancedDividends object with extracted fields
    """
    result = EnhancedDividends()

    # Fields in assignment order; the ones listed in `numeric` go through
    # _parse_float, the rest are stored as the scraped text.
    field_order = (
        'next_payment',
        'next_pay_date',
        'next_ex_date',
        'previous_payment',
        'previous_pay_date',
        'previous_ex_date',
        'frequency',
        'annual_rate',
        'annual_yield',
    )
    numeric = {'next_payment', 'previous_payment', 'annual_rate', 'annual_yield'}

    try:
        if debug:
            logger.debug("Starting enhanced dividend extraction...")

        # Block until the panel exists in the DOM.
        await page.wait_for_selector('#dividends', timeout=15000)

        # Bring the panel into the viewport, then give the page a moment.
        await page.evaluate('''
            () => {
                const dividendsPanel = document.querySelector('#dividends');
                if (dividendsPanel) {
                    dividendsPanel.scrollIntoView({ behavior: 'smooth', block: 'center' });
                }
            }
        ''')
        await page.wait_for_timeout(1000)

        # The panel body is not populated until its header is clicked.
        if debug:
            logger.debug("Clicking dividends panel header to trigger content load...")
        try:
            header = await page.query_selector('#dividends h2, #dividends .sdps-panel__title, #dividends-togglechevron-button')
            if header:
                await header.click()
                await page.wait_for_timeout(2000)
                if debug:
                    logger.debug("Clicked dividends panel header successfully")
        except Exception as click_error:
            if debug:
                logger.debug(f"Could not click dividends header: {click_error}")

        # Extra settle time after the click.
        await page.wait_for_timeout(1000)

        # Run the regex extraction inside the page.
        scraped = await page.evaluate('''
            () => {
                const data = {};
                const dividendsPanel = document.querySelector('#dividends');
                if (!dividendsPanel) return data;

                const fullText = dividendsPanel.textContent || '';

                // DEBUG: Return sample of text for debugging
                data._debug_text_sample = fullText.substring(0, 800);

                // Next dividend payment
                const nextPaymentMatch = fullText.match(/Next Dividend Payment\\s*\\$([0-9.]+)/i);
                if (nextPaymentMatch) data.next_payment = nextPaymentMatch[1];

                // Next pay date
                const nextPayDateMatch = fullText.match(/Next Pay Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
                if (nextPayDateMatch) data.next_pay_date = nextPayDateMatch[1];

                // Next ex-date
                const nextExDateMatch = fullText.match(/Next Ex-Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
                if (nextExDateMatch) data.next_ex_date = nextExDateMatch[1];

                // Previous dividend payment
                const prevPaymentMatch = fullText.match(/Previous Dividend Payment\\s*\\$([0-9.]+)/i);
                if (prevPaymentMatch) data.previous_payment = prevPaymentMatch[1];

                // Previous pay date
                const prevPayDateMatch = fullText.match(/Previous Pay Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
                if (prevPayDateMatch) data.previous_pay_date = prevPayDateMatch[1];

                // Previous ex-date
                const prevExDateMatch = fullText.match(/Previous Ex-Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
                if (prevExDateMatch) data.previous_ex_date = prevExDateMatch[1];

                // Frequency
                const frequencyMatch = fullText.match(/Frequency\\s*(Quarterly|Monthly|Annual|Semi-Annual)/i);
                if (frequencyMatch) data.frequency = frequencyMatch[1];

                // Annual Dividend Rate (IAD)
                const annualRateMatch = fullText.match(/Annual Dividend Rate.*?\\$([0-9.]+)/i);
                if (annualRateMatch) data.annual_rate = annualRateMatch[1];

                // Annual Dividend Yield - appears after "Annual Dividend Yield" text
                // Text pattern: "Annual Dividend Yield...2.71%"
                const yieldMatch = fullText.match(/Annual Dividend Yield[\\s\\S]{0,300}?([0-9]+\\.[0-9]+)%/i);
                if (yieldMatch) data.annual_yield = yieldMatch[1];

                return data;
            }
        ''')

        if debug and scraped.get('_debug_text_sample'):
            logger.debug(f"Dividend panel text sample: {scraped['_debug_text_sample']}")

        # Copy scraped values onto the result object.
        for field in field_order:
            raw_value = scraped.get(field)
            if field in numeric:
                setattr(result, field, _parse_float(raw_value))
            else:
                setattr(result, field, raw_value)

        if debug:
            logger.debug(f"Extracted dividend data: next_payment={result.next_payment}, "
                        f"next_pay_date={result.next_pay_date}, annual_rate={result.annual_rate}")

    except Exception as error:
        if debug:
            logger.debug(f"Error extracting dividend data: {error}")

    return result


async def extract_earnings_data(page, debug: bool = False) -> EarningsData:
    """Extract earnings metrics and forecasts.

    The ``#expected-earnings`` panel is scrolled into view, its header is
    clicked, and a "Show More" control is clicked when present. The panel
    text (including shadow DOM content) is then matched against regular
    expressions in the browser. Fields that are not found stay at their
    defaults. Any exception is swallowed (logged only when ``debug`` is set)
    and the partially populated object is returned.

    Args:
        page: Playwright page object
        debug: Enable debug logging

    Returns:
        EarningsData object with extracted fields
    """
    earnings = EarningsData()

    try:
        if debug:
            logger.debug("Starting earnings data extraction...")

        # Wait for earnings panel to load
        await page.wait_for_selector('#expected-earnings', timeout=15000)

        # Scroll to earnings panel
        await page.evaluate('''
            () => {
                const earningsPanel = document.querySelector('#expected-earnings');
                if (earningsPanel) {
                    earningsPanel.scrollIntoView({ behavior: 'smooth', block: 'center' });
                }
            }
        ''')
        await page.wait_for_timeout(1000)

        # CRITICAL: Click on the panel header to trigger content loading
        # Schwab's panels don't auto-load - they need to be clicked
        if debug:
            logger.debug("Clicking earnings panel header to trigger content load...")
        try:
            earnings_header = await page.query_selector('#expected-earnings h2, #expected-earnings .sdps-panel__title, #expected-earnings-heading, #expected-earnings-togglechevron-button')
            if earnings_header:
                await earnings_header.click()
                await page.wait_for_timeout(2000)
                if debug:
                    logger.debug("Clicked earnings panel header successfully")
        except Exception as e:
            if debug:
                logger.debug(f"Could not click earnings header: {e}")

        # Wait for content to load after click
        await page.wait_for_timeout(1000)

        # Check for and click "Show More" if present
        try:
            # Use JS to find and click - most robust way
            clicked = await page.evaluate('''
                () => {
                    const panel = document.querySelector('#expected-earnings');
                    if (!panel) return false;

                    // Find any element with "Show More" text
                    const elements = Array.from(panel.querySelectorAll('a, button, span, div'));
                    const showMore = elements.find(el => el.textContent.trim().toLowerCase() === "show more");

                    if (showMore) {
                        showMore.click();
                        return true;
                    }
                    return false;
                }
            ''')

            if clicked:
                if debug:
                    logger.debug("found and clicked 'Show More' via JS")
                await page.wait_for_timeout(2000)
            elif debug:
                logger.debug("'Show More' not found or not clickable")

        except Exception as e:
            if debug:
                logger.debug(f"Error checking for Show More: {e}")

        # Extract earnings data.
        # NOTE: this is a *raw* Python string, so backslashes reach the
        # browser exactly as written: a literal dollar sign in a regex
        # literal must be spelled "\$" (a doubled backslash would instead
        # mean "literal backslash, then end of input" and never match).
        earnings_data = await page.evaluate(r'''
            (debug) => {
                const data = {};
                // Helper to get text content including Shadow DOMs
                const getDeepText = (root) => {
                    if (!root) return '';
                    if (root.nodeType === Node.TEXT_NODE) return root.textContent;
                    if (root.nodeType === Node.ELEMENT_NODE && root.shadowRoot) {
                        return getDeepText(root.shadowRoot);
                    }

                    let text = '';
                    const children = root.childNodes;
                    for (let i = 0; i < children.length; i++) {
                        text += getDeepText(children[i]);
                    }
                    return text;
                };

                const earningsPanel = document.querySelector('#expected-earnings');
                let fullText = '';

                if (earningsPanel) {
                     fullText = getDeepText(earningsPanel);
                }

                // Fallback to body deep text if panel seems empty
                if (fullText.length < 500 || !fullText.includes("Announcement")) {
                    fullText = getDeepText(document.body);
                }

                // Next earnings announcement - robust regex checking for various patterns
                let nextAnnouncementMatch = fullText.match(/Next Earnings Announcement.*?([0-9]{2}\/[0-9]{2}\/[0-9]{4})/i);
                if (!nextAnnouncementMatch) {
                     // Try alternate pattern: Announcement: 12/12/2025
                     nextAnnouncementMatch = fullText.match(/Announcement:?\s*([0-9]{2}\/[0-9]{2}\/[0-9]{4})/i);
                }
                if (nextAnnouncementMatch) data.next_announcement_date = nextAnnouncementMatch[1];

                // Announcement timing
                const timingMatch = fullText.match(/(Before Market Open|After Market Close)/i);
                if (timingMatch) data.announcement_timing = timingMatch[1];

                // Number of analysts
                const analystsMatch = fullText.match(/With ([0-9]+) analysts covering/i);
                if (analystsMatch) data.analysts_covering = analystsMatch[1];

                // Consensus estimate
                const consensusMatch = fullText.match(/consensus.*?estimate is \$([0-9.]+)/i);
                if (consensusMatch) data.consensus_estimate = consensusMatch[1];

                // High/Low estimates
                const highLowMatch = fullText.match(/high and low estimates are \$([0-9.]+) and \$([0-9.]+)/i);
                if (highLowMatch) {
                    data.estimate_high = highLowMatch[1];
                    data.estimate_low = highLowMatch[2];
                }

                // EPS TTM (multiple patterns)
                let epsMatch = fullText.match(/EPS\s*\(TTM\)\s*(?:Value)?\s*\$?([0-9.-]+)/i);
                if (!epsMatch) epsMatch = fullText.match(/Earnings per Share\s*\(?TTM\)?\s*(?:Value)?\s*\$?([0-9.-]+)/i);
                if (!epsMatch) epsMatch = fullText.match(/EPS\s+(?:Value)?\s*([0-9.-]+)/i);
                if (epsMatch) data.eps_ttm = epsMatch[1];

                // Revenue TTM
                let revenueMatch = fullText.match(/Revenue\s*\(TTM\)\s*(?:Value)?\s*\$([0-9.]+[KMBT]?)/i);
                if (!revenueMatch) revenueMatch = fullText.match(/Revenue\s+(?:Value)?\s*\$([0-9.]+[KMBT])/i);
                if (revenueMatch) data.revenue_ttm = revenueMatch[1];

                // P/E TTM (multiple patterns)
                let peMatch = fullText.match(/Price[\/\s]*Earnings\s*\(TTM\)\s*(?:Value)?\s*([0-9.]+)/i);
                if (!peMatch) peMatch = fullText.match(/P[\/\s]*E\s*\(?TTM\)?\s*(?:Value)?\s*([0-9.]+)/i);
                if (!peMatch) peMatch = fullText.match(/PE Ratio\s*\(TTM\)\s*(?:Value)?\s*([0-9.]+)/i);
                if (peMatch) data.pe_ttm = peMatch[1];

                // Forward P/E
                let forwardPeMatch = fullText.match(/Forward\s+P[\/\s]*E\s*(?:Value)?\s*([0-9.]+)/i);
                if (!forwardPeMatch) forwardPeMatch = fullText.match(/P[\/\s]*E\s*\(Forward\)\s*(?:Value)?\s*([0-9.]+)/i);
                if (forwardPeMatch) data.forward_pe = forwardPeMatch[1];

                // PEG Ratio
                let pegMatch = fullText.match(/Price\s+to\s+Earnings[\/\s]*Growth\s*\(PEG\)\s*(?:Value)?\s*([0-9.]+)/i);
                if (!pegMatch) pegMatch = fullText.match(/PEG\s*Ratio?\s*(?:Value)?\s*([0-9.]+)/i);
                if (pegMatch) data.peg_ratio = pegMatch[1];

                // Recent beats/misses (simplified - just extract beat amounts)
                const beatMatches = fullText.matchAll(/Beat.*?\$([0-9.]+)/gi);
                data.recent_beats = [];
                for (const match of beatMatches) {
                    data.recent_beats.push(match[1]);
                }

                return data;
            }
        ''', debug)

        # Parse and assign values
        earnings.next_announcement_date = earnings_data.get('next_announcement_date')
        earnings.announcement_timing = earnings_data.get('announcement_timing')
        earnings.analysts_covering = _parse_int(earnings_data.get('analysts_covering'))
        earnings.consensus_estimate = _parse_float(earnings_data.get('consensus_estimate'))
        earnings.estimate_high = _parse_float(earnings_data.get('estimate_high'))
        earnings.estimate_low = _parse_float(earnings_data.get('estimate_low'))
        earnings.eps_ttm = _parse_float(earnings_data.get('eps_ttm'))
        earnings.revenue_ttm = _parse_revenue(earnings_data.get('revenue_ttm', ''))
        earnings.pe_ttm = _parse_float(earnings_data.get('pe_ttm'))
        earnings.forward_pe = _parse_float(earnings_data.get('forward_pe'))
        earnings.peg_ratio = _parse_float(earnings_data.get('peg_ratio'))

        # Store recent beats as list of dicts; left untouched when none found
        if earnings_data.get('recent_beats'):
            earnings.recent_beats = [
                {'beat_amount': _parse_float(beat)}
                for beat in earnings_data.get('recent_beats', [])
            ]

        if debug:
            logger.debug(f"Extracted earnings data: eps_ttm={earnings.eps_ttm}, "
                        f"pe_ttm={earnings.pe_ttm}, forward_pe={earnings.forward_pe}")

    except Exception as e:
        # Deliberately best-effort: return whatever was populated so far.
        if debug:
            logger.debug(f"Error extracting earnings data: {e}")

    return earnings


def calculate_payout_ratio(annual_dividend: Optional[float], eps_ttm: Optional[float]) -> Optional[float]:
    """Return the dividend payout ratio as a percentage.

    Computed as ``annual_dividend / eps_ttm * 100`` and rounded to two
    decimal places.

    Args:
        annual_dividend: Annual dividend rate per share
        eps_ttm: Earnings per share (trailing twelve months)

    Returns:
        The payout ratio in percent, or ``None`` when either input is
        missing or zero, or when ``eps_ttm`` is not strictly positive.
    """
    # Missing or zero inputs: nothing meaningful to compute.
    if not annual_dividend or not eps_ttm:
        return None
    # Written as "not >" (rather than "<=") so NaN is rejected as well.
    if not eps_ttm > 0:
        return None
    percentage = (annual_dividend / eps_ttm) * 100
    return round(percentage, 2)


async def extract_phase1_data(page, debug: bool = False) -> EquityPhase1Data:
    """Run every Phase 1 extractor against ``page`` and bundle the results.

    After a fixed settle delay, the ticker is read from the page URL, then
    the quote, dividend and earnings sections are scraped in that order. The
    payout ratio is computed only when both the annual dividend rate and the
    trailing EPS are available.

    Args:
        page: Playwright page object
        debug: Enable debug output

    Returns:
        EquityPhase1Data object with all extracted data
    """
    if debug:
        logger.debug("Starting Phase 1 data extraction...")

    # Give the page a fixed amount of time to settle before scraping.
    await page.wait_for_timeout(3000)

    # The ticker is the letters following "stocks/" in the URL, upper-cased;
    # an empty string when the URL does not have that shape.
    symbol = await page.evaluate('''
        () => {
            const url = window.location.href;
            const match = url.match(/stocks\\/([A-Z]+)/i);
            return match ? match[1].toUpperCase() : '';
        }
    ''')

    # Section scrapers, run sequentially against the same page.
    quote_section = await extract_quote_data(page, ticker=symbol, debug=debug)
    dividend_section = await extract_enhanced_dividends(page, debug=debug)
    earnings_section = await extract_earnings_data(page, debug=debug)

    # Derived metrics: left at defaults unless both inputs are present.
    derived = CalculatedMetrics()
    if dividend_section.annual_rate and earnings_section.eps_ttm:
        derived.payout_ratio = calculate_payout_ratio(
            dividend_section.annual_rate,
            earnings_section.eps_ttm,
        )

    bundle = EquityPhase1Data(
        ticker=symbol,
        quote=quote_section,
        dividends=dividend_section,
        earnings=earnings_section,
        calculated_metrics=derived,
    )

    if debug:
        logger.debug(f"Phase 1 extraction complete for {symbol}")

    return bundle