All checks were successful
Build and Push Docker Image / build (push) Successful in 34s
787 lines
35 KiB
Python
787 lines
35 KiB
Python
"""Phase 1: Essential Dividend Metrics Implementation (DEPRECATED)
|
||
|
||
⚠️ DEPRECATED: This DOM-scraping based approach has been replaced by phase1_api_scraper.py
|
||
which uses Schwab's REST APIs directly. The API approach is more reliable, complete,
|
||
and maintainable than DOM scraping.
|
||
|
||
This module is kept for reference only. New code should use phase1_api_scraper.py.
|
||
|
||
Old approach extracts from DOM:
|
||
- Quote/Price Data (symbol bar)
|
||
- Enhanced Dividend Information (forward-looking dates)
|
||
- Core Earnings Metrics (EPS, forecasts)
|
||
- Basic Valuation Ratios (P/E, Forward P/E, PEG)
|
||
- Calculated Metrics (payout ratio)
|
||
"""
|
||
|
||
from typing import Dict, Any, Optional
|
||
import re
|
||
import logging
|
||
|
||
from ...core import QuoteData, EnhancedDividends, EarningsData, CalculatedMetrics, EquityPhase1Data
|
||
|
||
|
||
# Module-level logger, named after this module's import path via __name__.
logger = logging.getLogger(__name__)
|
||
|
||
|
||
def _parse_float(value: Any) -> Optional[float]:
|
||
"""Safely parse a value to float, handling $ and % symbols."""
|
||
if value is None:
|
||
return None
|
||
try:
|
||
# Remove common formatting characters
|
||
clean = str(value).strip().replace('$', '').replace(',', '').replace('%', '')
|
||
if clean and clean != '--' and clean.lower() != 'n/a':
|
||
return float(clean)
|
||
except (ValueError, AttributeError):
|
||
pass
|
||
return None
|
||
|
||
|
||
def _parse_int(value: Any) -> Optional[int]:
|
||
"""Safely parse a value to int."""
|
||
if value is None:
|
||
return None
|
||
try:
|
||
clean = str(value).strip().replace(',', '')
|
||
if clean and clean != '--' and clean.lower() != 'n/a':
|
||
return int(float(clean))
|
||
except (ValueError, AttributeError):
|
||
pass
|
||
return None
|
||
|
||
|
||
def _parse_volume(volume_str: str) -> Optional[int]:
|
||
"""Parse volume string like '8M', '22.4M', '1.2B' to integer."""
|
||
if not volume_str:
|
||
return None
|
||
|
||
try:
|
||
volume_str = volume_str.strip().upper()
|
||
multiplier = 1
|
||
|
||
if volume_str.endswith('K'):
|
||
multiplier = 1_000
|
||
volume_str = volume_str[:-1]
|
||
elif volume_str.endswith('M'):
|
||
multiplier = 1_000_000
|
||
volume_str = volume_str[:-1]
|
||
elif volume_str.endswith('B'):
|
||
multiplier = 1_000_000_000
|
||
volume_str = volume_str[:-1]
|
||
|
||
value = float(volume_str)
|
||
return int(value * multiplier)
|
||
except (ValueError, AttributeError):
|
||
return None
|
||
|
||
|
||
def _parse_revenue(revenue_str: str) -> Optional[float]:
|
||
"""Parse revenue string like '$92.15B', '$1.5M' to dollar value."""
|
||
if not revenue_str:
|
||
return None
|
||
|
||
try:
|
||
revenue_str = revenue_str.strip().upper().replace('$', '').replace(',', '')
|
||
multiplier = 1
|
||
|
||
if revenue_str.endswith('K'):
|
||
multiplier = 1_000
|
||
revenue_str = revenue_str[:-1]
|
||
elif revenue_str.endswith('M'):
|
||
multiplier = 1_000_000
|
||
revenue_str = revenue_str[:-1]
|
||
elif revenue_str.endswith('B'):
|
||
multiplier = 1_000_000_000
|
||
revenue_str = revenue_str[:-1]
|
||
elif revenue_str.endswith('T'):
|
||
multiplier = 1_000_000_000_000
|
||
revenue_str = revenue_str[:-1]
|
||
|
||
value = float(revenue_str)
|
||
return value * multiplier
|
||
except (ValueError, AttributeError):
|
||
return None
|
||
|
||
|
||
async def extract_quote_data(page, ticker: str = "", debug: bool = False) -> QuoteData:
    """Extract quote/price data from symbol bar.

    The symbol bar text is read in the browser and matched against a set of
    regular expressions; the raw captures are then converted with the
    module's parsing helpers. Extraction is best-effort: any exception is
    caught (and logged when ``debug`` is set) and whatever was populated so
    far is returned.

    Args:
        page: Playwright page object
        ticker: Stock ticker symbol (for pattern matching)
        debug: Enable debug logging

    Returns:
        QuoteData object with extracted fields
    """
    quote = QuoteData()

    try:
        if debug:
            logger.debug("Starting quote data extraction...")

        # Wait for symbol bar content (look for key labels)
        # NOTE(review): this selector mixes a CSS selector with a `text=`
        # selector in one comma-separated string - confirm Playwright accepts
        # that; a failure here is tolerated by the except below either way.
        try:
            await page.wait_for_selector('#app-symbol-bar-component, text=Previous close', state='attached', timeout=15000)
        except Exception:
            if debug:
                logger.debug("Timeout waiting for symbol bar selector, attempting to parse whatever is there")

        # The raw symbol bar text is only used for the debug log, so the
        # extra browser round trip is skipped unless debugging.
        if debug:
            # Extract symbol bar text content (fallback to body if specific component not found)
            symbol_bar_text = await page.evaluate('''
                () => {
                    const symbolBar = document.querySelector('#app-symbol-bar-component');
                    if (symbolBar && symbolBar.textContent && symbolBar.textContent.includes('Previous close')) return symbolBar.textContent;

                    // If specific component not found, try to find the container with market data
                    // Look for container with "Previous close"
                    const labels = Array.from(document.querySelectorAll('span, div, p'));
                    const prevCloseLabel = labels.find(el => el.textContent && el.textContent.includes('Previous close'));
                    if (prevCloseLabel) {
                        // Return the parent's text content (go up a few levels to capture all data)
                        let parent = prevCloseLabel.parentElement;
                        let count = 0;
                        while (parent && count < 8) {
                            if (parent.textContent.length > 300) return parent.textContent;
                            parent = parent.parentElement;
                            count++;
                        }
                    }

                    return document.body.textContent || '';
                }
            ''')
            logger.debug(f"Symbol bar text (first 500 chars): {symbol_bar_text[:500]}")

        # Extract structured data.
        # This is a RAW Python string: every backslash below reaches the
        # JavaScript engine exactly as written.
        quote_data = await page.evaluate(r'''
            (ticker) => {
                const data = {};

                // Helper to get text content from page
                const getText = () => {
                    const symbolBar = document.querySelector('#app-symbol-bar-component');
                    // Verify it looks like the right component by checking for "Previous close"
                    if (symbolBar && symbolBar.textContent && symbolBar.textContent.includes('Previous close')) {
                        return symbolBar.textContent;
                    }

                    // Fallback logic
                    const labels = Array.from(document.querySelectorAll('span, div, p'));
                    const prevCloseLabel = labels.find(el => el.textContent && el.textContent.includes('Previous close'));
                    if (prevCloseLabel) {
                        let parent = prevCloseLabel.parentElement;
                        let count = 0;
                        while (parent && count < 8) {
                            if (parent.textContent.length > 300) return parent.textContent;
                            parent = parent.parentElement;
                            count++;
                        }
                    }

                    // Last resort: body text
                    return document.body.textContent || '';
                };

                const fullText = getText();

                // Try to find price in quote container first for accuracy
                const priceElement = document.querySelector('.symbol-quote-container, [data-testid="quote-price"]');
                if (priceElement) {
                    const priceText = priceElement.textContent || '';
                    const priceMatch = priceText.match(/\$([0-9,]+\.[0-9]+)/);
                    if (priceMatch) data.price = priceMatch[1].replace(',', '');
                } else {
                    // Fallback regex for price if element not found
                    // Look for price near top or just regex
                    const priceMatch = fullText.match(/\$([0-9,]+\.[0-9]{2})(\s|[+-]|$)/);
                    if (priceMatch) data.price = priceMatch[1].replace(',', '');
                }

                // After hours (using \s* for robustness)
                const afterHoursMatch = fullText.match(/After hours:?\s*\$([0-9,.]+)/i);
                if (afterHoursMatch) data.after_hours_price = afterHoursMatch[1].replace(',', '');

                const afterHoursChangeMatch = fullText.match(/After hours:.*?([+-]\$[0-9,.]+)\s*\(([+-][0-9.]+)%\)/i);
                if (afterHoursChangeMatch) {
                    data.after_hours_change = afterHoursChangeMatch[1].replace('$', '').replace(',', '');
                    data.after_hours_change_percent = afterHoursChangeMatch[2];
                }

                // Bid/Ask (using \s* for robustness)
                const bidMatch = fullText.match(/Bid\s*\$([0-9,.]+)/i);
                if (bidMatch) data.bid = bidMatch[1].replace(',', '');

                const askMatch = fullText.match(/Ask\s*\$([0-9,.]+)/i);
                if (askMatch) data.ask = askMatch[1].replace(',', '');

                const bidAskSizeMatch = fullText.match(/Bid\/Ask Size\s*([0-9]+\/[0-9]+)/i);
                if (bidAskSizeMatch) data.bid_ask_size = bidAskSizeMatch[1];

                // Previous close and open (using \s* instead of \s+)
                const prevCloseMatch = fullText.match(/Previous close\s*\$([0-9,.]+)/i);
                if (prevCloseMatch) data.previous_close = prevCloseMatch[1].replace(',', '');

                const openMatch = fullText.match(/Today's open\s*\$([0-9,.]+)/i);
                if (openMatch) data.open = openMatch[1].replace(',', '');

                // Volume (using \s*)
                const volumeMatch = fullText.match(/Today's volume\s*([0-9.]+[KMB]?)/i);
                if (volumeMatch) data.volume = volumeMatch[1];

                const volumeVsAvgMatch = fullText.match(/Today's volume\s*[0-9.]+[KMB]?\s*(Above Avg\.|Below Avg\.|Average)/i);
                if (volumeVsAvgMatch) data.volume_vs_avg = volumeVsAvgMatch[1];

                // Day range
                // Pattern: "Today's range low $200.81 Today's range high $203.45" or similar
                // We'll look for "low $X" and "high $Y" appearing after "Today's range"
                const dayRangeMatch = fullText.match(/Today's range.*?low\s*\$([0-9,.]+).*?high\s*\$([0-9,.]+)/i);
                if (dayRangeMatch) {
                    data.day_range_low = dayRangeMatch[1].replace(',', '');
                    data.day_range_high = dayRangeMatch[2].replace(',', '');
                }

                // 52-week range
                const weekRangeMatch = fullText.match(/52-week range.*?low\s*\$([0-9,.]+).*?high\s*\$([0-9,.]+)/i);
                if (weekRangeMatch) {
                    data.week_52_low = weekRangeMatch[1].replace(',', '');
                    data.week_52_high = weekRangeMatch[2].replace(',', '');
                }

                // Market cap (may be in Share Profile section)
                const marketCapMatch = fullText.match(/Market Cap\s*\$([0-9.]+[KMBT])/i);
                if (marketCapMatch) data.market_cap = marketCapMatch[1];

                // Change and change percent

                // Try specific formatted pattern first: TICKER $PRICE CHANGE CHANGE%
                // e.g. "JNJ $201.95 -1.03 -0.51%"
                const standardPattern = fullText.match(/\$([0-9,.]+)\s*([+-]?[0-9,.]+)\s*([+-]?[0-9.]+)%/);
                if (standardPattern) {
                    if (!data.price) data.price = standardPattern[1].replace(',', '');
                    data.change = standardPattern[2];
                    data.change_percent = standardPattern[3];
                }

                let percentMatch = null;
                if (ticker && !data.change_percent) {
                    // Match: TICKER$digits.digits{2}percent%
                    // The ticker is escaped so symbols containing regex
                    // metacharacters (e.g. a dot) are matched literally.
                    const escapedTicker = ticker.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
                    // Each "\\" below is ONE backslash in the resulting regex
                    // source (JS string-literal escaping), giving
                    // TICKER\.?[\s]*\$([0-9,]+\.[0-9]{2})[\s]*([0-9.]+)%
                    const tickerPattern = new RegExp(escapedTicker + '\\.?[\\s]*\\$([0-9,]+\\.[0-9]{2})[\\s]*([0-9.]+)%', 'i');
                    percentMatch = fullText.match(tickerPattern);
                    if (percentMatch) {
                        data.change_percent = percentMatch[2];
                    }
                }

                if (!data.change_percent) {
                    // Fallback: match any price+percent pattern with space
                    const fallbackMatch = fullText.match(/\$[0-9,.]+\s*([+-]?[0-9.]+)%/);
                    if (fallbackMatch) {
                        data.change_percent = fallbackMatch[1];
                    }
                }

                // Pattern 2: "+$1.23 (+0.45%)" or "-$1.23 (-0.45%)"
                let changeMatch = fullText.match(/([+-]\$[0-9,.]+)\s*\(([+-][0-9.]+)%\)/);
                // Pattern 3: "$193.08 +1.23 +0.64%" (price followed by change)
                if (!changeMatch) {
                    changeMatch = fullText.match(/\$[0-9,.]+\s*([+-][0-9,.]+)\s*([+-][0-9.]+)%/);
                }
                // Pattern 4: "Change: +1.23 (+0.64%)"
                if (!changeMatch) {
                    changeMatch = fullText.match(/Change:?\s*([+-][0-9,.]+)\s*\(([+-][0-9.]+)%\)/i);
                }
                if (changeMatch) {
                    data.change = changeMatch[1].replace('$', '').replace(',', '');
                    if (!data.change_percent) {
                        data.change_percent = changeMatch[2].replace(/[+]/g, '');
                    }
                }

                // Exchange - look for NYSE, NASDAQ, etc.
                const exchangeMatch = fullText.match(/\b(NYSE|NASDAQ|AMEX|OTC|BATS)\b/i);
                if (exchangeMatch) data.exchange = exchangeMatch[1].toUpperCase();

                return data;
            }
        ''', ticker)

        # Parse and assign values
        quote.price = _parse_float(quote_data.get('price'))
        quote.change = _parse_float(quote_data.get('change'))
        quote.change_percent = _parse_float(quote_data.get('change_percent'))
        quote.after_hours_price = _parse_float(quote_data.get('after_hours_price'))
        quote.after_hours_change = _parse_float(quote_data.get('after_hours_change'))
        quote.after_hours_change_percent = _parse_float(quote_data.get('after_hours_change_percent'))
        quote.bid = _parse_float(quote_data.get('bid'))
        quote.ask = _parse_float(quote_data.get('ask'))
        quote.bid_ask_size = quote_data.get('bid_ask_size')
        quote.previous_close = _parse_float(quote_data.get('previous_close'))
        quote.open = _parse_float(quote_data.get('open'))
        quote.volume = _parse_volume(quote_data.get('volume', ''))
        quote.volume_vs_avg = quote_data.get('volume_vs_avg')
        quote.day_range_low = _parse_float(quote_data.get('day_range_low'))
        quote.day_range_high = _parse_float(quote_data.get('day_range_high'))
        quote.week_52_low = _parse_float(quote_data.get('week_52_low'))
        quote.week_52_high = _parse_float(quote_data.get('week_52_high'))
        # Market cap is kept as the matched text (e.g. "486.2B"), not parsed.
        quote.market_cap = quote_data.get('market_cap')

        # Try to extract sector and exchange from page header
        header_data = await page.evaluate(r'''
            () => {
                const data = {};

                // Look for sector near company name
                const sectorElement = document.querySelector('[data-testid="sector"], .sector');
                if (sectorElement) {
                    data.sector = sectorElement.textContent.replace('Sector', '').trim();
                } else {
                    // Manual search for text containing "Sector"
                    const spans = Array.from(document.querySelectorAll('span'));
                    const sectorSpan = spans.find(el => el.textContent && el.textContent.includes('Sector'));
                    if (sectorSpan) {
                        data.sector = sectorSpan.textContent.replace('Sector', '').replace(':', '').trim();
                    }
                }

                // Look for exchange near ticker
                const exchangeElement = document.querySelector('[data-testid="exchange"], .exchange');
                if (exchangeElement) {
                    data.exchange = exchangeElement.textContent.trim();
                }

                // Fallback: parse from page text
                const pageText = document.body.textContent || '';
                if (!data.sector) {
                    const sectorMatch = pageText.match(/Sector[:\s]+([A-Za-z\s&]+)/);
                    if (sectorMatch) data.sector = sectorMatch[1].trim();
                }
                if (!data.exchange) {
                    const exchangeMatch = pageText.match(/(NYSE|NASDAQ|AMEX|OTC)/i);
                    if (exchangeMatch) data.exchange = exchangeMatch[1].toUpperCase();
                }

                return data;
            }
        ''')

        quote.sector = header_data.get('sector')
        # Prefer the header value; fall back to the exchange matched in the
        # symbol bar text, which would otherwise be computed and discarded.
        quote.exchange = header_data.get('exchange') or quote_data.get('exchange')

        if debug:
            logger.debug(f"Extracted quote data: price={quote.price}, volume={quote.volume}, "
                         f"52w_range={quote.week_52_low}-{quote.week_52_high}")

    except Exception as e:
        # Best-effort scraping: report the failure (debug only) and return
        # whatever was populated before the error.
        if debug:
            logger.debug(f"Error extracting quote data: {e}")

    return quote
|
||
|
||
|
||
async def extract_enhanced_dividends(page, debug: bool = False) -> EnhancedDividends:
    """Scrape the dividends panel, including upcoming and previous payments.

    The panel (``#dividends``) is scrolled into view and its header clicked
    before its text is matched against a set of regular expressions in the
    browser. Extraction is best-effort: any exception is caught (and logged
    when ``debug`` is set) and whatever was populated so far is returned.

    Args:
        page: Playwright page object
        debug: Enable debug logging

    Returns:
        EnhancedDividends object with extracted fields
    """
    scroll_script = '''
        () => {
            const dividendsPanel = document.querySelector('#dividends');
            if (dividendsPanel) {
                dividendsPanel.scrollIntoView({ behavior: 'smooth', block: 'center' });
            }
        }
    '''

    # Non-raw Python string: each "\\" below reaches JavaScript as one "\".
    extract_script = '''
        () => {
            const data = {};
            const dividendsPanel = document.querySelector('#dividends');
            if (!dividendsPanel) return data;

            const fullText = dividendsPanel.textContent || '';

            // DEBUG: Return sample of text for debugging
            data._debug_text_sample = fullText.substring(0, 800);

            // Next dividend payment
            const nextPaymentMatch = fullText.match(/Next Dividend Payment\\s*\\$([0-9.]+)/i);
            if (nextPaymentMatch) data.next_payment = nextPaymentMatch[1];

            // Next pay date
            const nextPayDateMatch = fullText.match(/Next Pay Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
            if (nextPayDateMatch) data.next_pay_date = nextPayDateMatch[1];

            // Next ex-date
            const nextExDateMatch = fullText.match(/Next Ex-Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
            if (nextExDateMatch) data.next_ex_date = nextExDateMatch[1];

            // Previous dividend payment
            const prevPaymentMatch = fullText.match(/Previous Dividend Payment\\s*\\$([0-9.]+)/i);
            if (prevPaymentMatch) data.previous_payment = prevPaymentMatch[1];

            // Previous pay date
            const prevPayDateMatch = fullText.match(/Previous Pay Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
            if (prevPayDateMatch) data.previous_pay_date = prevPayDateMatch[1];

            // Previous ex-date
            const prevExDateMatch = fullText.match(/Previous Ex-Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
            if (prevExDateMatch) data.previous_ex_date = prevExDateMatch[1];

            // Frequency
            const frequencyMatch = fullText.match(/Frequency\\s*(Quarterly|Monthly|Annual|Semi-Annual)/i);
            if (frequencyMatch) data.frequency = frequencyMatch[1];

            // Annual Dividend Rate (IAD)
            const annualRateMatch = fullText.match(/Annual Dividend Rate.*?\\$([0-9.]+)/i);
            if (annualRateMatch) data.annual_rate = annualRateMatch[1];

            // Annual Dividend Yield - appears after "Annual Dividend Yield" text
            // Text pattern: "Annual Dividend Yield...2.71%"
            const yieldMatch = fullText.match(/Annual Dividend Yield[\\s\\S]{0,300}?([0-9]+\\.[0-9]+)%/i);
            if (yieldMatch) data.annual_yield = yieldMatch[1];

            return data;
        }
    '''

    # (attribute / scraped key, converter) in assignment order; a converter
    # of None stores the scraped text unchanged.
    field_converters = (
        ('next_payment', _parse_float),
        ('next_pay_date', None),
        ('next_ex_date', None),
        ('previous_payment', _parse_float),
        ('previous_pay_date', None),
        ('previous_ex_date', None),
        ('frequency', None),
        ('annual_rate', _parse_float),
        ('annual_yield', _parse_float),
    )

    result = EnhancedDividends()

    try:
        if debug:
            logger.debug("Starting enhanced dividend extraction...")

        # Block until the panel element exists, then bring it into view.
        await page.wait_for_selector('#dividends', timeout=15000)
        await page.evaluate(scroll_script)
        await page.wait_for_timeout(1000)

        # CRITICAL: the panel header must be clicked to trigger content
        # loading - Schwab's panels don't auto-load.
        if debug:
            logger.debug("Clicking dividends panel header to trigger content load...")
        try:
            header = await page.query_selector('#dividends h2, #dividends .sdps-panel__title, #dividends-togglechevron-button')
            if header:
                await header.click()
                await page.wait_for_timeout(2000)
                if debug:
                    logger.debug("Clicked dividends panel header successfully")
        except Exception as click_error:
            if debug:
                logger.debug(f"Could not click dividends header: {click_error}")

        # Give the panel content time to load after the click.
        await page.wait_for_timeout(1000)

        scraped = await page.evaluate(extract_script)

        if debug and scraped.get('_debug_text_sample'):
            logger.debug(f"Dividend panel text sample: {scraped['_debug_text_sample']}")

        # Convert the raw captures and store them on the result object.
        for field_name, converter in field_converters:
            raw_value = scraped.get(field_name)
            setattr(result, field_name, converter(raw_value) if converter else raw_value)

        if debug:
            logger.debug(f"Extracted dividend data: next_payment={result.next_payment}, "
                         f"next_pay_date={result.next_pay_date}, annual_rate={result.annual_rate}")

    except Exception as e:
        if debug:
            logger.debug(f"Error extracting dividend data: {e}")

    return result
|
||
|
||
|
||
async def extract_earnings_data(page, debug: bool = False) -> EarningsData:
    """Extract earnings metrics and forecasts.

    The earnings panel (``#expected-earnings``) is scrolled into view, its
    header clicked and any "Show More" control expanded; the panel text
    (including shadow DOM content) is then matched against a set of regular
    expressions in the browser. Extraction is best-effort: any exception is
    caught (and logged when ``debug`` is set) and whatever was populated so
    far is returned.

    Args:
        page: Playwright page object
        debug: Enable debug logging

    Returns:
        EarningsData object with extracted fields
    """
    earnings = EarningsData()

    try:
        if debug:
            logger.debug("Starting earnings data extraction...")

        # Wait for earnings panel to load
        await page.wait_for_selector('#expected-earnings', timeout=15000)

        # Scroll to earnings panel
        await page.evaluate('''
            () => {
                const earningsPanel = document.querySelector('#expected-earnings');
                if (earningsPanel) {
                    earningsPanel.scrollIntoView({ behavior: 'smooth', block: 'center' });
                }
            }
        ''')
        await page.wait_for_timeout(1000)

        # CRITICAL: Click on the panel header to trigger content loading
        # Schwab's panels don't auto-load - they need to be clicked
        if debug:
            logger.debug("Clicking earnings panel header to trigger content load...")
        try:
            earnings_header = await page.query_selector('#expected-earnings h2, #expected-earnings .sdps-panel__title, #expected-earnings-heading, #expected-earnings-togglechevron-button')
            if earnings_header:
                await earnings_header.click()
                await page.wait_for_timeout(2000)
                if debug:
                    logger.debug("Clicked earnings panel header successfully")
        except Exception as e:
            if debug:
                logger.debug(f"Could not click earnings header: {e}")

        # Wait for content to load after click
        await page.wait_for_timeout(1000)

        # Check for and click "Show More" if present
        try:
            # Use JS to find and click - most robust way
            clicked = await page.evaluate('''
                () => {
                    const panel = document.querySelector('#expected-earnings');
                    if (!panel) return false;

                    // Find any element with "Show More" text
                    const elements = Array.from(panel.querySelectorAll('a, button, span, div'));
                    const showMore = elements.find(el => el.textContent.trim().toLowerCase() === "show more");

                    if (showMore) {
                        showMore.click();
                        return true;
                    }
                    return false;
                }
            ''')

            if clicked:
                if debug:
                    logger.debug("found and clicked 'Show More' via JS")
                await page.wait_for_timeout(2000)
            elif debug:
                logger.debug("'Show More' not found or not clickable")

        except Exception as e:
            if debug:
                logger.debug(f"Error checking for Show More: {e}")

        # Extract earnings data.
        # This is a RAW Python string: every backslash below reaches the
        # JavaScript engine exactly as written, so a literal dollar sign in
        # a regex literal is written "\$" (not "\\$", which would mean
        # "a backslash followed by end-of-input" and never match).
        earnings_data = await page.evaluate(r'''
            (debug) => {
                const data = {};
                // Helper to get text content including Shadow DOMs
                const getDeepText = (root) => {
                    if (!root) return '';
                    if (root.nodeType === Node.TEXT_NODE) return root.textContent;
                    if (root.nodeType === Node.ELEMENT_NODE && root.shadowRoot) {
                        return getDeepText(root.shadowRoot);
                    }

                    let text = '';
                    const children = root.childNodes;
                    for (let i = 0; i < children.length; i++) {
                        text += getDeepText(children[i]);
                    }
                    return text;
                };

                const earningsPanel = document.querySelector('#expected-earnings');
                let fullText = '';

                if (earningsPanel) {
                    fullText = getDeepText(earningsPanel);
                }

                // Fallback to body deep text if panel seems empty
                if (fullText.length < 500 || !fullText.includes("Announcement")) {
                    fullText = getDeepText(document.body);
                }

                // Next earnings announcement - robust regex checking for various patterns
                let nextAnnouncementMatch = fullText.match(/Next Earnings Announcement.*?([0-9]{2}\/[0-9]{2}\/[0-9]{4})/i);
                if (!nextAnnouncementMatch) {
                    // Try alternate pattern: Announcement: 12/12/2025
                    nextAnnouncementMatch = fullText.match(/Announcement:?\s*([0-9]{2}\/[0-9]{2}\/[0-9]{4})/i);
                }
                if (nextAnnouncementMatch) data.next_announcement_date = nextAnnouncementMatch[1];

                // Announcement timing
                const timingMatch = fullText.match(/(Before Market Open|After Market Close)/i);
                if (timingMatch) data.announcement_timing = timingMatch[1];

                // Number of analysts
                const analystsMatch = fullText.match(/With ([0-9]+) analysts covering/i);
                if (analystsMatch) data.analysts_covering = analystsMatch[1];

                // Consensus estimate
                const consensusMatch = fullText.match(/consensus.*?estimate is \$([0-9.]+)/i);
                if (consensusMatch) data.consensus_estimate = consensusMatch[1];

                // High/Low estimates
                const highLowMatch = fullText.match(/high and low estimates are \$([0-9.]+) and \$([0-9.]+)/i);
                if (highLowMatch) {
                    data.estimate_high = highLowMatch[1];
                    data.estimate_low = highLowMatch[2];
                }

                // EPS TTM (multiple patterns)
                let epsMatch = fullText.match(/EPS\s*\(TTM\)\s*(?:Value)?\s*\$?([0-9.-]+)/i);
                if (!epsMatch) epsMatch = fullText.match(/Earnings per Share\s*\(?TTM\)?\s*(?:Value)?\s*\$?([0-9.-]+)/i);
                if (!epsMatch) epsMatch = fullText.match(/EPS\s+(?:Value)?\s*([0-9.-]+)/i);
                if (epsMatch) data.eps_ttm = epsMatch[1];

                // Revenue TTM
                let revenueMatch = fullText.match(/Revenue\s*\(TTM\)\s*(?:Value)?\s*\$([0-9.]+[KMBT]?)/i);
                if (!revenueMatch) revenueMatch = fullText.match(/Revenue\s+(?:Value)?\s*\$([0-9.]+[KMBT])/i);
                if (revenueMatch) data.revenue_ttm = revenueMatch[1];

                // P/E TTM (multiple patterns)
                let peMatch = fullText.match(/Price[\/\s]*Earnings\s*\(TTM\)\s*(?:Value)?\s*([0-9.]+)/i);
                if (!peMatch) peMatch = fullText.match(/P[\/\s]*E\s*\(?TTM\)?\s*(?:Value)?\s*([0-9.]+)/i);
                if (!peMatch) peMatch = fullText.match(/PE Ratio\s*\(TTM\)\s*(?:Value)?\s*([0-9.]+)/i);
                if (peMatch) data.pe_ttm = peMatch[1];

                // Forward P/E
                let forwardPeMatch = fullText.match(/Forward\s+P[\/\s]*E\s*(?:Value)?\s*([0-9.]+)/i);
                if (!forwardPeMatch) forwardPeMatch = fullText.match(/P[\/\s]*E\s*\(Forward\)\s*(?:Value)?\s*([0-9.]+)/i);
                if (forwardPeMatch) data.forward_pe = forwardPeMatch[1];

                // PEG Ratio
                let pegMatch = fullText.match(/Price\s+to\s+Earnings[\/\s]*Growth\s*\(PEG\)\s*(?:Value)?\s*([0-9.]+)/i);
                if (!pegMatch) pegMatch = fullText.match(/PEG\s*Ratio?\s*(?:Value)?\s*([0-9.]+)/i);
                if (pegMatch) data.peg_ratio = pegMatch[1];

                // Recent beats/misses (simplified - just extract beat amounts)
                const beatMatches = fullText.matchAll(/Beat.*?\$([0-9.]+)/gi);
                data.recent_beats = [];
                for (const match of beatMatches) {
                    data.recent_beats.push(match[1]);
                }

                return data;
            }
        ''', debug)

        # Parse and assign values
        earnings.next_announcement_date = earnings_data.get('next_announcement_date')
        earnings.announcement_timing = earnings_data.get('announcement_timing')
        earnings.analysts_covering = _parse_int(earnings_data.get('analysts_covering'))
        earnings.consensus_estimate = _parse_float(earnings_data.get('consensus_estimate'))
        earnings.estimate_high = _parse_float(earnings_data.get('estimate_high'))
        earnings.estimate_low = _parse_float(earnings_data.get('estimate_low'))
        earnings.eps_ttm = _parse_float(earnings_data.get('eps_ttm'))
        earnings.revenue_ttm = _parse_revenue(earnings_data.get('revenue_ttm', ''))
        earnings.pe_ttm = _parse_float(earnings_data.get('pe_ttm'))
        earnings.forward_pe = _parse_float(earnings_data.get('forward_pe'))
        earnings.peg_ratio = _parse_float(earnings_data.get('peg_ratio'))

        # Store recent beats as list of dicts (left at the EarningsData
        # default when no beat amounts were found)
        if earnings_data.get('recent_beats'):
            earnings.recent_beats = [
                {'beat_amount': _parse_float(beat)}
                for beat in earnings_data.get('recent_beats', [])
            ]

        if debug:
            logger.debug(f"Extracted earnings data: eps_ttm={earnings.eps_ttm}, "
                         f"pe_ttm={earnings.pe_ttm}, forward_pe={earnings.forward_pe}")

    except Exception as e:
        # Best-effort scraping: report the failure (debug only) and return
        # whatever was populated before the error.
        if debug:
            logger.debug(f"Error extracting earnings data: {e}")

    return earnings
|
||
|
||
|
||
def calculate_payout_ratio(annual_dividend: Optional[float], eps_ttm: Optional[float]) -> Optional[float]:
    """Compute the dividend payout ratio as a percentage.

    Formula: (Annual Dividend Rate / EPS TTM) × 100, rounded to two
    decimal places.

    Args:
        annual_dividend: Annual dividend rate per share
        eps_ttm: Earnings per share (trailing twelve months)

    Returns:
        Payout ratio as percentage, or None when either input is missing
        or zero, or when EPS is not positive.
    """
    # Guard clause: bail out unless both inputs are usable. `not eps_ttm > 0`
    # (rather than `eps_ttm <= 0`) also rejects NaN.
    if not annual_dividend or not eps_ttm or not eps_ttm > 0:
        return None

    return round(annual_dividend / eps_ttm * 100, 2)
|
||
|
||
|
||
async def extract_phase1_data(page, debug: bool = False) -> EquityPhase1Data:
    """Run every Phase 1 extractor against the page and bundle the results.

    The ticker is read from the page URL, the quote, dividend and earnings
    sections are extracted in that order, and the payout ratio is derived
    when both the annual dividend rate and the TTM EPS are available.

    Args:
        page: Playwright page object
        debug: Enable debug output

    Returns:
        EquityPhase1Data object with all extracted data
    """
    # Non-raw Python string: "\\/" reaches JavaScript as "\/".
    ticker_from_url_script = '''
        () => {
            const url = window.location.href;
            const match = url.match(/stocks\\/([A-Z]+)/i);
            return match ? match[1].toUpperCase() : '';
        }
    '''

    if debug:
        logger.debug("Starting Phase 1 data extraction...")

    # Fixed pause so the page can stabilize before anything is read.
    await page.wait_for_timeout(3000)

    ticker = await page.evaluate(ticker_from_url_script)

    # The sections are extracted sequentially, in the same order as before.
    quote = await extract_quote_data(page, ticker=ticker, debug=debug)
    dividends = await extract_enhanced_dividends(page, debug=debug)
    earnings = await extract_earnings_data(page, debug=debug)

    # Derived metrics: only computed when both inputs are present and non-zero.
    calculated = CalculatedMetrics()
    annual_rate = dividends.annual_rate
    eps_ttm = earnings.eps_ttm
    if annual_rate and eps_ttm:
        calculated.payout_ratio = calculate_payout_ratio(annual_rate, eps_ttm)

    bundle = EquityPhase1Data(
        ticker=ticker,
        quote=quote,
        dividends=dividends,
        earnings=earnings,
        calculated_metrics=calculated,
    )

    if debug:
        logger.debug(f"Phase 1 extraction complete for {ticker}")

    return bundle
|