"""Dividend data extraction for a Schwab stock page.

The page object is assumed to be a Playwright-style async ``Page``; only the
methods actually called in this module are required of it.
"""

from typing import Dict, Any, Optional

from ...utils.logging import save_debug_artifact


# Number of one-second polls for the dividends panel to fill in.
_MAX_LOAD_ATTEMPTS = 5

# All JavaScript snippets are raw strings so that backslash sequences such as
# '\n' and regex escapes reach the browser verbatim instead of being
# interpreted by Python first.

# Reports whether the #dividends panel (or a fallback element) holds real text.
_JS_DIVIDEND_STATUS = r"""
() => {
    const result = { loaded: false, debug: {} };

    // Look for the dividends panel content that should be populated
    const dividendsPanel = document.querySelector('#dividends');
    if (dividendsPanel) {
        const panelBody = dividendsPanel.querySelector('.sdps-panel__body');
        if (panelBody) {
            const textContent = panelBody.textContent || '';
            result.debug.panelBodyLength = textContent.length;
            result.debug.panelBodySample = textContent.substring(0, 200);

            // Check if the panel has been populated with actual dividend text
            // (not just empty comments)
            const hasRealContent = textContent.length > 50 && (
                textContent.includes('Previous Dividend') ||
                textContent.includes('Pay Date') ||
                textContent.includes('Ex-Date') ||
                textContent.includes('Frequency') ||
                textContent.includes('Annual Dividend') ||
                textContent.includes('$') ||
                textContent.includes('%')
            );

            if (hasRealContent) {
                result.loaded = true;
                return result;
            }
        }
    }

    // Alternative: check for stock-dividends component
    const stockDividends = document.querySelector('stock-dividends');
    if (stockDividends) {
        const text = stockDividends.textContent || '';
        result.debug.stockDividendsLength = text.length;
        result.debug.stockDividendsSample = text.substring(0, 100);

        if (text.length > 20 && text.includes('$')) {
            result.loaded = true;
            return result;
        }
    }

    // Alternative: check for any elements with dividend-related content
    const allElements = document.querySelectorAll('#dividends *');
    result.debug.totalElements = allElements.length;

    for (let elem of allElements) {
        const text = elem.textContent || '';
        if (text.includes('Previous Dividend Payment') ||
            (text.includes('$') && text.includes('.'))) {
            result.loaded = true;
            result.debug.foundInElement = elem.tagName + '.' + elem.className;
            return result;
        }
    }

    return result;
}
"""

# Scrolls the #dividends panel into view (may trigger lazy loading).
_JS_SCROLL_TO_DIVIDENDS_PANEL = r"""
() => {
    const dividendsPanel = document.querySelector('#dividends');
    if (dividendsPanel) {
        dividendsPanel.scrollIntoView({ behavior: 'smooth', block: 'center' });
    }
}
"""

# Summarises the panel body text after the manual trigger attempt.
_JS_FINAL_PANEL_STATUS = r"""
() => {
    const dividendsPanel = document.querySelector('#dividends');
    if (dividendsPanel) {
        const panelBody = dividendsPanel.querySelector('.sdps-panel__body');
        if (panelBody) {
            const textContent = panelBody.textContent || '';
            return {
                length: textContent.length,
                sample: textContent.substring(0, 500),
                hasBasicData: textContent.includes('$') && (
                    textContent.includes('Previous') ||
                    textContent.includes('Pay Date') ||
                    textContent.includes('Ex-Date')
                )
            };
        }
    }
    return { length: 0, sample: '', hasBasicData: false };
}
"""

# Regex-based extraction from the panel body's flattened text.
_JS_IMMEDIATE_EXTRACTION = r"""
() => {
    const results = {};
    const dividendsPanel = document.querySelector('#dividends');
    if (dividendsPanel) {
        const panelBody = dividendsPanel.querySelector('.sdps-panel__body');
        if (panelBody) {
            const fullText = panelBody.textContent || '';

            // Extract data using pattern matching from the full text
            const patterns = {
                'Previous Dividend Payment': /Previous Dividend Payment\s*\$([0-9]+\.[0-9]+)/,
                'Previous Pay Date': /Previous Pay Date\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/,
                'Previous Ex-Date': /Previous Ex-Date\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/,
                'Frequency': /Frequency\s*([A-Za-z]+)/,
                'Annual Dividend Rate': /(?:Annual Dividend Rate|IAD).*?\$([0-9]+\.[0-9]+)/,
                'Annual Dividend Yield': /([0-9]+\.[0-9]+%)(?=\s|Annual|$)/
            };

            for (const [field, pattern] of Object.entries(patterns)) {
                const match = fullText.match(pattern);
                if (match) {
                    if (field === 'Previous Dividend Payment' || field === 'Annual Dividend Rate') {
                        results[field] = '$' + match[1];
                    } else {
                        results[field] = match[1];
                    }
                }
            }
        }
    }
    return results;
}
"""

# Scrolls the <stock-dividends> component into view.
_JS_SCROLL_TO_STOCK_DIVIDENDS = r"""
() => {
    const stockDividends = document.querySelector('stock-dividends');
    if (stockDividends) {
        stockDividends.scrollIntoView({ behavior: 'smooth', block: 'center' });
    }
}
"""

# True when the page body mentions anything dividend-related.
_JS_PAGE_HAS_DIVIDEND_TEXT = r"""
() => {
    const text = document.body.innerText.toLowerCase();
    return text.includes('dividend') ||
           text.includes('ex-date') ||
           text.includes('pay date') ||
           text.includes('previous dividend') ||
           text.includes('iad');
}
"""

# Debug aid: lists body text lines that look dividend-related.
# NOTE(review): '$0.26', '0.4865%' and '$1.04' look like leftover sample
# values from one specific ticker - confirm whether they are still wanted.
_JS_DIVIDEND_TEXT_LINES = r"""
() => {
    const text = document.body.innerText;
    const lines = text.split('\n');
    const dividendLines = lines.filter(line => {
        const lower = line.toLowerCase();
        return lower.includes('dividend') ||
               lower.includes('ex-date') ||
               lower.includes('pay date') ||
               lower.includes('previous') ||
               lower.includes('iad') ||
               lower.includes('frequency') ||
               lower.includes('quarterly') ||
               lower.includes('$0.26') ||
               lower.includes('0.4865%') ||
               lower.includes('$1.04') ||
               lower.includes('annual dividend') ||
               lower.includes('yield');
    });
    return dividendLines;
}
"""

# Debug aid: for each known label, take the text of a following sibling or
# the text trailing the label inside the same leaf element.
_JS_COMPREHENSIVE_FIELD_SEARCH = r"""
() => {
    // Look for elements containing common dividend field names
    const fieldNames = [
        'Previous Dividend Payment', 'Next Dividend Payment',
        'Previous Pay Date', 'Next Pay Date',
        'Previous Ex-Date', 'Next Ex-Date', 'Ex-Date',
        'Frequency', 'Annual Dividend Rate', 'IAD',
        'Annual Dividend Yield', 'Dividend Yield'
    ];

    const results = {};

    fieldNames.forEach(fieldName => {
        // Search for elements containing this field name
        const elements = Array.from(document.querySelectorAll('*')).filter(el =>
            el.textContent && el.textContent.includes(fieldName) &&
            el.children.length === 0 // Text nodes only
        );

        elements.forEach(el => {
            // Look for value in nearby elements
            const parent = el.parentElement;
            if (parent) {
                const siblings = Array.from(parent.children);
                const currentIndex = siblings.indexOf(el);

                // Check next siblings for values
                for (let i = currentIndex + 1; i < siblings.length; i++) {
                    const sibling = siblings[i];
                    const text = sibling.textContent.trim();
                    if (text && text !== fieldName && text.length > 0 && text.length < 50) {
                        results[fieldName] = text;
                        break;
                    }
                }

                // Check same element for values after the field name
                const fullText = el.textContent;
                const fieldIndex = fullText.indexOf(fieldName);
                if (fieldIndex >= 0) {
                    const afterField = fullText.substring(fieldIndex + fieldName.length).trim();
                    if (afterField && afterField.length > 0 && afterField.length < 50) {
                        results[fieldName] = afterField;
                    }
                }
            }
        });
    });

    return results;
}
"""

# Describes whether #dividend-grid exists and how much text it holds.
_JS_GRID_STATUS = r"""
() => {
    const dividendGrid = document.querySelector('#dividend-grid');
    if (!dividendGrid) return { found: false, message: 'No #dividend-grid element found' };

    const textContent = dividendGrid.textContent || '';
    const hasContent = textContent.trim().length > 50;
    const childCount = dividendGrid.children.length;

    return {
        found: true,
        hasContent,
        textLength: textContent.length,
        childCount,
        preview: textContent.substring(0, 200),
        message: `Grid found with ${childCount} children, ${textContent.length} chars`
    };
}
"""

# Row-based, then line-based, extraction from #dividend-grid.
_JS_GRID_EXTRACTION = r"""
() => {
    const results = {};

    // Check if dividend grid exists and has content
    const dividendGrid = document.querySelector('#dividend-grid');
    if (dividendGrid) {
        const allGridText = dividendGrid.textContent || '';
        const lines = allGridText.split('\n').map(line => line.trim()).filter(line => line.length > 0);

        // Try structured approach first - look for rows/cells
        const dividendRows = dividendGrid.querySelectorAll('div[class*="row"], tr, .dividend-row, div:has(div)');

        dividendRows.forEach((row, rowIndex) => {
            const rowText = row.textContent || '';

            // Look for dividend payment info
            if (rowText.includes('Dividend Payment') || (rowText.includes('Previous') && rowText.includes('$'))) {
                const amountMatch = rowText.match(/\$[0-9]+\.[0-9]+/);
                if (amountMatch && !results['Previous Dividend Payment']) {
                    results['Previous Dividend Payment'] = amountMatch[0];
                }

                // Look for dates in the same row
                const dateMatches = rowText.match(/([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/g);
                if (dateMatches) {
                    if (dateMatches.length >= 1 && !results['Previous Pay Date']) results['Previous Pay Date'] = dateMatches[0];
                    if (dateMatches.length >= 2 && !results['Previous Ex-Date']) results['Previous Ex-Date'] = dateMatches[1];
                }
            }
        });

        // Fallback: Parse all lines systematically
        for (let i = 0; i < lines.length; i++) {
            const line = lines[i];
            const nextLine = i + 1 < lines.length ? lines[i + 1] : '';

            // Match dividend payment
            if ((line.includes('Previous Dividend Payment') || line.includes('Dividend Payment')) && !results['Previous Dividend Payment']) {
                const amountPattern = /\$[0-9]+\.[0-9]+/;
                let amount = line.match(amountPattern) || nextLine.match(amountPattern);
                if (amount) results['Previous Dividend Payment'] = amount[0];
            }

            // Match pay date
            if (line.includes('Pay Date') && !results['Previous Pay Date']) {
                const datePattern = /[A-Za-z]{3,9} [0-9]{1,2}, [0-9]{4}/;
                let date = line.match(datePattern) || nextLine.match(datePattern);
                if (date) results['Previous Pay Date'] = date[0];
            }

            // Match ex-date
            if (line.includes('Ex-Date') && !results['Previous Ex-Date']) {
                const datePattern = /[A-Za-z]{3,9} [0-9]{1,2}, [0-9]{4}/;
                let date = line.match(datePattern) || nextLine.match(datePattern);
                if (date) results['Previous Ex-Date'] = date[0];
            }

            // Match frequency
            if (line.includes('Frequency') && !results['Frequency']) {
                const freqLine = line + ' ' + nextLine;
                if (freqLine.toLowerCase().includes('quarterly')) results['Frequency'] = 'Quarterly';
                else if (freqLine.toLowerCase().includes('monthly')) results['Frequency'] = 'Monthly';
                else if (freqLine.toLowerCase().includes('annual')) results['Frequency'] = 'Annual';
                else if (freqLine.toLowerCase().includes('semi')) results['Frequency'] = 'Semi-Annual';
            }

            // Match annual dividend rate
            if ((line.includes('Annual Dividend Rate') || line.includes('IAD')) && !results['Annual Dividend Rate']) {
                const amountPattern = /\$[0-9]+\.[0-9]+/;
                let amount = line.match(amountPattern) || nextLine.match(amountPattern);
                if (amount) results['Annual Dividend Rate'] = amount[0];
            }

            // Match annual dividend yield
            if (line.includes('Annual Dividend Yield') && !results['Annual Dividend Yield']) {
                const percentPattern = /[0-9]+\.[0-9]+%/;
                let percent = line.match(percentPattern) || nextLine.match(percentPattern);
                if (percent) results['Annual Dividend Yield'] = percent[0];
            }
        }
    }

    return results;
}
"""

# Three-strategy extraction: grid rows, <stock-dividends> text, #dividends.
_JS_DYNAMIC_EXTRACTION = r"""
() => {
    const results = {};

    // Strategy 1: Look for any dividend grid structure that was loaded
    const dividendGrid = document.querySelector('#dividend-grid');
    if (dividendGrid) {
        const rows = dividendGrid.querySelectorAll('div.sdps-row, .row');
        for (let row of rows) {
            const cells = row.querySelectorAll('div[class*="col-"]');
            if (cells.length >= 2) {
                const label = cells[0].textContent.trim();
                const value = cells[1].textContent.trim();

                // Map the labels to our expected field names
                if (label.includes('Previous Dividend Payment') || label.includes('Dividend Payment')) {
                    results['Previous Dividend Payment'] = value;
                } else if (label.includes('Previous Pay Date') || label.includes('Pay Date')) {
                    results['Previous Pay Date'] = value;
                } else if (label.includes('Previous Ex-Date') || label.includes('Ex-Date')) {
                    results['Previous Ex-Date'] = value;
                } else if (label.includes('Frequency')) {
                    results['Frequency'] = value;
                } else if (label.includes('Annual Dividend Rate') || label.includes('IAD')) {
                    results['Annual Dividend Rate'] = value;
                } else if (label.includes('Annual Dividend Yield')) {
                    results['Annual Dividend Yield'] = value;
                }
            }
        }

        if (Object.keys(results).length > 0) {
            return results;
        }
    }

    // Strategy 2: Look for stock-dividends component content
    const stockDividends = document.querySelector('stock-dividends');
    if (stockDividends) {
        const allText = stockDividends.textContent || '';
        const lines = allText.split('\n').map(line => line.trim()).filter(line => line);

        for (let i = 0; i < lines.length; i++) {
            const line = lines[i];
            const nextLine = i + 1 < lines.length ? lines[i + 1] : '';

            if (line.includes('Previous Dividend Payment') || line.includes('Dividend Payment')) {
                const amountMatch = (line + ' ' + nextLine).match(/\$[0-9]+\.[0-9]+/);
                if (amountMatch) results['Previous Dividend Payment'] = amountMatch[0];
            } else if (line.includes('Pay Date')) {
                const dateMatch = (line + ' ' + nextLine).match(/[A-Za-z]+ [0-9]{1,2}, [0-9]{4}/);
                if (dateMatch) results['Previous Pay Date'] = dateMatch[0];
            } else if (line.includes('Ex-Date')) {
                const dateMatch = (line + ' ' + nextLine).match(/[A-Za-z]+ [0-9]{1,2}, [0-9]{4}/);
                if (dateMatch) results['Previous Ex-Date'] = dateMatch[0];
            } else if (line.includes('Frequency')) {
                if (line.toLowerCase().includes('quarterly') || nextLine.toLowerCase().includes('quarterly')) {
                    results['Frequency'] = 'Quarterly';
                } else if (line.toLowerCase().includes('monthly') || nextLine.toLowerCase().includes('monthly')) {
                    results['Frequency'] = 'Monthly';
                } else if (line.toLowerCase().includes('annual') || nextLine.toLowerCase().includes('annual')) {
                    results['Frequency'] = 'Annual';
                }
            } else if (line.includes('Annual Dividend Rate') || line.includes('IAD')) {
                const amountMatch = (line + ' ' + nextLine).match(/\$[0-9]+\.[0-9]+/);
                if (amountMatch) results['Annual Dividend Rate'] = amountMatch[0];
            } else if (line.includes('Annual Dividend Yield')) {
                const percentMatch = (line + ' ' + nextLine).match(/[0-9]+\.[0-9]+%/);
                if (percentMatch) results['Annual Dividend Yield'] = percentMatch[0];
            }
        }

        if (Object.keys(results).length > 0) {
            return results;
        }
    }

    // Strategy 3: Look within entire dividends panel for any structured content
    const dividendsPanel = document.querySelector('#dividends');
    if (dividendsPanel) {
        const allElements = dividendsPanel.querySelectorAll('*');

        for (let elem of allElements) {
            const text = elem.textContent || '';

            // Look for dollar amounts near dividend-related text
            if (text.includes('Previous Dividend Payment') || text.includes('Dividend Payment')) {
                const parent = elem.parentElement;
                if (parent) {
                    const siblings = Array.from(parent.children);
                    const currentIndex = siblings.indexOf(elem);

                    // Check next siblings for values
                    for (let j = currentIndex + 1; j < siblings.length; j++) {
                        const sibling = siblings[j];
                        const siblingText = sibling.textContent.trim();
                        const amountMatch = siblingText.match(/\$[0-9]+\.[0-9]+/);
                        if (amountMatch) {
                            results['Previous Dividend Payment'] = amountMatch[0];
                            break;
                        }
                    }
                }
            }

            // Only 'Previous Dividend Payment' is handled by this strategy;
            // the other fields (Pay Date, Ex-Date, ...) are not implemented.
        }
    }

    return results;
}
"""

# Finds the text adjacent to the first element whose text contains the label.
# The label is received as the function argument rather than spliced into
# the source, so no JavaScript quoting is needed on the Python side.
_JS_FIND_VALUE_NEAR_LABEL = r"""
(searchText) => {
    // First check within the dividends section specifically
    const dividendsPanel = document.querySelector('#dividends');
    const stockDividends = document.querySelector('stock-dividends');
    const searchContainers = [dividendsPanel, stockDividends, document];

    for (let container of searchContainers) {
        if (!container) continue;

        const elements = Array.from(container.querySelectorAll('*'));
        for (let elem of elements) {
            if (elem.textContent && elem.textContent.includes(searchText)) {
                // Look for next sibling or nearby element with value
                let candidate = elem.nextElementSibling;
                if (candidate && candidate.textContent &&
                    !candidate.textContent.includes(searchText) &&
                    candidate.textContent.trim().length > 0) {
                    return candidate.textContent.trim();
                }

                // Try parent's next sibling
                candidate = elem.parentElement?.nextElementSibling;
                if (candidate && candidate.textContent &&
                    !candidate.textContent.includes(searchText) &&
                    candidate.textContent.trim().length > 0) {
                    return candidate.textContent.trim();
                }

                // Try looking in the same element's parent for nearby text
                const parent = elem.parentElement;
                if (parent) {
                    const parentText = parent.textContent;
                    const lines = parentText.split('\n');
                    for (let i = 0; i < lines.length; i++) {
                        if (lines[i].includes(searchText) && i + 1 < lines.length) {
                            const nextLine = lines[i + 1].trim();
                            if (nextLine && !nextLine.includes(searchText)) {
                                return nextLine;
                            }
                        }
                    }
                }
            }
        }

        // Only the first container that exists is searched; the whole
        // document is used only when neither dividend container is present.
        if (container !== document) {
            break;
        }
    }

    return null;
}
"""

# Candidate selectors for the dividend section, tried in order.
_DIVIDEND_SECTION_SELECTORS = [
    '#dividend-grid',  # Primary target based on user feedback
    'stock-dividends',  # Secondary target - the web component
    '#dividend-section',
    '#dividends-section',
    '.dividend-summary',
    '.dividends-summary',
    'div[data-testid*="dividend"]',
    'div[aria-label*="dividend"]',
    '[class*="dividend"]',
    'section:has-text("Dividend")',
    'div:has-text("Previous Dividend Payment")',
]

# Fallback selectors per output field, tried in order.
_DIVIDEND_FIELD_SELECTORS = {
    'Previous Dividend Payment': [
        '#dividend-grid div:has-text("Previous Dividend Payment") ~ div',
        '#dividend-grid div:has-text("Dividend Payment") ~ div',
        '#dividends span:has-text("Previous Dividend Payment") + span',
        '#dividends div:has-text("Previous Dividend Payment") + div',
        '#dividends *:has-text("Previous Dividend Payment") ~ *',
        'stock-dividends span:has-text("Previous Dividend Payment") + span',
        'stock-dividends div:has-text("Previous Dividend Payment") + div',
        'span:has-text("Previous Dividend Payment") + span',
        'div:has-text("Previous Dividend Payment") + div',
        '*:has-text("Previous Dividend Payment") ~ *',
        'span:has-text("Next Dividend Payment") + span',
        'div:has-text("Next Dividend Payment") + div',
        '*:has-text("Next Dividend Payment") ~ *',
        '[data-field="dividend-payment"]',
        '.dividend-payment',
    ],
    'Previous Pay Date': [
        '#dividend-grid div:has-text("Previous Pay Date") ~ div',
        '#dividend-grid div:has-text("Pay Date") ~ div',
        '#dividends span:has-text("Previous Pay Date") + span',
        '#dividends div:has-text("Previous Pay Date") + div',
        '#dividends *:has-text("Previous Pay Date") ~ *',
        'stock-dividends span:has-text("Previous Pay Date") + span',
        'stock-dividends div:has-text("Previous Pay Date") + div',
        'span:has-text("Previous Pay Date") + span',
        'div:has-text("Previous Pay Date") + div',
        '*:has-text("Previous Pay Date") ~ *',
        'span:has-text("Next Pay Date") + span',
        'div:has-text("Next Pay Date") + div',
        '*:has-text("Next Pay Date") ~ *',
        '*:has-text("Pay Date") ~ *',
        '[data-field="pay-date"]',
        '.pay-date',
    ],
    'Previous Ex-Date': [
        '#dividend-grid div:has-text("Previous Ex-Date") ~ div',
        '#dividend-grid div:has-text("Ex-Date") ~ div',
        '#dividends span:has-text("Previous Ex-Date") + span',
        '#dividends div:has-text("Previous Ex-Date") + div',
        '#dividends *:has-text("Previous Ex-Date") ~ *',
        'stock-dividends span:has-text("Previous Ex-Date") + span',
        'stock-dividends div:has-text("Previous Ex-Date") + div',
        'span:has-text("Previous Ex-Date") + span',
        'div:has-text("Previous Ex-Date") + div',
        '*:has-text("Previous Ex-Date") ~ *',
        'span:has-text("Next Ex-Date") + span',
        'div:has-text("Next Ex-Date") + div',
        '*:has-text("Next Ex-Date") ~ *',
        '*:has-text("Ex-Date") ~ *',
        '[data-field="ex-date"]',
        '.ex-date',
    ],
    'Frequency': [
        '#dividend-grid div:has-text("Frequency") ~ div',
        '#dividends span:has-text("Frequency") + span',
        '#dividends div:has-text("Frequency") + div',
        '#dividends *:has-text("Frequency") ~ *',
        'stock-dividends span:has-text("Frequency") + span',
        'stock-dividends div:has-text("Frequency") + div',
        'span:has-text("Frequency") + span',
        'div:has-text("Frequency") + div',
        '*:has-text("Frequency") ~ *',
        '[data-field="frequency"]',
        '.dividend-frequency',
        '.frequency',
    ],
    'Annual Dividend Rate': [
        '#dividend-grid div:has-text("Annual Dividend Rate") ~ div',
        '#dividend-grid div:has-text("IAD") ~ div',
        '#dividends span:has-text("Annual Dividend Rate") + span',
        '#dividends div:has-text("Annual Dividend Rate") + div',
        '#dividends *:has-text("Annual Dividend Rate") ~ *',
        '#dividends span:has-text("IAD") + span',
        '#dividends *:has-text("IAD") ~ *',
        'stock-dividends span:has-text("Annual Dividend Rate") + span',
        'stock-dividends div:has-text("Annual Dividend Rate") + div',
        'stock-dividends span:has-text("IAD") + span',
        'span:has-text("Annual Dividend Rate") + span',
        'div:has-text("Annual Dividend Rate") + div',
        '*:has-text("Annual Dividend Rate") ~ *',
        'span:has-text("IAD") + span',
        '*:has-text("IAD") ~ *',
        '[data-field="annual-rate"]',
        '.annual-dividend-rate',
    ],
    'Annual Dividend Yield': [
        '#dividend-grid div:has-text("Annual Dividend Yield") ~ div',
        '#dividends span:has-text("Annual Dividend Yield") + span',
        '#dividends div:has-text("Annual Dividend Yield") + div',
        '#dividends *:has-text("Annual Dividend Yield") ~ *',
        'stock-dividends span:has-text("Annual Dividend Yield") + span',
        'stock-dividends div:has-text("Annual Dividend Yield") + div',
        'span:has-text("Annual Dividend Yield") + span',
        'div:has-text("Annual Dividend Yield") + div',
        '*:has-text("Annual Dividend Yield") ~ *',
        '[data-field="dividend-yield"]',
        '.dividend-yield',
    ],
}


def should_replace_dividend_value(existing_value: Optional[str], new_value: Optional[str]) -> bool:
    """
    Decide whether to replace an existing dividend field value with a new one.

    Rules:
    - Never replace with empty/None values
    - Replace if there is no existing value
    - Replace if the existing value is "Show More" or contains "Show More"
    - Otherwise, keep the existing (good) data

    Args:
        existing_value: Value currently stored for the field, if any.
        new_value: Candidate replacement value.

    Returns:
        True when ``new_value`` should overwrite ``existing_value``.
    """
    if not new_value or not str(new_value).strip():
        return False
    if not existing_value:
        return True
    # Containment also covers the exact-match case.
    return 'Show More' in str(existing_value)


async def _save_debug_screenshot(page, filename: str, label: str) -> None:
    """Save a full-page screenshot as a debug artifact and print its path."""
    png = await page.screenshot(full_page=True)
    path = save_debug_artifact(filename, png)
    print(f"DEBUG: {label} {path}")


def _merge_extracted(dividend_data: Dict[str, Any], extracted: Dict[str, Any],
                     source: str, debug: bool) -> None:
    """Copy ``extracted`` fields into ``dividend_data`` without clobbering good values."""
    for field, value in extracted.items():
        existing_value = dividend_data.get(field, '')
        if should_replace_dividend_value(existing_value, value):
            dividend_data[field] = value
            if debug:
                print(f"DEBUG: Updated {field} from {source}: {value}")
        elif debug:
            print(f"DEBUG: Keeping existing good data for {field}: {existing_value} "
                  f"(ignoring {source} value: {value})")


async def _debug_force_dividend_load(page, dividend_data: Dict[str, Any]) -> None:
    """Debug-only: scroll to and click the dividends panel, then regex-extract.

    NOTE(review): because this runs only when ``debug`` is set, a debug run
    can return fields that a normal run would not - confirm that is intended.
    """
    print("DEBUG: Basic dividend content did not auto-load - this suggests the page is not behaving as expected")
    print("DEBUG: Expected behavior: Basic dividend info should be visible without clicking 'Show More'")
    print("DEBUG: Attempting to trigger dividend content loading...")

    try:
        # Try scrolling to the dividend section to trigger lazy loading
        await page.evaluate(_JS_SCROLL_TO_DIVIDENDS_PANEL)
        await page.wait_for_timeout(3000)

        # Try clicking on the dividends panel header to ensure it's active
        try:
            dividends_header = await page.query_selector('#dividends h2, #dividends .sdps-panel__title')
            if dividends_header:
                await dividends_header.click()
                await page.wait_for_timeout(2000)
                print("DEBUG: Clicked on dividends panel header")
        except Exception:
            # Best effort: a failed click must not stop the status check below.
            pass

        # Check one more time if content loaded
        final_status = await page.evaluate(_JS_FINAL_PANEL_STATUS)
        print(f"DEBUG: Final dividend panel status: {final_status}")

        if final_status.get('hasBasicData'):
            print("DEBUG: Basic dividend data now detected after manual triggering!")

            # Extract the data immediately while it's loaded
            immediate_extraction = await page.evaluate(_JS_IMMEDIATE_EXTRACTION)
            print(f"DEBUG: Immediate extraction results: {immediate_extraction}")

            if immediate_extraction:
                dividend_data.update(immediate_extraction)
                # Clean up the Frequency field if it has extra text
                if 'Frequency' in dividend_data and 'Quarterly' in dividend_data['Frequency']:
                    dividend_data['Frequency'] = 'Quarterly'
    except Exception as e:
        print(f"DEBUG: Error during manual triggering: {e}")

    await _save_debug_screenshot(page, "debug_dividend_timeout.png", "Screenshot after timeout:")


async def _wait_for_dividend_content(page, dividend_data: Dict[str, Any], debug: bool) -> None:
    """Wait for the #dividends panel and poll until it contains real text.

    Any failure (including the selector wait timing out) is swallowed so the
    later extraction strategies still run.
    """
    try:
        # First wait for the dividends panel to appear
        await page.wait_for_selector('#dividends', timeout=15000)
        if debug:
            print("DEBUG: #dividends panel found")

        # Wait for dividend content to load dynamically
        dividend_loaded = False
        for attempt in range(1, _MAX_LOAD_ATTEMPTS + 1):
            if debug:
                print(f"DEBUG: Attempt {attempt}/{_MAX_LOAD_ATTEMPTS} - Waiting for dynamic dividend content...")

            # Check if the dividends section has been populated with actual content
            dividend_status = await page.evaluate(_JS_DIVIDEND_STATUS)
            if debug:
                print(f"DEBUG: Dividend status: {dividend_status}")

            dividend_loaded = dividend_status.get('loaded', False)
            if dividend_loaded:
                if debug:
                    print("DEBUG: Dynamic dividend content loaded!")
                    await _save_debug_screenshot(
                        page, "debug_dividend_content_loaded.png", "Screenshot after content loaded:")
                break

            # Wait between attempts to allow for async loading
            await page.wait_for_timeout(1000)

        if not dividend_loaded and debug:
            await _debug_force_dividend_load(page, dividend_data)
    except Exception as e:
        if debug:
            print(f"DEBUG: Error waiting for dividend content: {e}")


async def _wait_for_dividend_grid(page, debug: bool) -> None:
    """Wait for #dividend-grid, scrolling to <stock-dividends> and retrying once."""
    if debug:
        print("DEBUG: Checking for #dividend-grid...")

    try:
        await page.wait_for_selector('#dividend-grid', timeout=10000)
        if debug:
            print("DEBUG: #dividend-grid found!")
            await _save_debug_screenshot(page, "debug_dividend_grid_found.png", "Screenshot with dividend grid:")
        return
    except Exception:
        if debug:
            print("DEBUG: #dividend-grid not found initially")
            await _save_debug_screenshot(page, "debug_dividend_no_grid.png", "Screenshot without grid:")

    # Try to scroll to the dividend section to ensure it's in view
    if debug:
        print("DEBUG: Scrolling to stock-dividends component...")
    try:
        await page.evaluate(_JS_SCROLL_TO_STOCK_DIVIDENDS)
        await page.wait_for_timeout(3000)

        if debug:
            await _save_debug_screenshot(page, "debug_dividend_after_scroll.png", "Screenshot after scroll:")

        # Check again for dividend grid after scrolling
        try:
            await page.wait_for_selector('#dividend-grid', timeout=5000)
            if debug:
                print("DEBUG: #dividend-grid found after scroll!")
                await _save_debug_screenshot(
                    page, "debug_dividend_grid_after_scroll.png", "Screenshot with grid after scroll:")
        except Exception:
            if debug:
                print("DEBUG: #dividend-grid still not found after scroll")
    except Exception as e:
        if debug:
            print(f"DEBUG: Error during scroll attempt: {e}")


async def _find_dividend_section(page, debug: bool) -> Optional[str]:
    """Return the first section selector that is visible, or None."""
    for selector in _DIVIDEND_SECTION_SELECTORS:
        try:
            if await page.is_visible(selector):
                if debug:
                    print(f"DEBUG: Found dividend section with selector: {selector}")
                return selector
        except Exception:
            # An unsupported or failing selector just means "try the next one".
            continue
    return None


async def _page_has_dividend_text(page, dividend_data: Dict[str, Any], debug: bool) -> bool:
    """Check the whole page for dividend wording when no section was located.

    In debug mode this also saves the page HTML and a screenshot, dumps the
    dividend-looking text lines and merges the result of a page-wide label
    search into ``dividend_data``.

    NOTE(review): the page-wide search feeds ``dividend_data`` only when
    ``debug`` is set - confirm that is intended.
    """
    if debug:
        print("DEBUG: No dividend section found, trying broader search...")
        # In debug mode, capture the page content to help identify selectors
        page_content = await page.content()
        path_html = save_debug_artifact("debug_dividend_page.html", page_content)
        print(f"DEBUG: Page HTML saved to {path_html} for analysis")
        # Also save a screenshot to see the visual layout
        await _save_debug_screenshot(page, "debug_dividend_page.png", "Page screenshot saved to")

    # Fallback: look for dividend-related text anywhere on page
    dividend_text_exists = await page.evaluate(_JS_PAGE_HAS_DIVIDEND_TEXT)

    if debug:
        print(f"DEBUG: Dividend-related text found on page: {dividend_text_exists}")

        # Try scrolling down to reveal more content
        await page.evaluate('window.scrollTo(0, document.body.scrollHeight)')
        await page.wait_for_timeout(2000)

        # Extract all text content that might contain dividend info
        dividend_related_text = await page.evaluate(_JS_DIVIDEND_TEXT_LINES)
        print(f"DEBUG: Found dividend-related text lines: {dividend_related_text}")

        # Try a more comprehensive search for dividend data
        all_dividend_info = await page.evaluate(_JS_COMPREHENSIVE_FIELD_SEARCH)
        print(f"DEBUG: Comprehensive dividend search results: {all_dividend_info}")

        # Use the comprehensive results only where we don't already have good data
        if all_dividend_info:
            for field, value in all_dividend_info.items():
                if value and value.strip():
                    existing_value = dividend_data.get(field, '')
                    if should_replace_dividend_value(existing_value, value):
                        dividend_data[field] = value.strip()
                        print(f"DEBUG: Added dividend field from comprehensive search: {field} = {value}")
                    else:
                        print(f"DEBUG: Keeping existing good data for {field}: {existing_value} "
                              f"(ignoring comprehensive search value: {value})")

    return dividend_text_exists


async def _extract_from_dividend_grid(page, dividend_data: Dict[str, Any], debug: bool) -> None:
    """Run the #dividend-grid specific extraction and merge its results."""
    if debug:
        print("DEBUG: Using specific dividend grid selectors...")
    try:
        # First check if dividend grid is actually present and populated
        grid_status = await page.evaluate(_JS_GRID_STATUS)
        if debug:
            print(f"DEBUG: Dividend grid status: {grid_status}")

        # Extract dividend data using improved selectors
        specific_dividend_data = await page.evaluate(_JS_GRID_EXTRACTION)
        if debug:
            print(f"DEBUG: Specific dividend grid extraction results: {specific_dividend_data}")

        # Add the extracted data only if we don't already have good data
        if specific_dividend_data:
            _merge_extracted(dividend_data, specific_dividend_data, "specific extraction", debug)
    except Exception as e:
        if debug:
            print(f"DEBUG: Error in specific dividend grid extraction: {e}")


async def _extract_dynamic_content(page, dividend_data: Dict[str, Any], debug: bool) -> None:
    """Run the multi-strategy in-page extraction and merge its results."""
    if debug:
        print("DEBUG: Extracting dividend data from dividend-grid structure...")
    try:
        dividend_dynamic_data = await page.evaluate(_JS_DYNAMIC_EXTRACTION)
        if debug:
            print(f"DEBUG: Dynamic dividend extraction results: {dividend_dynamic_data}")
        if dividend_dynamic_data:
            _merge_extracted(dividend_data, dividend_dynamic_data, "dynamic extraction", debug)
    except Exception as e:
        if debug:
            print(f"DEBUG: Error in dynamic dividend extraction: {e}")


async def _extract_field_via_selectors(page, dividend_section: str, field_name: str,
                                       selectors, dividend_data: Dict[str, Any],
                                       debug: bool) -> bool:
    """Try each selector for one field; return True only if a value was stored.

    Stops at the first selector yielding a non-empty, non-label text, whether
    or not that text ends up replacing the existing value.
    """
    for selector in selectors:
        try:
            # Scope search within dividend section if found, otherwise search whole page.
            # NOTE(review): selectors that already start with '#dividend-grid',
            # '#dividends' or 'stock-dividends' get the section prefixed as
            # well, which probably makes them unmatchable - verify.
            full_selector = selector if dividend_section == 'body' else f'{dividend_section} {selector}'

            if not await page.is_visible(full_selector, timeout=1000):
                continue

            value = await page.inner_text(full_selector)
            clean_value = value.strip()

            # Ensure we got an actual value, not the label
            if not clean_value or clean_value == field_name:
                continue

            existing_value = dividend_data.get(field_name, '')
            if should_replace_dividend_value(existing_value, clean_value):
                dividend_data[field_name] = clean_value
                if debug:
                    print(f"DEBUG: Found {field_name}: {clean_value} (selector: {full_selector})")
                return True

            if debug:
                print(f"DEBUG: Keeping existing good data for {field_name}: {existing_value} "
                      f"(ignoring selector-based value: {clean_value})")
            return False
        except Exception:
            # Selector failed to evaluate; move on to the next candidate.
            continue
    return False


async def _extract_field_via_js_search(page, field_name: str,
                                       dividend_data: Dict[str, Any], debug: bool) -> None:
    """Fallback: search the DOM for the field label (and variants) via JavaScript."""
    try:
        # Try multiple variations of the field name
        search_terms = [field_name]
        if "Previous" in field_name:
            search_terms.append(field_name.replace("Previous", "Next"))
        if "Annual Dividend Rate" in field_name:
            search_terms.append("IAD")
        if "Annual Dividend Yield" in field_name:
            search_terms.append("Dividend Yield")

        for search_term in search_terms:
            value = await page.evaluate(_JS_FIND_VALUE_NEAR_LABEL, search_term)
            if not (value and value.strip()):
                continue

            existing_value = dividend_data.get(field_name, '')
            if should_replace_dividend_value(existing_value, value):
                dividend_data[field_name] = value.strip()
                if debug:
                    print(f"DEBUG: Found {field_name} via JS search with term '{search_term}': {value}")
            elif debug:
                print(f"DEBUG: Keeping existing good data for {field_name}: {existing_value} "
                      f"(ignoring JS search value: {value})")
            # The first term that yields text settles the field either way.
            break
    except Exception as e:
        if debug:
            print(f"DEBUG: Could not find {field_name}: {e}")


async def extract_dividend_data(page, debug: bool = False) -> Dict[str, Any]:
    """
    Extract dividend information from Schwab stock page.

    Several strategies run in sequence (panel polling, grid extraction,
    in-page text parsing, per-field selectors, per-field DOM search). A value
    already collected is only overwritten when it is empty or contains
    "Show More".

    Args:
        page: Async page object exposing ``wait_for_selector``, ``evaluate``,
            ``is_visible``, ``inner_text``, ``query_selector``,
            ``wait_for_timeout``, ``screenshot`` and ``content``
            (presumably a Playwright ``Page`` - confirm against the caller).
        debug: When True, print progress and save screenshots/HTML through
            ``save_debug_artifact``.

    Returns:
        Dictionary with dividend data fields. Any unexpected error ends the
        extraction early and whatever was collected so far is returned.
    """
    dividend_data: Dict[str, Any] = {}

    try:
        if debug:
            print("DEBUG: Starting dividend data extraction...")
            # Take initial screenshot to see page state
            await _save_debug_screenshot(page, "debug_dividend_start.png", "Initial screenshot saved as")

        # Wait for the dividends section to load dynamically
        if debug:
            print("DEBUG: Waiting for dividends section to load...")
        await _wait_for_dividend_content(page, dividend_data, debug)

        # Check for dividend grid directly without clicking
        await _wait_for_dividend_grid(page, debug)

        # Try to find dividend section
        dividend_section = await _find_dividend_section(page, debug)

        if not dividend_section:
            if not await _page_has_dividend_text(page, dividend_data, debug):
                if debug:
                    print("DEBUG: No dividend-related content found on page")
                return dividend_data

            # Use body as fallback section for broad search
            dividend_section = 'body'
            if debug:
                print("DEBUG: Using body as dividend section for broad search")

        # If we found the dividend grid, use the grid-specific extraction
        if dividend_section == '#dividend-grid':
            await _extract_from_dividend_grid(page, dividend_data, debug)

        # Extract from the dynamically loaded dividend content
        await _extract_dynamic_content(page, dividend_data, debug)

        # Extract each dividend field using multiple selector strategies
        for field_name, selectors in _DIVIDEND_FIELD_SELECTORS.items():
            field_found = await _extract_field_via_selectors(
                page, dividend_section, field_name, selectors, dividend_data, debug)

            # If standard selectors failed, try JavaScript-based text search as fallback
            if not field_found:
                await _extract_field_via_js_search(page, field_name, dividend_data, debug)

        if debug:
            print(f"DEBUG: Extracted dividend data: {dividend_data}")

        return dividend_data

    except Exception as e:
        if debug:
            print(f"DEBUG: Error extracting dividend data: {e}")
        return dividend_data


async def extract(page, debug: bool = False) -> Dict[str, Any]:
    """Compatibility wrapper to call `extract_dividend_data`"""
    return await extract_dividend_data(page, debug=debug)