All checks were successful
Build and Push Docker Image / build (push) Successful in 34s
978 lines
52 KiB
Python
978 lines
52 KiB
Python
from typing import Dict, Any, Optional
|
|
from ...utils.logging import save_debug_artifact
|
|
|
|
|
|
def should_replace_dividend_value(existing_value: Optional[str], new_value: Optional[str]) -> bool:
    """
    Decide whether to replace an existing dividend field value with a new one.

    Rules:
    - Never replace with an empty, None, or whitespace-only new value
    - Replace if there is no existing value (None, empty, or whitespace-only)
    - Replace if the existing value contains the placeholder text "Show More"
    - Otherwise, keep the existing (good) data

    Args:
        existing_value: The value currently stored for the field, if any.
        new_value: The candidate value that might overwrite it.

    Returns:
        True if ``new_value`` should overwrite ``existing_value``, else False.
    """
    # A blank candidate carries no information, so it must never overwrite anything.
    if not new_value or not str(new_value).strip():
        return False

    # Nothing stored yet: any non-blank candidate is an improvement.
    if not existing_value:
        return True

    existing_text = str(existing_value)

    # A whitespace-only existing value is as good as missing; treat it the
    # same way a blank new value is treated above.
    if not existing_text.strip():
        return True

    # "Show More" is placeholder text rather than real data. The substring
    # test also covers the exact-match case.
    return 'Show More' in existing_text
|
|
|
|
|
|
async def extract_dividend_data(page, debug: bool = False) -> Dict[str, Any]:
|
|
"""
|
|
Extract dividend information from Schwab stock page.
|
|
Returns dictionary with dividend data fields.
|
|
"""
|
|
dividend_data: Dict[str, Any] = {}
|
|
|
|
try:
|
|
if debug:
|
|
print("DEBUG: Starting dividend data extraction...")
|
|
# Take initial screenshot to see page state
|
|
png = await page.screenshot(full_page=True)
|
|
path = save_debug_artifact("debug_dividend_start.png", png)
|
|
print(f"DEBUG: Initial screenshot saved as {path}")
|
|
|
|
# Wait for the dividends section to load dynamically
|
|
if debug:
|
|
print("DEBUG: Waiting for dividends section to load...")
|
|
|
|
try:
|
|
# First wait for the dividends panel to appear
|
|
await page.wait_for_selector('#dividends', timeout=15000)
|
|
if debug:
|
|
print("DEBUG: #dividends panel found")
|
|
|
|
# Wait for dividend content to load dynamically
|
|
dividend_loaded = False
|
|
max_attempts = 5 # Reduced from 10 for faster tests
|
|
attempt = 0
|
|
|
|
while not dividend_loaded and attempt < max_attempts:
|
|
attempt += 1
|
|
if debug:
|
|
print(f"DEBUG: Attempt {attempt}/{max_attempts} - Waiting for dynamic dividend content...")
|
|
|
|
# Check if the dividends section has been populated with actual content
|
|
dividend_status = await page.evaluate('''
|
|
() => {
|
|
const result = { loaded: false, debug: {} };
|
|
|
|
// Look for the dividends panel content that should be populated
|
|
const dividendsPanel = document.querySelector('#dividends');
|
|
if (dividendsPanel) {
|
|
const panelBody = dividendsPanel.querySelector('.sdps-panel__body');
|
|
if (panelBody) {
|
|
const textContent = panelBody.textContent || '';
|
|
result.debug.panelBodyLength = textContent.length;
|
|
result.debug.panelBodySample = textContent.substring(0, 200);
|
|
|
|
// Check if the panel has been populated with actual dividend text
|
|
// (not just empty comments)
|
|
const hasRealContent = textContent.length > 50 && (
|
|
textContent.includes('Previous Dividend') ||
|
|
textContent.includes('Pay Date') ||
|
|
textContent.includes('Ex-Date') ||
|
|
textContent.includes('Frequency') ||
|
|
textContent.includes('Annual Dividend') ||
|
|
textContent.includes('$') ||
|
|
textContent.includes('%')
|
|
);
|
|
|
|
if (hasRealContent) {
|
|
result.loaded = true;
|
|
return result;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Alternative: check for stock-dividends component
|
|
const stockDividends = document.querySelector('stock-dividends');
|
|
if (stockDividends) {
|
|
const text = stockDividends.textContent || '';
|
|
result.debug.stockDividendsLength = text.length;
|
|
result.debug.stockDividendsSample = text.substring(0, 100);
|
|
|
|
if (text.length > 20 && text.includes('$')) {
|
|
result.loaded = true;
|
|
return result;
|
|
}
|
|
}
|
|
|
|
// Alternative: check for any elements with dividend-related content
|
|
const allElements = document.querySelectorAll('#dividends *');
|
|
result.debug.totalElements = allElements.length;
|
|
|
|
for (let elem of allElements) {
|
|
const text = elem.textContent || '';
|
|
if (text.includes('Previous Dividend Payment') ||
|
|
(text.includes('$') && text.includes('.'))) {
|
|
result.loaded = true;
|
|
result.debug.foundInElement = elem.tagName + '.' + elem.className;
|
|
return result;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
''')
|
|
|
|
if debug:
|
|
print(f"DEBUG: Dividend status: {dividend_status}")
|
|
|
|
dividend_loaded = dividend_status.get('loaded', False)
|
|
|
|
if dividend_loaded:
|
|
if debug:
|
|
print("DEBUG: Dynamic dividend content loaded!")
|
|
png = await page.screenshot(full_page=True)
|
|
path = save_debug_artifact("debug_dividend_content_loaded.png", png)
|
|
print(f"DEBUG: Screenshot after content loaded: {path}")
|
|
break
|
|
|
|
# Wait between attempts to allow for async loading
|
|
await page.wait_for_timeout(1000) # Reduced from 2000ms for faster tests
|
|
|
|
if not dividend_loaded:
|
|
if debug:
|
|
print("DEBUG: Basic dividend content did not auto-load - this suggests the page is not behaving as expected")
|
|
print("DEBUG: Expected behavior: Basic dividend info should be visible without clicking 'Show More'")
|
|
|
|
# Try to force a page refresh or trigger loading
|
|
print("DEBUG: Attempting to trigger dividend content loading...")
|
|
try:
|
|
# Try scrolling to the dividend section to trigger lazy loading
|
|
await page.evaluate('''
|
|
() => {
|
|
const dividendsPanel = document.querySelector('#dividends');
|
|
if (dividendsPanel) {
|
|
dividendsPanel.scrollIntoView({ behavior: 'smooth', block: 'center' });
|
|
}
|
|
}
|
|
''')
|
|
await page.wait_for_timeout(3000)
|
|
|
|
# Try clicking on the dividends panel header to ensure it's active
|
|
try:
|
|
dividends_header = await page.query_selector('#dividends h2, #dividends .sdps-panel__title')
|
|
if dividends_header:
|
|
await dividends_header.click()
|
|
await page.wait_for_timeout(2000)
|
|
print("DEBUG: Clicked on dividends panel header")
|
|
except:
|
|
pass
|
|
|
|
# Check one more time if content loaded
|
|
final_status = await page.evaluate('''
|
|
() => {
|
|
const dividendsPanel = document.querySelector('#dividends');
|
|
if (dividendsPanel) {
|
|
const panelBody = dividendsPanel.querySelector('.sdps-panel__body');
|
|
if (panelBody) {
|
|
const textContent = panelBody.textContent || '';
|
|
return {
|
|
length: textContent.length,
|
|
sample: textContent.substring(0, 500),
|
|
hasBasicData: textContent.includes('$') && (
|
|
textContent.includes('Previous') ||
|
|
textContent.includes('Pay Date') ||
|
|
textContent.includes('Ex-Date')
|
|
)
|
|
};
|
|
}
|
|
}
|
|
return { length: 0, sample: '', hasBasicData: false };
|
|
}
|
|
''')
|
|
|
|
if debug:
|
|
print(f"DEBUG: Final dividend panel status: {final_status}")
|
|
|
|
if final_status.get('hasBasicData'):
|
|
print("DEBUG: Basic dividend data now detected after manual triggering!")
|
|
dividend_loaded = True
|
|
|
|
# Extract the data immediately while it's loaded
|
|
immediate_extraction = await page.evaluate(r'''
|
|
() => {
|
|
const results = {};
|
|
const dividendsPanel = document.querySelector('#dividends');
|
|
|
|
if (dividendsPanel) {
|
|
const panelBody = dividendsPanel.querySelector('.sdps-panel__body');
|
|
if (panelBody) {
|
|
const fullText = panelBody.textContent || '';
|
|
|
|
// Extract data using pattern matching from the full text
|
|
const patterns = {
|
|
'Previous Dividend Payment': /Previous Dividend Payment\s*\$([0-9]+\.[0-9]+)/,
|
|
'Previous Pay Date': /Previous Pay Date\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/,
|
|
'Previous Ex-Date': /Previous Ex-Date\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/,
|
|
'Frequency': /Frequency\s*([A-Za-z]+)/,
|
|
'Annual Dividend Rate': /(?:Annual Dividend Rate|IAD).*?\$([0-9]+\.[0-9]+)/,
|
|
'Annual Dividend Yield': /([0-9]+\.[0-9]+%)(?=\s|Annual|$)/
|
|
};
|
|
|
|
for (const [field, pattern] of Object.entries(patterns)) {
|
|
const match = fullText.match(pattern);
|
|
if (match) {
|
|
if (field === 'Previous Dividend Payment' || field === 'Annual Dividend Rate') {
|
|
results[field] = '$' + match[1];
|
|
} else {
|
|
results[field] = match[1];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return results;
|
|
}
|
|
''')
|
|
|
|
if debug:
|
|
print(f"DEBUG: Immediate extraction results: {immediate_extraction}")
|
|
|
|
if immediate_extraction:
|
|
dividend_data.update(immediate_extraction)
|
|
# Clean up the Frequency field if it has extra text
|
|
if 'Frequency' in dividend_data and 'Quarterly' in dividend_data['Frequency']:
|
|
dividend_data['Frequency'] = 'Quarterly'
|
|
|
|
except Exception as e:
|
|
if debug:
|
|
print(f"DEBUG: Error during manual triggering: {e}")
|
|
|
|
png = await page.screenshot(full_page=True)
|
|
path = save_debug_artifact("debug_dividend_timeout.png", png)
|
|
print(f"DEBUG: Screenshot after timeout: {path}")
|
|
|
|
except Exception as e:
|
|
if debug:
|
|
print(f"DEBUG: Error waiting for dividend content: {e}")
|
|
|
|
# Check for dividend grid directly without clicking
|
|
if debug:
|
|
print("DEBUG: Checking for #dividend-grid...")
|
|
|
|
dividend_grid_found = False
|
|
try:
|
|
await page.wait_for_selector('#dividend-grid', timeout=10000)
|
|
dividend_grid_found = True
|
|
if debug:
|
|
print("DEBUG: #dividend-grid found!")
|
|
png = await page.screenshot(full_page=True)
|
|
path = save_debug_artifact("debug_dividend_grid_found.png", png)
|
|
print(f"DEBUG: Screenshot with dividend grid: {path}")
|
|
except:
|
|
if debug:
|
|
print("DEBUG: #dividend-grid not found initially")
|
|
png = await page.screenshot(full_page=True)
|
|
path = save_debug_artifact("debug_dividend_no_grid.png", png)
|
|
print(f"DEBUG: Screenshot without grid: {path}")
|
|
|
|
# Try to scroll to the dividend section to ensure it's in view
|
|
if debug:
|
|
print("DEBUG: Scrolling to stock-dividends component...")
|
|
|
|
try:
|
|
await page.evaluate('''
|
|
() => {
|
|
const stockDividends = document.querySelector('stock-dividends');
|
|
if (stockDividends) {
|
|
stockDividends.scrollIntoView({ behavior: 'smooth', block: 'center' });
|
|
}
|
|
}
|
|
''')
|
|
await page.wait_for_timeout(3000)
|
|
|
|
if debug:
|
|
png = await page.screenshot(full_page=True)
|
|
path = save_debug_artifact("debug_dividend_after_scroll.png", png)
|
|
print(f"DEBUG: Screenshot after scroll: {path}")
|
|
|
|
# Check again for dividend grid after scrolling
|
|
try:
|
|
await page.wait_for_selector('#dividend-grid', timeout=5000)
|
|
dividend_grid_found = True
|
|
if debug:
|
|
print("DEBUG: #dividend-grid found after scroll!")
|
|
png = await page.screenshot(full_page=True)
|
|
path = save_debug_artifact("debug_dividend_grid_after_scroll.png", png)
|
|
print(f"DEBUG: Screenshot with grid after scroll: {path}")
|
|
except:
|
|
if debug:
|
|
print("DEBUG: #dividend-grid still not found after scroll")
|
|
|
|
except Exception as e:
|
|
if debug:
|
|
print(f"DEBUG: Error during scroll attempt: {e}")
|
|
|
|
# Common dividend section selectors used by financial websites
|
|
dividend_selectors = [
|
|
'#dividend-grid', # Primary target based on user feedback
|
|
'stock-dividends', # Secondary target - the web component
|
|
'#dividend-section',
|
|
'#dividends-section',
|
|
'.dividend-summary',
|
|
'.dividends-summary',
|
|
'div[data-testid*="dividend"]',
|
|
'div[aria-label*="dividend"]',
|
|
'[class*="dividend"]',
|
|
'section:has-text("Dividend")',
|
|
'div:has-text("Previous Dividend Payment")'
|
|
]
|
|
|
|
# Try to find dividend section
|
|
dividend_section = None
|
|
for selector in dividend_selectors:
|
|
try:
|
|
if await page.is_visible(selector):
|
|
dividend_section = selector
|
|
if debug:
|
|
print(f"DEBUG: Found dividend section with selector: {selector}")
|
|
break
|
|
except:
|
|
continue
|
|
|
|
if not dividend_section:
|
|
if debug:
|
|
print("DEBUG: No dividend section found, trying broader search...")
|
|
|
|
# In debug mode, capture the page content to help identify selectors
|
|
page_content = await page.content()
|
|
path_html = save_debug_artifact("debug_dividend_page.html", page_content)
|
|
print(f"DEBUG: Page HTML saved to {path_html} for analysis")
|
|
|
|
# Also save a screenshot to see the visual layout
|
|
png = await page.screenshot(full_page=True)
|
|
path_png = save_debug_artifact("debug_dividend_page.png", png)
|
|
print(f"DEBUG: Page screenshot saved to {path_png}")
|
|
|
|
# Fallback: look for dividend-related text anywhere on page
|
|
dividend_text_exists = await page.evaluate('''
|
|
() => {
|
|
const text = document.body.innerText.toLowerCase();
|
|
return text.includes('dividend') || text.includes('ex-date') || text.includes('pay date') || text.includes('previous dividend') || text.includes('iad');
|
|
}
|
|
''')
|
|
|
|
if debug:
|
|
print(f"DEBUG: Dividend-related text found on page: {dividend_text_exists}")
|
|
|
|
# Try scrolling down to reveal more content
|
|
await page.evaluate('window.scrollTo(0, document.body.scrollHeight)')
|
|
await page.wait_for_timeout(2000)
|
|
|
|
# Extract all text content that might contain dividend info
|
|
dividend_related_text = await page.evaluate('''
|
|
() => {
|
|
const text = document.body.innerText;
|
|
const lines = text.split('\n');
|
|
const dividendLines = lines.filter(line => {
|
|
const lower = line.toLowerCase();
|
|
return lower.includes('dividend') || lower.includes('ex-date') ||
|
|
lower.includes('pay date') || lower.includes('previous') ||
|
|
lower.includes('iad') || lower.includes('frequency') ||
|
|
lower.includes('quarterly') || lower.includes('$0.26') ||
|
|
lower.includes('0.4865%') || lower.includes('$1.04') ||
|
|
lower.includes('annual dividend') || lower.includes('yield');
|
|
});
|
|
return dividendLines;
|
|
}
|
|
''')
|
|
print(f"DEBUG: Found dividend-related text lines: {dividend_related_text}")
|
|
|
|
# Try a more comprehensive search for dividend data
|
|
all_dividend_info = await page.evaluate('''
|
|
() => {
|
|
// Look for elements containing common dividend field names
|
|
const fieldNames = [
|
|
'Previous Dividend Payment', 'Next Dividend Payment',
|
|
'Previous Pay Date', 'Next Pay Date',
|
|
'Previous Ex-Date', 'Next Ex-Date', 'Ex-Date',
|
|
'Frequency', 'Annual Dividend Rate', 'IAD',
|
|
'Annual Dividend Yield', 'Dividend Yield'
|
|
];
|
|
|
|
const results = {};
|
|
|
|
fieldNames.forEach(fieldName => {
|
|
// Search for elements containing this field name
|
|
const elements = Array.from(document.querySelectorAll('*')).filter(el =>
|
|
el.textContent && el.textContent.includes(fieldName) &&
|
|
el.children.length === 0 // Text nodes only
|
|
);
|
|
|
|
elements.forEach(el => {
|
|
// Look for value in nearby elements
|
|
const parent = el.parentElement;
|
|
if (parent) {
|
|
const siblings = Array.from(parent.children);
|
|
const currentIndex = siblings.indexOf(el);
|
|
|
|
// Check next siblings for values
|
|
for (let i = currentIndex + 1; i < siblings.length; i++) {
|
|
const sibling = siblings[i];
|
|
const text = sibling.textContent.trim();
|
|
if (text && text !== fieldName && text.length > 0 && text.length < 50) {
|
|
results[fieldName] = text;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Check same element for values after the field name
|
|
const fullText = el.textContent;
|
|
const fieldIndex = fullText.indexOf(fieldName);
|
|
if (fieldIndex >= 0) {
|
|
const afterField = fullText.substring(fieldIndex + fieldName.length).trim();
|
|
if (afterField && afterField.length > 0 && afterField.length < 50) {
|
|
results[fieldName] = afterField;
|
|
}
|
|
}
|
|
}
|
|
});
|
|
});
|
|
|
|
return results;
|
|
}
|
|
''')
|
|
print(f"DEBUG: Comprehensive dividend search results: {all_dividend_info}")
|
|
|
|
# If we found data in the comprehensive search, use it only if we don't already have good data
|
|
if all_dividend_info:
|
|
for field, value in all_dividend_info.items():
|
|
if value and value.strip():
|
|
existing_value = dividend_data.get(field, '')
|
|
if should_replace_dividend_value(existing_value, value):
|
|
dividend_data[field] = value.strip()
|
|
if debug:
|
|
print(f"DEBUG: Added dividend field from comprehensive search: {field} = {value}")
|
|
elif debug:
|
|
print(f"DEBUG: Keeping existing good data for {field}: {existing_value} (ignoring comprehensive search value: {value})")
|
|
|
|
if not dividend_text_exists:
|
|
if debug:
|
|
print("DEBUG: No dividend-related content found on page")
|
|
return dividend_data
|
|
|
|
# Use body as fallback section for broad search
|
|
dividend_section = 'body'
|
|
if debug:
|
|
print("DEBUG: Using body as dividend section for broad search")
|
|
|
|
# If we found the dividend grid, use specific selectors based on user feedback
|
|
if dividend_section == '#dividend-grid':
|
|
if debug:
|
|
print("DEBUG: Using specific dividend grid selectors...")
|
|
|
|
try:
|
|
# First check if dividend grid is actually present and populated
|
|
grid_status = await page.evaluate('''
|
|
() => {
|
|
const dividendGrid = document.querySelector('#dividend-grid');
|
|
if (!dividendGrid) return { found: false, message: 'No #dividend-grid element found' };
|
|
|
|
const textContent = dividendGrid.textContent || '';
|
|
const hasContent = textContent.trim().length > 50;
|
|
const childCount = dividendGrid.children.length;
|
|
|
|
return {
|
|
found: true,
|
|
hasContent,
|
|
textLength: textContent.length,
|
|
childCount,
|
|
preview: textContent.substring(0, 200),
|
|
message: `Grid found with ${childCount} children, ${textContent.length} chars`
|
|
};
|
|
}
|
|
''')
|
|
|
|
if debug:
|
|
print(f"DEBUG: Dividend grid status: {grid_status}")
|
|
|
|
# Extract dividend data using improved selectors
|
|
specific_dividend_data = await page.evaluate(r'''
|
|
() => {
|
|
const results = {};
|
|
|
|
// Check if dividend grid exists and has content
|
|
const dividendGrid = document.querySelector('#dividend-grid');
|
|
if (dividendGrid) {
|
|
const allGridText = dividendGrid.textContent || '';
|
|
const lines = allGridText.split('\n').map(line => line.trim()).filter(line => line.length > 0);
|
|
|
|
// Try structured approach first - look for rows/cells
|
|
const dividendRows = dividendGrid.querySelectorAll('div[class*="row"], tr, .dividend-row, div:has(div)');
|
|
dividendRows.forEach((row, rowIndex) => {
|
|
const rowText = row.textContent || '';
|
|
|
|
// Look for dividend payment info
|
|
if (rowText.includes('Dividend Payment') || (rowText.includes('Previous') && rowText.includes('$'))) {
|
|
const amountMatch = rowText.match(/\$[0-9]+\.[0-9]+/);
|
|
if (amountMatch && !results['Previous Dividend Payment']) {
|
|
results['Previous Dividend Payment'] = amountMatch[0];
|
|
}
|
|
|
|
// Look for dates in the same row
|
|
const dateMatches = rowText.match(/([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/g);
|
|
if (dateMatches) {
|
|
if (dateMatches.length >= 1 && !results['Previous Pay Date']) results['Previous Pay Date'] = dateMatches[0];
|
|
if (dateMatches.length >= 2 && !results['Previous Ex-Date']) results['Previous Ex-Date'] = dateMatches[1];
|
|
}
|
|
}
|
|
});
|
|
|
|
// Fallback: Parse all lines systematically
|
|
for (let i = 0; i < lines.length; i++) {
|
|
const line = lines[i];
|
|
const nextLine = i + 1 < lines.length ? lines[i + 1] : '';
|
|
|
|
// Match dividend payment
|
|
if ((line.includes('Previous Dividend Payment') || line.includes('Dividend Payment')) && !results['Previous Dividend Payment']) {
|
|
const amountPattern = /\$[0-9]+\.[0-9]+/;
|
|
let amount = line.match(amountPattern) || nextLine.match(amountPattern);
|
|
if (amount) results['Previous Dividend Payment'] = amount[0];
|
|
}
|
|
|
|
// Match pay date
|
|
if (line.includes('Pay Date') && !results['Previous Pay Date']) {
|
|
const datePattern = /[A-Za-z]{3,9} [0-9]{1,2}, [0-9]{4}/;
|
|
let date = line.match(datePattern) || nextLine.match(datePattern);
|
|
if (date) results['Previous Pay Date'] = date[0];
|
|
}
|
|
|
|
// Match ex-date
|
|
if (line.includes('Ex-Date') && !results['Previous Ex-Date']) {
|
|
const datePattern = /[A-Za-z]{3,9} [0-9]{1,2}, [0-9]{4}/;
|
|
let date = line.match(datePattern) || nextLine.match(datePattern);
|
|
if (date) results['Previous Ex-Date'] = date[0];
|
|
}
|
|
|
|
// Match frequency
|
|
if (line.includes('Frequency') && !results['Frequency']) {
|
|
const freqLine = line + ' ' + nextLine;
|
|
if (freqLine.toLowerCase().includes('quarterly')) results['Frequency'] = 'Quarterly';
|
|
else if (freqLine.toLowerCase().includes('monthly')) results['Frequency'] = 'Monthly';
|
|
else if (freqLine.toLowerCase().includes('annual')) results['Frequency'] = 'Annual';
|
|
else if (freqLine.toLowerCase().includes('semi')) results['Frequency'] = 'Semi-Annual';
|
|
}
|
|
|
|
// Match annual dividend rate
|
|
if ((line.includes('Annual Dividend Rate') || line.includes('IAD')) && !results['Annual Dividend Rate']) {
|
|
const amountPattern = /\$[0-9]+\.[0-9]+/;
|
|
let amount = line.match(amountPattern) || nextLine.match(amountPattern);
|
|
if (amount) results['Annual Dividend Rate'] = amount[0];
|
|
}
|
|
|
|
// Match annual dividend yield
|
|
if (line.includes('Annual Dividend Yield') && !results['Annual Dividend Yield']) {
|
|
const percentPattern = /[0-9]+\.[0-9]+%/;
|
|
let percent = line.match(percentPattern) || nextLine.match(percentPattern);
|
|
if (percent) results['Annual Dividend Yield'] = percent[0];
|
|
}
|
|
}
|
|
}
|
|
|
|
return results;
|
|
}
|
|
''')
|
|
|
|
if debug:
|
|
print(f"DEBUG: Specific dividend grid extraction results: {specific_dividend_data}")
|
|
|
|
# Add the extracted data to dividend_data only if we don't already have good data
|
|
if specific_dividend_data:
|
|
for field, value in specific_dividend_data.items():
|
|
existing_value = dividend_data.get(field, '')
|
|
if should_replace_dividend_value(existing_value, value):
|
|
dividend_data[field] = value
|
|
if debug:
|
|
print(f"DEBUG: Updated {field} from specific extraction: {value}")
|
|
elif debug:
|
|
print(f"DEBUG: Keeping existing good data for {field}: {existing_value} (ignoring specific extraction value: {value})")
|
|
|
|
except Exception as e:
|
|
if debug:
|
|
print(f"DEBUG: Error in specific dividend grid extraction: {e}")
|
|
|
|
# Extract dividend data using the correct structure from gemini analysis
|
|
if debug:
|
|
print("DEBUG: Extracting dividend data from dividend-grid structure...")
|
|
|
|
# First try to extract data from the dynamically loaded dividend content
|
|
try:
|
|
dividend_dynamic_data = await page.evaluate(r'''
|
|
() => {
|
|
const results = {};
|
|
|
|
// Strategy 1: Look for any dividend grid structure that was loaded
|
|
const dividendGrid = document.querySelector('#dividend-grid');
|
|
if (dividendGrid) {
|
|
const rows = dividendGrid.querySelectorAll('div.sdps-row, .row');
|
|
|
|
for (let row of rows) {
|
|
const cells = row.querySelectorAll('div[class*="col-"]');
|
|
if (cells.length >= 2) {
|
|
const label = cells[0].textContent.trim();
|
|
const value = cells[1].textContent.trim();
|
|
|
|
// Map the labels to our expected field names
|
|
if (label.includes('Previous Dividend Payment') || label.includes('Dividend Payment')) {
|
|
results['Previous Dividend Payment'] = value;
|
|
} else if (label.includes('Previous Pay Date') || label.includes('Pay Date')) {
|
|
results['Previous Pay Date'] = value;
|
|
} else if (label.includes('Previous Ex-Date') || label.includes('Ex-Date')) {
|
|
results['Previous Ex-Date'] = value;
|
|
} else if (label.includes('Frequency')) {
|
|
results['Frequency'] = value;
|
|
} else if (label.includes('Annual Dividend Rate') || label.includes('IAD')) {
|
|
results['Annual Dividend Rate'] = value;
|
|
} else if (label.includes('Annual Dividend Yield')) {
|
|
results['Annual Dividend Yield'] = value;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (Object.keys(results).length > 0) {
|
|
return results;
|
|
}
|
|
}
|
|
|
|
// Strategy 2: Look for stock-dividends component content
|
|
const stockDividends = document.querySelector('stock-dividends');
|
|
if (stockDividends) {
|
|
const allText = stockDividends.textContent || '';
|
|
const lines = allText.split('\n').map(line => line.trim()).filter(line => line);
|
|
|
|
for (let i = 0; i < lines.length; i++) {
|
|
const line = lines[i];
|
|
const nextLine = i + 1 < lines.length ? lines[i + 1] : '';
|
|
|
|
if (line.includes('Previous Dividend Payment') || line.includes('Dividend Payment')) {
|
|
const amountMatch = (line + ' ' + nextLine).match(/\$[0-9]+\.[0-9]+/);
|
|
if (amountMatch) results['Previous Dividend Payment'] = amountMatch[0];
|
|
} else if (line.includes('Pay Date')) {
|
|
const dateMatch = (line + ' ' + nextLine).match(/[A-Za-z]+ [0-9]{1,2}, [0-9]{4}/);
|
|
if (dateMatch) results['Previous Pay Date'] = dateMatch[0];
|
|
} else if (line.includes('Ex-Date')) {
|
|
const dateMatch = (line + ' ' + nextLine).match(/[A-Za-z]+ [0-9]{1,2}, [0-9]{4}/);
|
|
if (dateMatch) results['Previous Ex-Date'] = dateMatch[0];
|
|
} else if (line.includes('Frequency')) {
|
|
if (line.toLowerCase().includes('quarterly') || nextLine.toLowerCase().includes('quarterly')) {
|
|
results['Frequency'] = 'Quarterly';
|
|
} else if (line.toLowerCase().includes('monthly') || nextLine.toLowerCase().includes('monthly')) {
|
|
results['Frequency'] = 'Monthly';
|
|
} else if (line.toLowerCase().includes('annual') || nextLine.toLowerCase().includes('annual')) {
|
|
results['Frequency'] = 'Annual';
|
|
}
|
|
} else if (line.includes('Annual Dividend Rate') || line.includes('IAD')) {
|
|
const amountMatch = (line + ' ' + nextLine).match(/\$[0-9]+\.[0-9]+/);
|
|
if (amountMatch) results['Annual Dividend Rate'] = amountMatch[0];
|
|
} else if (line.includes('Annual Dividend Yield')) {
|
|
const percentMatch = (line + ' ' + nextLine).match(/[0-9]+\.[0-9]+%/);
|
|
if (percentMatch) results['Annual Dividend Yield'] = percentMatch[0];
|
|
}
|
|
}
|
|
|
|
if (Object.keys(results).length > 0) {
|
|
return results;
|
|
}
|
|
}
|
|
|
|
// Strategy 3: Look within entire dividends panel for any structured content
|
|
const dividendsPanel = document.querySelector('#dividends');
|
|
if (dividendsPanel) {
|
|
const allElements = dividendsPanel.querySelectorAll('*');
|
|
|
|
for (let elem of allElements) {
|
|
const text = elem.textContent || '';
|
|
|
|
// Look for dollar amounts near dividend-related text
|
|
if (text.includes('Previous Dividend Payment') || text.includes('Dividend Payment')) {
|
|
const parent = elem.parentElement;
|
|
if (parent) {
|
|
const siblings = Array.from(parent.children);
|
|
const currentIndex = siblings.indexOf(elem);
|
|
|
|
// Check next siblings for values
|
|
for (let j = currentIndex + 1; j < siblings.length; j++) {
|
|
const sibling = siblings[j];
|
|
const siblingText = sibling.textContent.trim();
|
|
const amountMatch = siblingText.match(/\$[0-9]+\.[0-9]+/);
|
|
if (amountMatch) {
|
|
results['Previous Dividend Payment'] = amountMatch[0];
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Similar logic for other fields...
|
|
// (truncated for brevity but would include Pay Date, Ex-Date, etc.)
|
|
}
|
|
}
|
|
|
|
return results;
|
|
}
|
|
''')
|
|
|
|
if debug:
|
|
print(f"DEBUG: Dynamic dividend extraction results: {dividend_dynamic_data}")
|
|
|
|
if dividend_dynamic_data:
|
|
for field, value in dividend_dynamic_data.items():
|
|
existing_value = dividend_data.get(field, '')
|
|
if should_replace_dividend_value(existing_value, value):
|
|
dividend_data[field] = value
|
|
if debug:
|
|
print(f"DEBUG: Updated {field} from dynamic extraction: {value}")
|
|
elif debug:
|
|
print(f"DEBUG: Keeping existing good data for {field}: {existing_value} (ignoring dynamic extraction value: {value})")
|
|
|
|
except Exception as e:
|
|
if debug:
|
|
print(f"DEBUG: Error in dynamic dividend extraction: {e}")
|
|
|
|
# Define dividend fields and their possible selectors as fallback
|
|
dividend_fields = {
|
|
'Previous Dividend Payment': [
|
|
'#dividend-grid div:has-text("Previous Dividend Payment") ~ div',
|
|
'#dividend-grid div:has-text("Dividend Payment") ~ div',
|
|
'#dividends span:has-text("Previous Dividend Payment") + span',
|
|
'#dividends div:has-text("Previous Dividend Payment") + div',
|
|
'#dividends *:has-text("Previous Dividend Payment") ~ *',
|
|
'stock-dividends span:has-text("Previous Dividend Payment") + span',
|
|
'stock-dividends div:has-text("Previous Dividend Payment") + div',
|
|
'span:has-text("Previous Dividend Payment") + span',
|
|
'div:has-text("Previous Dividend Payment") + div',
|
|
'*:has-text("Previous Dividend Payment") ~ *',
|
|
'span:has-text("Next Dividend Payment") + span',
|
|
'div:has-text("Next Dividend Payment") + div',
|
|
'*:has-text("Next Dividend Payment") ~ *',
|
|
'[data-field="dividend-payment"]',
|
|
'.dividend-payment'
|
|
],
|
|
'Previous Pay Date': [
|
|
'#dividend-grid div:has-text("Previous Pay Date") ~ div',
|
|
'#dividend-grid div:has-text("Pay Date") ~ div',
|
|
'#dividends span:has-text("Previous Pay Date") + span',
|
|
'#dividends div:has-text("Previous Pay Date") + div',
|
|
'#dividends *:has-text("Previous Pay Date") ~ *',
|
|
'stock-dividends span:has-text("Previous Pay Date") + span',
|
|
'stock-dividends div:has-text("Previous Pay Date") + div',
|
|
'span:has-text("Previous Pay Date") + span',
|
|
'div:has-text("Previous Pay Date") + div',
|
|
'*:has-text("Previous Pay Date") ~ *',
|
|
'span:has-text("Next Pay Date") + span',
|
|
'div:has-text("Next Pay Date") + div',
|
|
'*:has-text("Next Pay Date") ~ *',
|
|
'*:has-text("Pay Date") ~ *',
|
|
'[data-field="pay-date"]',
|
|
'.pay-date'
|
|
],
|
|
'Previous Ex-Date': [
|
|
'#dividend-grid div:has-text("Previous Ex-Date") ~ div',
|
|
'#dividend-grid div:has-text("Ex-Date") ~ div',
|
|
'#dividends span:has-text("Previous Ex-Date") + span',
|
|
'#dividends div:has-text("Previous Ex-Date") + div',
|
|
'#dividends *:has-text("Previous Ex-Date") ~ *',
|
|
'stock-dividends span:has-text("Previous Ex-Date") + span',
|
|
'stock-dividends div:has-text("Previous Ex-Date") + div',
|
|
'span:has-text("Previous Ex-Date") + span',
|
|
'div:has-text("Previous Ex-Date") + div',
|
|
'*:has-text("Previous Ex-Date") ~ *',
|
|
'span:has-text("Next Ex-Date") + span',
|
|
'div:has-text("Next Ex-Date") + div',
|
|
'*:has-text("Next Ex-Date") ~ *',
|
|
'*:has-text("Ex-Date") ~ *',
|
|
'[data-field="ex-date"]',
|
|
'.ex-date'
|
|
],
|
|
'Frequency': [
|
|
'#dividend-grid div:has-text("Frequency") ~ div',
|
|
'#dividends span:has-text("Frequency") + span',
|
|
'#dividends div:has-text("Frequency") + div',
|
|
'#dividends *:has-text("Frequency") ~ *',
|
|
'stock-dividends span:has-text("Frequency") + span',
|
|
'stock-dividends div:has-text("Frequency") + div',
|
|
'span:has-text("Frequency") + span',
|
|
'div:has-text("Frequency") + div',
|
|
'*:has-text("Frequency") ~ *',
|
|
'[data-field="frequency"]',
|
|
'.dividend-frequency',
|
|
'.frequency'
|
|
],
|
|
'Annual Dividend Rate': [
|
|
'#dividend-grid div:has-text("Annual Dividend Rate") ~ div',
|
|
'#dividend-grid div:has-text("IAD") ~ div',
|
|
'#dividends span:has-text("Annual Dividend Rate") + span',
|
|
'#dividends div:has-text("Annual Dividend Rate") + div',
|
|
'#dividends *:has-text("Annual Dividend Rate") ~ *',
|
|
'#dividends span:has-text("IAD") + span',
|
|
'#dividends *:has-text("IAD") ~ *',
|
|
'stock-dividends span:has-text("Annual Dividend Rate") + span',
|
|
'stock-dividends div:has-text("Annual Dividend Rate") + div',
|
|
'stock-dividends span:has-text("IAD") + span',
|
|
'span:has-text("Annual Dividend Rate") + span',
|
|
'div:has-text("Annual Dividend Rate") + div',
|
|
'*:has-text("Annual Dividend Rate") ~ *',
|
|
'span:has-text("IAD") + span',
|
|
'*:has-text("IAD") ~ *',
|
|
'[data-field="annual-rate"]',
|
|
'.annual-dividend-rate'
|
|
],
|
|
'Annual Dividend Yield': [
|
|
'#dividend-grid div:has-text("Annual Dividend Yield") ~ div',
|
|
'#dividends span:has-text("Annual Dividend Yield") + span',
|
|
'#dividends div:has-text("Annual Dividend Yield") + div',
|
|
'#dividends *:has-text("Annual Dividend Yield") ~ *',
|
|
'stock-dividends span:has-text("Annual Dividend Yield") + span',
|
|
'stock-dividends div:has-text("Annual Dividend Yield") + div',
|
|
'span:has-text("Annual Dividend Yield") + span',
|
|
'div:has-text("Annual Dividend Yield") + div',
|
|
'*:has-text("Annual Dividend Yield") ~ *',
|
|
'[data-field="dividend-yield"]',
|
|
'.dividend-yield'
|
|
]
|
|
}
|
|
|
|
# Extract each dividend field using multiple selector strategies
|
|
for field_name, selectors in dividend_fields.items():
|
|
field_found = False
|
|
|
|
# Try each selector for this field
|
|
for selector in selectors:
|
|
if field_found:
|
|
break
|
|
|
|
try:
|
|
# Scope search within dividend section if found, otherwise search whole page
|
|
full_selector = f'{dividend_section} {selector}' if dividend_section != 'body' else selector
|
|
|
|
if await page.is_visible(full_selector, timeout=1000):
|
|
value = await page.inner_text(full_selector)
|
|
clean_value = value.strip()
|
|
|
|
if clean_value and clean_value != field_name: # Ensure we got actual value, not the label
|
|
existing_value = dividend_data.get(field_name, '')
|
|
if should_replace_dividend_value(existing_value, clean_value):
|
|
dividend_data[field_name] = clean_value
|
|
field_found = True
|
|
if debug:
|
|
print(f"DEBUG: Found {field_name}: {clean_value} (selector: {full_selector})")
|
|
elif debug:
|
|
print(f"DEBUG: Keeping existing good data for {field_name}: {existing_value} (ignoring selector-based value: {clean_value})")
|
|
break
|
|
except:
|
|
continue
|
|
|
|
# If standard selectors failed, try JavaScript-based text search as fallback
|
|
if not field_found:
|
|
try:
|
|
# Try multiple variations of the field name
|
|
search_terms = [field_name]
|
|
if "Previous" in field_name:
|
|
search_terms.append(field_name.replace("Previous", "Next"))
|
|
if "Annual Dividend Rate" in field_name:
|
|
search_terms.append("IAD")
|
|
if "Annual Dividend Yield" in field_name:
|
|
search_terms.append("Dividend Yield")
|
|
|
|
for search_term in search_terms:
|
|
if field_found:
|
|
break
|
|
|
|
value = await page.evaluate(rf'''
|
|
() => {{
|
|
const searchText = "{search_term}";
|
|
|
|
// First check within the dividends section specifically
|
|
const dividendsPanel = document.querySelector('#dividends');
|
|
const stockDividends = document.querySelector('stock-dividends');
|
|
const searchContainers = [dividendsPanel, stockDividends, document];
|
|
|
|
for (let container of searchContainers) {{
|
|
if (!container) continue;
|
|
|
|
const elements = Array.from(container.querySelectorAll('*'));
|
|
|
|
for (let elem of elements) {{
|
|
if (elem.textContent && elem.textContent.includes(searchText)) {{
|
|
// Look for next sibling or nearby element with value
|
|
let candidate = elem.nextElementSibling;
|
|
if (candidate && candidate.textContent &&
|
|
!candidate.textContent.includes(searchText) &&
|
|
candidate.textContent.trim().length > 0) {{
|
|
return candidate.textContent.trim();
|
|
}}
|
|
|
|
// Try parent's next sibling
|
|
candidate = elem.parentElement?.nextElementSibling;
|
|
if (candidate && candidate.textContent &&
|
|
!candidate.textContent.includes(searchText) &&
|
|
candidate.textContent.trim().length > 0) {{
|
|
return candidate.textContent.trim();
|
|
}}
|
|
|
|
// Try looking in the same element's parent for nearby text
|
|
const parent = elem.parentElement;
|
|
if (parent) {{
|
|
const parentText = parent.textContent;
|
|
const lines = parentText.split('\n');
|
|
for (let i = 0; i < lines.length; i++) {{
|
|
if (lines[i].includes(searchText) && i + 1 < lines.length) {{
|
|
const nextLine = lines[i + 1].trim();
|
|
if (nextLine && !nextLine.includes(searchText)) {{
|
|
return nextLine;
|
|
}}
|
|
}}
|
|
}}
|
|
}}
|
|
}}
|
|
}}
|
|
|
|
// If found in this container, stop searching
|
|
if (container !== document) {{
|
|
break;
|
|
}}
|
|
}}
|
|
return null;
|
|
}}
|
|
''')
|
|
|
|
if value and value.strip():
|
|
existing_value = dividend_data.get(field_name, '')
|
|
if should_replace_dividend_value(existing_value, value):
|
|
dividend_data[field_name] = value.strip()
|
|
field_found = True
|
|
if debug:
|
|
print(f"DEBUG: Found {field_name} via JS search with term '{search_term}': {value}")
|
|
elif debug:
|
|
print(f"DEBUG: Keeping existing good data for {field_name}: {existing_value} (ignoring JS search value: {value})")
|
|
break
|
|
|
|
except Exception as e:
|
|
if debug:
|
|
print(f"DEBUG: Could not find {field_name}: {e}")
|
|
continue
|
|
|
|
if debug:
|
|
print(f"DEBUG: Extracted dividend data: {dividend_data}")
|
|
|
|
return dividend_data
|
|
|
|
except Exception as e:
|
|
if debug:
|
|
print(f"DEBUG: Error extracting dividend data: {e}")
|
|
return dividend_data
|
|
|
|
|
|
async def extract(page, debug: bool = False) -> Dict[str, Any]:
    """
    Backward-compatible alias that delegates to `extract_dividend_data`.

    Forwards `page` and the `debug` flag unchanged and hands back whatever
    dictionary the delegate returns.
    """
    dividend_info = await extract_dividend_data(page, debug=debug)
    return dividend_info
|