fix(positions): sync latest scraper fixes from main repository
All checks were successful
Build and Push Docker Image / build (push) Successful in 36s

This commit is contained in:
2026-04-24 21:34:38 +00:00
parent 429a2832fd
commit a05ba3b8a8
4 changed files with 385 additions and 97 deletions

View File

@@ -1006,8 +1006,11 @@ async def login_to_schwab(username: str, password: str) -> Optional[List[Dict[st
mfa_code = None
try:
logger.info("Waiting 5 seconds for email code to arrive before checking webhook...")
await asyncio.sleep(5)
async with aiohttp.ClientSession() as session:
for idx in range(60): # 2 minutes, every 2 seconds
for attempt in range(2):
print(f"Checking webhook for code (attempt {attempt + 1}/2)...")
try:
async with session.get("https://n8n.ext.ben.io/webhook/schwab-token") as resp:
if resp.status == 200:
@@ -1016,25 +1019,37 @@ async def login_to_schwab(username: str, password: str) -> Optional[List[Dict[st
# Parse based on expected n8n output formats
code = None
if isinstance(data, dict):
code = data.get("code") or data.get("token") or data.get("body", {}).get("code")
code = data.get("code") or data.get("token") or data.get("login_code") or data.get("body", {}).get("code")
elif isinstance(data, list) and len(data) > 0:
code = data[-1].get("code") or data[-1].get("token")
code = data[-1].get("code") or data[-1].get("token") or data[-1].get("login_code")
if code:
mfa_code = code
logger.info(f"Got MFA code from webhook: {mfa_code}")
break
else:
logger.warning("Webhook returned data but no code found inside.")
else:
logger.warning(f"Webhook returned status code {resp.status}")
except Exception as e:
logger.debug(f"Webhook poll error: {e}")
if idx % 10 == 0:
print(f"Still waiting for webhook code... ({idx*2}s/120s)")
await asyncio.sleep(2)
if not mfa_code and attempt == 0:
logger.info("Token not found, waiting 10 seconds before 1 retry...")
await asyncio.sleep(10)
except Exception as loop_e:
logger.error(f"Error during webhook polling loop: {loop_e}")
logger.error(f"Error during webhook checking: {loop_e}")
if mfa_code:
logger.info("Entering MFA code into form...")
try:
# When on the sws-gateway-nr OTP page, the form is rendered
# directly on the page — there is no #lmsIframe wrapper here.
# Only look for the iframe when on the client.schwab.com login page.
current_page_url = page.url
if 'sws-gateway-nr' in current_page_url or 'otp' in current_page_url:
logger.debug(f"OTP page detected ({current_page_url}), querying form directly on page")
target = page
else:
target = page
iframe_element = await page.query_selector('#lmsIframe')
if iframe_element:
@@ -1044,6 +1059,7 @@ async def login_to_schwab(username: str, password: str) -> Optional[List[Dict[st
code_input = await target.query_selector('input[type="text"], input[type="tel"], input[name*="code" i], input[id*="code" i], input[autocomplete*="one-time-code" i]')
if code_input:
await code_input.fill(str(mfa_code))
logger.info(f"Filled OTP field with code: {mfa_code}")
# Sometimes the submit button specifically says 'Trust device' or similar
submit_btn = await target.query_selector('button[type="submit"], button:has-text("Continue"), button:has-text("Verify"), button:has-text("Submit"), button:has-text("Log in"), button[id*="submit"], button[id*="continue"]')
@@ -1052,7 +1068,10 @@ async def login_to_schwab(username: str, password: str) -> Optional[List[Dict[st
print("Submitted MFA code successfully.")
await page.wait_for_timeout(5000)
else:
logger.warning("Submit button not found after filling OTP — waiting anyway")
await page.wait_for_timeout(5000)
else:
logger.error("OTP input field not found on page")
except Exception as e:
logger.error(f"Failed to enter MFA code: {e}")

View File

@@ -134,7 +134,8 @@ async def async_main():
return
if args.positions is not None:
account_arg = args.positions or None
# If --positions has a value, use it. Otherwise, fall back to --account.
account_arg = args.positions if args.positions != "" else args.account
envelope = await unified_api.get_positions(
account=account_arg,
include_non_equity=args.include_non_equity,

View File

@@ -9,6 +9,7 @@ from ...browser.client import connect, new_context, new_page
from ...browser.navigation import goto_with_auth_check
from ...core import AccountSummary, Envelope, ErrorType, Lot, Position, fail, ok
from ...core.config import get_playwright_url, load_config
from ...utils.logging import save_debug_artifact
POSITIONS_URL = "https://client.schwab.com/app/accounts/positions/#/"
@@ -138,9 +139,13 @@ async def _evaluate_table(page) -> dict[str, Any] | None:
return null;
}
const headers = Array.from(table.querySelectorAll('thead tr th')).map((th) =>
(th.innerText || th.textContent || '').trim()
);
const headers = Array.from(table.querySelectorAll('thead tr th')).map((th) => {
const btn = th.querySelector('button, .sdps-tables__header-text');
if (btn) {
return (btn.innerText || btn.textContent || '').trim();
}
return (th.innerText || th.textContent || '').trim();
});
const rowElements = Array.from(table.querySelectorAll('tbody tr'));
const rows = [];
@@ -149,11 +154,8 @@ async def _evaluate_table(page) -> dict[str, Any] | None:
const isLotRow = (row) => {
const klass = (row.className || '').toLowerCase();
if (klass.includes('lot') || klass.includes('sub') || klass.includes('child')) {
return true;
}
const dataRole = (row.getAttribute('data-row-type') || '').toLowerCase();
return dataRole.includes('lot');
const tagName = (row.tagName || '').toLowerCase();
return klass.includes('lot') || klass.includes('child') || tagName.includes('app-lot');
};
const isPositionRow = (row) => {
@@ -181,9 +183,34 @@ async def _evaluate_table(page) -> dict[str, Any] | None:
continue;
}
const cells = Array.from(row.querySelectorAll('td')).map((cell) =>
(cell.innerText || cell.textContent || '').trim()
);
const cells = Array.from(row.querySelectorAll('td')).map((cell) => {
// 1. Try to find a title attribute on a span (often has more precise value)
const titledSpan = cell.querySelector('span[title]');
if (titledSpan && titledSpan.getAttribute('title').trim().length > 0) {
const title = titledSpan.getAttribute('title').trim();
if (title.includes('$') || /^[+-]?[\\d,.]+$/.test(title) || title.includes('%')) {
return title;
}
}
// 2. Try to find text directly or within common button/link wrappers
const btn = cell.querySelector('button, a, .sdps-button');
if (btn) {
// Check button title too
if (btn.hasAttribute('title') && btn.getAttribute('title').trim().length > 0) {
return btn.getAttribute('title').trim();
}
// Ignore some internal elements like superscripts if present
const clone = btn.cloneNode(true);
clone.querySelectorAll('sup, .sdps-sr-only').forEach(el => el.remove());
return (clone.innerText || clone.textContent || '').trim();
}
// 3. Just clean up the cell text
const clone = cell.cloneNode(true);
clone.querySelectorAll('sup, .sdps-sr-only').forEach(el => el.remove());
return (clone.innerText || clone.textContent || '').trim();
});
if (!cells.length) {
continue;
@@ -191,7 +218,8 @@ async def _evaluate_table(page) -> dict[str, Any] | None:
if (isLotRow(row)) {
if (current) {
current.lots.push(cells);
// For lots, we typically skip the first two columns (empty/checkbox)
current.lots.push(cells.slice(2));
}
} else if (isPositionRow(row)) {
// Extract symbol from data-symbol attribute
@@ -216,40 +244,43 @@ async def _evaluate_table(page) -> dict[str, Any] | None:
def _map_row(headers: Sequence[str], cells: Sequence[str]) -> dict[str, str]:
result: dict[str, str] = {}
# Special handling: The table has columns in headers that don't correspond to cells
# Headers: ['', 'Symbol', 'Description', 'Qty', 'Price', ...]
# Cells: ['VANGUARD...', '192.5', '$328.17', ...]
# The first two headers (empty checkbox and Symbol) have no corresponding cells
# So: Cell 0 → 'Description', Cell 1 → 'Qty', Cell 2 → 'Price', etc.
# Normalize header labels; empty headers are kept under a placeholder name (empty_N) so positions stay aligned with the header row
data_headers = []
for h in headers:
# Replace non-breaking spaces and other special whitespace with regular spaces
h_clean = h.replace('\u00a0', ' ').replace('\u200b', '').strip()
name = h_clean.split('\n')[0].strip().lower()
if name:
data_headers.append(name)
else:
data_headers.append(f"empty_{len(data_headers)}")
# Find the symbol header index to know where the offset starts
symbol_header_idx = None
for idx, header in enumerate(headers):
key = header.strip().lower()
if 'symbol' in key and 'description' not in key:
symbol_header_idx = idx
# We skip headers that definitely don't have cells (checkbox, symbol is usually in data-symbol)
# Looking at debug output, 'description' is the first cell.
# So we find where 'description' or 'name' is in our data_headers.
start_idx = -1
for i, h in enumerate(data_headers):
if h in {'description', 'name'}:
start_idx = i
break
# Calculate offset - typically 2 (empty column + symbol column)
offset = symbol_header_idx + 1 if symbol_header_idx is not None else 0
if start_idx == -1:
# Fallback to simple index mapping if we can't find description
for i, cell in enumerate(cells):
key = data_headers[i] if i < len(data_headers) else f"column_{i}"
result[key] = cell
return result
for idx, header in enumerate(headers):
# Normalize header: take first line, strip, lowercase
# Headers often have format "Label\nsort\nfieldname"
header_parts = header.strip().split('\n')
key = header_parts[0].strip().lower() if header_parts else ""
if not key:
key = f"column_{idx}"
# Map header to cell with offset
if idx < offset:
# These headers (empty, symbol) have no corresponding cells
value = ""
# Map cells starting from description
for i, cell in enumerate(cells):
header_idx = start_idx + i
if header_idx < len(data_headers):
key = data_headers[header_idx]
result[key] = cell
else:
cell_idx = idx - offset
value = cells[cell_idx].strip() if cell_idx < len(cells) else ""
result[f"extra_{i}"] = cell
result[key] = value
return result
@@ -259,15 +290,20 @@ def _parse_lots(lot_rows: Sequence[Sequence[str]]) -> list[Lot]:
if not cells:
continue
# New modal table columns:
# 0: Open Date, 1: Quantity, 2: Price, 3: Cost/Share, 4: Market Value, 5: Cost Basis, ...
acquired_date = cells[0].strip() if len(cells) > 0 else None
quantity = _parse_float(cells[1] if len(cells) > 1 else None)
cost_basis = _parse_decimal(cells[2] if len(cells) > 2 else None)
lot_id = cells[3].strip() if len(cells) > 3 else None
# In the modal table, index 5 is Cost Basis. index 3 is Cost/Share.
# Position-level Lot contract has 'cost_basis' field which typically means total cost.
cost_basis = _parse_decimal(cells[5] if len(cells) > 5 else None)
# lot_id isn't explicitly in the table, we'll use holding period or empty
lot_id = cells[8].strip() if len(cells) > 8 else None
lots.append(
Lot(
acquired_date=acquired_date or None,
quantity=quantity,
quantity=quantity or 0.0,
cost_basis=cost_basis,
lot_id=lot_id or None,
)
@@ -281,24 +317,40 @@ def _row_to_position(row_map: dict[str, str], lots_rows: Sequence[Sequence[str]]
description = row_map.get('description') or row_map.get('name') or row_map.get('column_1') or ""
# Price is typically in column labeled 'price' or similar
# In the current layout the price column has no labelled header. Debug output shows
# an empty header between 'Qty' and 'Price Chng $':
#   headers: [..., 'Qty', '', 'Price Chng $', ...]
#   cells:   [..., '2,944.633', 'TITLE:04/24/2026', 'TITLE:+1.13%', ...]
# The title attribute of that cell ('TITLE:04/24/2026') holds a date, while the cell
# text is usually the price, so the empty_N keys below are tried as fallbacks.
market_price = _parse_decimal(
row_map.get('price')
or row_map.get('market price')
or row_map.get('last price')
or row_map.get('empty_4')
or row_map.get('empty_5')
)
# Quantity
quantity = _parse_float(row_map.get('qty') or row_map.get('quantity'))
market_value = _parse_decimal(row_map.get('mkt val') or row_map.get('market value'))
# Cost basis mapping
cost_basis_total = _parse_decimal(
row_map.get('cost basis')
or row_map.get('total cost')
)
# Quantity - now in different column due to layout change
quantity = _parse_float(row_map.get('quantity') or row_map.get('qty'))
market_value = _parse_decimal(row_map.get('market value') or row_map.get('mkt val'))
cost_basis_total = _parse_decimal(row_map.get('cost basis') or row_map.get('total cost'))
unrealized_gain = _parse_decimal(
row_map.get('gain/loss $')
or row_map.get('unrealized gain')
or row_map.get('gain/loss')
or row_map.get('empty_11') # Adjusted index
)
unrealized_gain_pct = _parse_float(
row_map.get('gain/loss %')
or row_map.get('unrealized gain %')
or row_map.get('empty_12')
)
asset_type = classify_asset(symbol, description)
@@ -341,52 +393,230 @@ async def get_positions(
if not await goto_with_auth_check(page, context, POSITIONS_URL, debug=debug):
return fail("Failed to load Schwab positions page.", ErrorType.AUTHENTICATION, retryable=True)
if account:
requested_id = account.id if isinstance(account, AccountSummary) else account
if debug:
print(f"DEBUG: Attempting to switch to account: {requested_id} via Summary page")
# Go to summary page to switch (much more stable than dropdown)
await goto_with_auth_check(page, context, "https://client.schwab.com/app/accounts/summary/#/", debug=debug)
await page.wait_for_timeout(3000)
# Find and click the account row link
clicked = await page.evaluate("""
(query) => {
const rows = Array.from(document.querySelectorAll('sdps-table-row, tr'));
const targetRow = rows.find(r => r.innerText.includes(query) || r.textContent.includes(query));
if (targetRow) {
const link = targetRow.querySelector('a.acctNavigate-button-link');
if (link) {
link.click();
return true;
}
}
return false;
}
""", requested_id)
if clicked:
if debug:
print(f"DEBUG: Clicked account {requested_id} on summary page")
await page.wait_for_timeout(5000)
else:
if debug:
print(f"DEBUG: Failed to find account {requested_id} on summary page, trying dropdown as fallback...")
from ..transactions.scraper import switch_account_on_page
await switch_account_on_page(page, requested_id, context=context, debug=debug)
# Ensure we are on positions page for the selected account
if "/accounts/positions" not in page.url:
await goto_with_auth_check(page, context, POSITIONS_URL, debug=debug)
if debug:
html = await page.content()
save_debug_artifact("positions_page_initial.html", html)
await page.wait_for_selector('#positionsDetails', timeout=45000)
await page.wait_for_timeout(1000)
# Try to expand lots using a more reliable evaluate-based approach
try:
expanded_count = await page.evaluate("""
() => {
const buttons = Array.from(document.querySelectorAll('tr.position-row sdps-button[sdps-id="costBasisTBD"] button'));
let count = 0;
buttons.forEach(btn => {
// Check if already expanded (usually has a different icon or state, but clicking again often toggles)
// For now we just click them all.
btn.click();
count++;
});
return count;
}
""")
if debug:
print(f"Clicked {expanded_count} potential lot expanders")
if expanded_count > 0:
await page.wait_for_timeout(2000) # Wait for expansion
except Exception as e:
if debug:
print(f"Error expanding lots: {e}")
await page.evaluate('window.scrollTo(0, document.body.scrollHeight)')
await page.wait_for_timeout(1500)
table_data = await _evaluate_table(page)
if not table_data:
return fail("Unable to locate positions table.", ErrorType.PARSING, retryable=True)
if debug:
html = await page.content()
save_debug_artifact("positions_page_scrolled.html", html)
png = await page.screenshot(full_page=True)
save_debug_artifact("positions_page.png", png)
headers = [header.strip().lower() for header in table_data.get('headers') or []]
# 1. Get headers once
headers = await page.evaluate("""
() => {
const table = document.querySelector('#positionsDetails');
if (!table) return [];
return Array.from(table.querySelectorAll('thead tr th')).map(th => {
const btn = th.querySelector('button, .sdps-tables__header-text');
const text = (btn ? (btn.innerText || btn.textContent) : (th.innerText || th.textContent)) || '';
return text.trim().replace(/\\u00a0/g, ' ').replace(/\\u200b/g, '').split('\\n')[0].trim().toLowerCase();
});
}
""")
if not headers:
return fail("Positions table headers not found.", ErrorType.PARSING, retryable=True)
positions: list[Position] = []
# 1.5 Pre-cleanup: Close any accidentally opened modals
try:
open_modals = await page.query_selector_all('app-lot sdps-modal[sdps-id="open-lot-overlay"].sdps-modal--open')
for m in open_modals:
close = await m.query_selector('button.sdps-modal__close')
if close:
await close.click(force=True)
await page.wait_for_timeout(500)
except Exception:
pass
for row in table_data.get('rows', []):
if row.get('type') != 'position':
continue
# 2. Get all position rows metadata first to avoid stale handle issues
position_metadata = await page.evaluate("""
() => {
const rows = Array.from(document.querySelectorAll('tr.position-row'));
return rows.map((row, index) => {
const symbol = row.getAttribute('data-symbol') || '';
const cells = Array.from(row.querySelectorAll('td')).map((cell) => {
const btn = cell.querySelector('button, a, .sdps-button');
if (btn) {
const clone = btn.cloneNode(true);
clone.querySelectorAll('sup, .sdps-sr-only').forEach(el => el.remove());
let txt = clone.innerText.trim();
if (!txt && btn.hasAttribute('title')) txt = btn.getAttribute('title').trim();
return txt;
}
const titledSpan = cell.querySelector('span[title]');
const clone = cell.cloneNode(true);
clone.querySelectorAll('sup, .sdps-sr-only').forEach(el => el.remove());
let txt = clone.innerText.trim();
// If no direct text but has a title with a number, use that
if (!txt && titledSpan && titledSpan.getAttribute('title')) {
const t = titledSpan.getAttribute('title').trim();
if (t.includes('$') || /^[+-]?[\\d,.]+$/.test(t)) return t;
}
return txt;
});
return { symbol, cells, index };
});
}
""")
cells = row.get('cells') or []
symbol = row.get('symbol') or ""
account_label = row.get('account') or ""
if debug:
print(f"Found {len(position_metadata)} positions to process")
row_map = _map_row(headers, cells)
position = _row_to_position(row_map, row.get('lots') or [], symbol=symbol)
all_positions: list[Position] = []
for meta in position_metadata:
symbol = meta['symbol']
idx = meta['index']
# Filter by account if requested
if account is not None and account_label:
# Normalize the account label from the row
account_summary = _normalize_account_label(account_label)
if not _match_account(account_summary, account):
continue
elif account is not None and not account_label:
# If filtering by account but row has no account, skip it
continue
# Re-fetch row for lot expansion if needed
lots_data = []
try:
rows = await page.query_selector_all('tr.position-row')
if idx < len(rows):
row = rows[idx]
expander = await row.query_selector('sdps-button[sdps-id="costBasisTBD"] button')
if expander:
await expander.scroll_into_view_if_needed()
# Use force=True because sometimes modals/overlays block the click in Schwab's UI
await expander.click(force=True)
# Wait for modal to appear
await page.wait_for_timeout(1000)
# Find the active modal (not inert, visible)
modal_handle = None
modals = await page.query_selector_all('app-lot sdps-modal[sdps-id="open-lot-overlay"]')
for m in modals:
is_hidden = await m.evaluate('el => el.getAttribute("aria-hidden") === "true" || el.hasAttribute("inert")')
if not is_hidden:
modal_handle = m
break
if not modal_handle and modals:
modal_handle = modals[-1] # Fallback to last one
if modal_handle:
modal_id = await modal_handle.get_attribute('modal-id')
if debug:
print(f"Processing modal {modal_id} for {symbol}")
# Wait for table to be populated
try:
await modal_handle.wait_for_selector('#responsiveLotTable tbody tr.data-row', timeout=3000)
except Exception:
pass
# Extract lots from this specific modal
lots_data = await page.evaluate(f"""
(mId) => {{
const modal = document.querySelector(`app-lot sdps-modal[modal-id="${{mId}}"]`);
if (!modal) return [];
const lotTable = modal.querySelector('#responsiveLotTable');
if (!lotTable) return [];
const lotRows = Array.from(lotTable.querySelectorAll('tbody tr.data-row'));
return lotRows.map(r => {{
return Array.from(r.querySelectorAll('th, td')).map(c => {{
const clone = c.cloneNode(true);
clone.querySelectorAll('sup, .sdps-sr-only, .transactionCostColor').forEach(el => el.remove());
return clone.innerText.trim();
}});
}});
}}
""", modal_id)
# Close this specific modal
close_btn = await modal_handle.query_selector('button.sdps-modal__close')
if close_btn:
await close_btn.click(force=True)
await page.wait_for_timeout(1000)
except Exception as e:
if debug:
print(f"Error expanding lots for {symbol}: {e}")
row_map = _map_row(headers, meta['cells'])
position = _row_to_position(row_map, lots_data, symbol=symbol)
if not include_non_equity and position.asset_type not in {"EQUITY", "ETF"}:
continue
positions.append(position)
all_positions.append(position)
if not positions:
if not all_positions:
return fail("No positions matched the requested criteria.", ErrorType.VALIDATION, retryable=False)
return ok(positions)
return ok(all_positions)
except Exception as exc:
if debug:
import traceback
traceback.print_exc()
return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
finally:
await _safe_close_page(page)

View File

@@ -1160,7 +1160,7 @@ async def switch_account_via_api(page, account_number: str, debug: bool = False)
return False
async def switch_account_on_page(page, account_query: Optional[str], debug: bool = False) -> bool:
async def switch_account_on_page(page, account_query: Optional[str], context=None, debug: bool = False) -> bool:
"""Attempt to switch account using the page-level selector given a query like '604' or 'Joint'."""
if not account_query:
return False
@@ -1176,7 +1176,7 @@ async def switch_account_on_page(page, account_query: Optional[str], debug: bool
if 'accounts/history' not in page.url:
if debug:
print("DEBUG: Not on history page, navigating...")
await goto_history(page, debug=debug)
await goto_history(page, context=context, debug=debug)
# ENHANCED DEBUGGING: Take screenshot before attempting switch
if debug:
@@ -1221,8 +1221,11 @@ async def switch_account_on_page(page, account_query: Optional[str], debug: bool
for (const button of elements) {
if (button.offsetParent !== null && button.offsetWidth > 0 && button.offsetHeight > 0) {
try {
button.scrollIntoView({ behavior: 'smooth', block: 'center' });
button.click();
button.scrollIntoView({ behavior: 'auto', block: 'center' });
// Use a slight delay before clicking to avoid context destruction issues
setTimeout(() => {
try { button.click(); } catch(e) {}
}, 10);
return { success: true, selector: selector, text: (button.textContent || '').trim().substring(0, 50) };
} catch (e) {
continue;
@@ -2023,7 +2026,42 @@ async def switch_account_on_page(page, account_query: Optional[str], debug: bool
if debug:
print(f"DEBUG: Keyboard navigation failed: {e}")
if not account_clicked and debug:
if not account_clicked:
if debug:
print(f"DEBUG: All primary switch methods failed for {account_query}, attempting Summary page fallback...")
try:
# Go to summary page if not already there
if "accounts/summary" not in page.url:
await page.goto("https://client.schwab.com/app/accounts/summary/#/")
await page.wait_for_timeout(5000)
# Find the row for this account in the summary table and click its link
clicked_summary = await page.evaluate("""
(query) => {
const rows = Array.from(document.querySelectorAll('sdps-table-row, tr'));
const targetRow = rows.find(r => r.innerText.includes(query) || r.textContent.includes(query));
if (targetRow) {
const link = targetRow.querySelector('a.acctNavigate-button-link');
if (link) {
link.click();
return true;
}
}
return false;
}
""", account_query)
if clicked_summary:
if debug:
print(f"DEBUG: Successfully clicked account {account_query} on summary page")
await page.wait_for_timeout(5000)
return True
except Exception as summary_err:
if debug:
print(f"DEBUG: Summary page fallback failed: {summary_err}")
if debug:
print(f"DEBUG: Could not find and click/select target account: {target_account['label']}")
print(f"DEBUG: Target account details: {target_account}")