Files
schwab-mcp-custom/schwab_scraper/features/accounts_positions/overview_scraper.py
b3nw 650ea2d087
All checks were successful
Build and Push Docker Image / build (push) Successful in 34s
Fix build: Bundle schwab_scraper source and use local dependencies
2026-04-24 01:50:20 +00:00

427 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import asyncio
import re
from decimal import Decimal, InvalidOperation
from typing import Any, Optional, Sequence
from ...browser.auth import ensure_cookies
from ...browser.client import connect, new_context, new_page
from ...browser.navigation import goto_with_auth_check
from ...core import AccountOverview, AccountSummary, Envelope, ErrorType, fail, ok
from ...core.config import get_playwright_url, load_config
# Schwab account summary page; ``get_account_overview`` navigates here and
# scrapes the overview table and page-level totals from it.
SUMMARY_URL = "https://client.schwab.com/accounts/summary/summary.aspx/"
def _parse_currency(value: str | None) -> Optional[Decimal]:
    """Parse a displayed currency amount into a ``Decimal``.

    Dollar signs, thousands separators and surrounding parentheses are
    removed before parsing. An amount wrapped in parentheses (accounting
    notation) or carrying a minus sign is returned as a negative number.

    Args:
        value: Raw cell text such as ``"$1,234.56"``, ``"($12.00)"`` or
            ``"-$3.10"``; may be ``None``.

    Returns:
        The parsed amount, or ``None`` when *value* is empty, is one of the
        placeholders ``"-"`` / ``"--"``, or is not a valid number.
    """
    if not value:
        return None
    cleaned = value.strip()
    if not cleaned or cleaned in {"-", "--"}:
        return None

    # Accounting notation: "(1,234.56)" denotes a negative amount.
    negative = cleaned.startswith("(") and cleaned.endswith(")")

    # One pass: drop formatting characters and map the Unicode minus sign
    # (U+2212) onto ASCII "-". The character is written as an escape so it
    # cannot be lost or mistaken for an ordinary hyphen; replacing the empty
    # string instead would insert "-" between every character.
    cleaned = cleaned.translate(
        str.maketrans({"\u2212": "-", "$": None, ",": None, "(": None, ")": None})
    ).strip()
    if not cleaned:
        return None

    try:
        parsed = Decimal(cleaned)
        # The comparison stays inside the ``try`` because comparing a NaN
        # Decimal signals InvalidOperation as well.
        if negative or parsed < 0:
            parsed = -abs(parsed)
        return parsed
    except InvalidOperation:
        return None
def _parse_percentage(value: str | None) -> Optional[float]:
    """Parse a displayed percentage into a ``float`` (``"1.5%"`` -> ``1.5``).

    The percent sign and surrounding parentheses are removed before parsing.
    A value wrapped in parentheses or carrying a minus sign is returned as a
    negative number.

    Args:
        value: Raw cell text such as ``"1.25%"`` or ``"(0.4%)"``; may be
            ``None``.

    Returns:
        The parsed percentage, or ``None`` when *value* is empty or is not a
        valid number.
    """
    if not value:
        return None
    cleaned = value.strip()
    if not cleaned:
        return None

    # Accounting notation: "(0.4%)" denotes a negative percentage.
    negative = cleaned.startswith("(") and cleaned.endswith(")")

    # Drop formatting characters and map the Unicode minus sign (U+2212) onto
    # ASCII "-". The escape keeps the character explicit; replacing the empty
    # string instead would insert "-" between every character.
    cleaned = cleaned.translate(
        str.maketrans({"\u2212": "-", "%": None, "(": None, ")": None})
    ).strip()
    if not cleaned:
        return None

    try:
        parsed = float(cleaned)
    except ValueError:
        return None
    if negative or parsed < 0:
        parsed = -abs(parsed)
    return parsed
def _normalize_account_label(label: str) -> AccountSummary:
    """Derive an ``AccountSummary`` from the label text of an account row.

    Whitespace runs in *label* are collapsed to single spaces. The trailing
    digits are taken from the first run of three or four digits that ends at
    a word boundary, searched with all spaces removed. The account type is
    the leading run of letters, ``&``, ``'``, ``-`` and spaces with its
    whitespace turned into underscores, or ``"Account"`` when the label does
    not start with such characters. The id is ``"<type>-<digits>"`` when
    digits were found, otherwise just the type.
    """
    tidy_label = re.sub(r"\s+", " ", label).strip()

    digits = re.search(r"(\d{3,4})\b", tidy_label.replace(" ", ""))
    if digits is None:
        last4 = None
    else:
        last4 = digits.group(1)[-4:]

    prefix = re.search(r"^[A-Za-z&'\- ]+", tidy_label)
    if prefix is None:
        account_type = "Account"
    else:
        account_type = re.sub(r"\s+", "_", prefix.group(0).strip())

    account_id = account_type if not last4 else f"{account_type}-{last4}"

    return AccountSummary(
        id=account_id,
        label=tidy_label,
        type=account_type,
        last4=last4,
        is_margin="margin" in tidy_label.lower(),
    )
def _match_account(candidate: AccountSummary, requested: AccountSummary | str | None) -> bool:
    """Tell whether *candidate* is the account the caller asked for.

    ``None`` matches every candidate. Otherwise the comparison is
    case-insensitive and succeeds when the request shares at least one
    identifier with the candidate: its id, its label, or (when set) its
    trailing digits. A string request is stripped and treated as a single
    identifier.
    """
    if requested is None:
        return True

    def identifiers(summary: AccountSummary) -> set[str]:
        """Collect the lower-cased identifiers an account can be matched on."""
        keys = {summary.id.lower(), summary.label.lower()}
        if summary.last4:
            keys.add(summary.last4.lower())
        return keys

    if isinstance(requested, AccountSummary):
        wanted = identifiers(requested)
    else:
        wanted = {requested.strip().lower()}

    return not wanted.isdisjoint(identifiers(candidate))
def _rows_to_dicts(headers: Sequence[str], rows: Sequence[Sequence[str]]) -> list[dict[str, str]]:
    """Pair every row's cells with the table headers.

    Headers are stripped and lower-cased to form the keys; cell text is
    stripped. Pairing is positional and stops at the shorter of the two, so
    surplus cells are dropped and missing cells leave their header absent
    from that row's mapping. One mapping is produced per input row.
    """
    keys = [name.strip().lower() for name in headers]
    return [
        {key: cell.strip() for key, cell in zip(keys, cells)}
        for cells in rows
    ]
async def _extract_table(page) -> dict[str, Any] | None:
    """Read header and body cell text from the first summary table on *page*.

    The script below is run in the page through ``page.evaluate``. It looks
    up the first ``.sdps-tables__wrapper`` element and yields ``None`` when
    there is none. Otherwise it yields ``{"headers": [...], "rows": [[...]]}``
    holding trimmed text content. Headers fall back to ``thead th`` and rows
    to non-empty ``tbody tr`` rows when the ``sdps-tables`` header row or
    body rows produce nothing.
    """
    table_script = """
        () => {
            const wrapper = document.querySelector('.sdps-tables__wrapper');
            if (!wrapper) {
                return null;
            }
            const headerRow = wrapper.querySelector('.sdps-tables__row--header');
            const headers = headerRow
                ? Array.from(headerRow.querySelectorAll('.sdps-tables__header-text'))
                    .map((el) => (el.textContent || '').trim())
                : [];
            if (!headers.length) {
                const legacyHeaders = wrapper.querySelectorAll('thead th');
                if (legacyHeaders.length) {
                    for (const th of legacyHeaders) {
                        headers.push((th.textContent || '').trim());
                    }
                }
            }
            const bodyRows = wrapper.querySelectorAll('.sdps-tables__row--body');
            const rows = [];
            if (bodyRows.length) {
                bodyRows.forEach((row) => {
                    const cells = Array.from(
                        row.querySelectorAll('.sdps-tables__cell, div[role="cell"], td')
                    ).map((cell) => (cell.textContent || '').trim());
                    rows.push(cells);
                });
            }
            if (!rows.length) {
                const fallbackRows = wrapper.querySelectorAll('tbody tr');
                fallbackRows.forEach((row) => {
                    const cells = Array.from(row.querySelectorAll('td')).map((cell) => (cell.textContent || '').trim());
                    if (cells.length) {
                        rows.push(cells);
                    }
                });
            }
            return { headers, rows };
        }
    """
    return await page.evaluate(table_script)
async def _extract_totals(page) -> dict[str, str | None]:
    """Read the page-level total, day change and cash figures from *page*.

    The script below is run in the page through ``page.evaluate`` and
    returns a mapping with the keys ``total``, ``dayChange``,
    ``dayChangePct`` and ``cash``. Each value is the first matching amount
    found in the text around the corresponding label, or ``None`` when the
    label or a matching amount is missing.

    Amounts keep an optional leading sign. A Unicode minus sign (U+2212) is
    rewritten to ASCII ``-`` so the returned strings can be parsed without
    further sign handling. Without the sign in the pattern, a negative day
    change would be reported as a positive amount.
    """
    totals_script = r"""
        () => {
            const result = { total: null, dayChange: null, dayChangePct: null, cash: null };

            // Optional leading sign (ASCII '+', '-' or U+2212) so negative
            // amounts are not silently turned into positive ones.
            const currencyPattern = /[-+\u2212]?\$[\d,]+\.?\d*/;
            const percentPattern = /[-+\u2212]?\d+(?:\.\d+)?%/;

            // First match of `pattern` in the element's text with U+2212
            // rewritten to ASCII '-'; null when nothing matches.
            const firstMatch = (el, pattern) => {
                const found = (el.textContent || '').match(pattern);
                return found ? found[0].replace(/\u2212/g, '-') : null;
            };

            const totalLabel = document.querySelector('#total-value-label');
            if (totalLabel) {
                const valueEl = totalLabel.closest('[class*="sdps-panel"], h2, div');
                if (valueEl) {
                    result.total = firstMatch(valueEl, currencyPattern);
                }
            }

            const dayChangeLabel = document.querySelector('#day-change-label');
            if (dayChangeLabel) {
                const container = dayChangeLabel.parentElement;
                if (container) {
                    result.dayChange = firstMatch(container, currencyPattern);
                    result.dayChangePct = firstMatch(container, percentPattern);
                }
            }

            const cashLabel = Array.from(document.querySelectorAll('.sdps-tables__header-text')).find((el) =>
                el.textContent?.toLowerCase().includes('cash & cash investments')
            );
            if (cashLabel) {
                const container = cashLabel.closest('div');
                if (container) {
                    result.cash = firstMatch(container, currencyPattern);
                }
            }

            return result;
        }
    """
    return await page.evaluate(totals_script)
def _row_to_overview(row_map: dict[str, str]) -> tuple[AccountSummary, AccountOverview]:
    """Turn one header-keyed table row into an account summary and overview.

    Each field is read from the first non-empty column among a list of
    accepted header spellings. The label falls back to ``"Account"`` when no
    label column has text. Amounts go through ``_parse_currency`` and the day
    change percentage through ``_parse_percentage``, so missing or
    unparseable cells become ``None``.

    Returns:
        ``(summary, overview)`` where ``overview.account`` is ``summary``.
    """

    def first_value(*columns: str) -> str | None:
        """Return the first non-empty cell among *columns*, else ``None``."""
        for column in columns:
            cell = row_map.get(column)
            if cell:
                return cell
        return None

    summary = _normalize_account_label(
        first_value("name", "account", "account name", "") or "Account"
    )

    overview = AccountOverview(
        account=summary,
        total_value=_parse_currency(
            first_value("account value", "total value", "market value")
        ),
        day_change=_parse_currency(
            first_value("day change $", "day change", "day change amount")
        ),
        day_change_pct=_parse_percentage(
            first_value("day change %", "day change percent")
        ),
        cash=_parse_currency(first_value("cash & cash investments", "cash")),
        settled_cash=_parse_currency(row_map.get("settled cash")),
        buying_power=_parse_currency(first_value("buying power", "available to trade")),
        margin_balance=_parse_currency(first_value("margin balance", "margin")),
    )
    return summary, overview
async def get_account_overview(
    account: AccountSummary | str | None = None, *, debug: bool = False
) -> Envelope[AccountOverview]:
    """Scrape the Schwab account summary page and return an account overview.

    Args:
        account: Account to report on, either an ``AccountSummary`` or a
            string compared against each row's id, label or trailing digits
            (see ``_match_account``). ``None`` selects every row.
        debug: Forwarded to ``goto_with_auth_check``.

    Returns:
        An ``ok`` envelope holding the overview of the first matching row.
        When *account* is ``None`` and more than one row was found, the
        envelope instead holds an aggregate of all rows whose total value,
        day change, day change percentage and cash are replaced by the
        page-level totals wherever those are present and parse.

        A ``fail`` envelope is returned when no session cookies are
        available or navigation fails (``AUTHENTICATION``), when no table is
        found (``PARSING``), when no row matches (``VALIDATION``), or when
        any exception is raised while scraping (``UNKNOWN``).
    """
    cookies = await ensure_cookies()
    if not cookies:
        return fail("Unable to establish Schwab session.", ErrorType.AUTHENTICATION, retryable=False)

    config = load_config()
    playwright_url = get_playwright_url(config)

    # Initialised up front so the ``finally`` block can close whatever was
    # actually created, even if setup fails part-way through.
    playwright = browser = context = page = None
    try:
        playwright, browser = await connect(playwright_url)
        context = await new_context(browser, cookies=cookies)
        page = await new_page(context)

        if not await goto_with_auth_check(page, context, SUMMARY_URL, debug=debug):
            return fail("Failed to load Schwab account summary page.", ErrorType.AUTHENTICATION, retryable=True)

        # NOTE(review): presumably gives client-side rendering time to fill
        # the table before it is read; confirm whether an explicit wait on
        # the table selector would be more reliable.
        await asyncio.sleep(1)

        table_data = await _extract_table(page)
        if not table_data:
            return fail("Unable to locate account overview table.", ErrorType.PARSING, retryable=True)

        row_dicts = _rows_to_dicts(table_data["headers"], table_data["rows"])
        matched_overviews: list[AccountOverview] = []
        for row_map in row_dicts:
            # Skip rows whose cells are all empty.
            if not any(row_map.values()):
                continue
            summary, overview = _row_to_overview(row_map)
            if _match_account(summary, account):
                matched_overviews.append(overview)

        if not matched_overviews:
            return fail("Account not found in overview table.", ErrorType.VALIDATION, retryable=False)

        if account is None and len(matched_overviews) > 1:
            aggregated = _aggregate_overviews(matched_overviews)
            totals = await _extract_totals(page)
            if totals:
                # Prefer the figures the page itself displays, but only when
                # they parse: an unparseable page value must not wipe out a
                # valid sum computed from the rows.
                total_value = _parse_currency(totals.get("total"))
                if total_value is not None:
                    aggregated.total_value = total_value
                day_change = _parse_currency(totals.get("dayChange"))
                if day_change is not None:
                    aggregated.day_change = day_change
                day_change_pct = _parse_percentage(totals.get("dayChangePct"))
                if day_change_pct is not None:
                    aggregated.day_change_pct = day_change_pct
                cash = _parse_currency(totals.get("cash"))
                if cash is not None:
                    aggregated.cash = cash
            return ok(aggregated)

        return ok(matched_overviews[0])
    except Exception as exc:
        # Boundary handler: any scraping failure is reported as a retryable
        # envelope instead of propagating to the caller.
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
    finally:
        # Tear down in reverse order of creation; each helper ignores ``None``
        # and suppresses close errors.
        await _safe_close_page(page)
        await _safe_close_context(context)
        await _safe_close_browser(browser)
        await _safe_stop_playwright(playwright)
def _sum_optional(values: Sequence[Optional[Decimal]]) -> Optional[Decimal]:
    """Return the sum of the non-``None`` values, or ``None`` if there are none."""
    present = [value for value in values if value is not None]
    return sum(present, Decimal("0")) if present else None


def _aggregate_overviews(overviews: Sequence[AccountOverview]) -> AccountOverview:
    """Combine several account overviews into one "All Accounts" overview.

    Every monetary field is the sum of the values present across
    *overviews*. A field is ``None`` only when no overview supplies it, so a
    genuine zero total (for example offsetting day changes) is reported as
    zero rather than as missing data.

    The day change percentage is ``day_change / total_value * 100`` and is
    left as ``None`` when either figure is missing or the total is zero.
    NOTE(review): this divides by the current total rather than the previous
    day's value; confirm this matches the figure Schwab displays.

    Returns:
        An ``AccountOverview`` whose account is a synthetic ``AGGREGATE``
        summary labelled ``"All Accounts"``.
    """
    total_value = _sum_optional([item.total_value for item in overviews])
    day_change = _sum_optional([item.day_change for item in overviews])
    cash = _sum_optional([item.cash for item in overviews])
    settled_cash = _sum_optional([item.settled_cash for item in overviews])
    buying_power = _sum_optional([item.buying_power for item in overviews])
    margin_balance = _sum_optional([item.margin_balance for item in overviews])

    day_change_pct: Optional[float] = None
    # ``total_value`` must be present and non-zero to serve as a divisor.
    if total_value and day_change is not None:
        try:
            day_change_pct = float((day_change / total_value) * 100)
        except (InvalidOperation, ZeroDivisionError):
            day_change_pct = None

    aggregated_summary = AccountSummary(
        id="AGGREGATE",
        label="All Accounts",
        type="AGGREGATE",
        last4=None,
        is_margin=False,
    )
    return AccountOverview(
        account=aggregated_summary,
        total_value=total_value,
        day_change=day_change,
        day_change_pct=day_change_pct,
        cash=cash,
        settled_cash=settled_cash,
        buying_power=buying_power,
        margin_balance=margin_balance,
    )
async def _safe_close_page(page) -> None:
    """Close *page* on a best-effort basis.

    Does nothing when *page* is ``None``; any exception raised by
    ``page.close()`` is suppressed.
    """
    if page is not None:
        try:
            await page.close()
        except Exception:
            # Best-effort cleanup: a failed close is deliberately ignored.
            pass
async def _safe_close_context(context) -> None:
    """Close *context* on a best-effort basis.

    Does nothing when *context* is ``None``; any exception raised by
    ``context.close()`` is suppressed.
    """
    if context is not None:
        try:
            await context.close()
        except Exception:
            # Best-effort cleanup: a failed close is deliberately ignored.
            pass
async def _safe_close_browser(browser) -> None:
    """Close *browser* on a best-effort basis.

    Does nothing when *browser* is ``None``; any exception raised by
    ``browser.close()`` is suppressed.
    """
    if browser is not None:
        try:
            await browser.close()
        except Exception:
            # Best-effort cleanup: a failed close is deliberately ignored.
            pass
async def _safe_stop_playwright(playwright) -> None:
    """Stop *playwright* on a best-effort basis.

    Does nothing when *playwright* is ``None``; any exception raised by
    ``playwright.stop()`` is suppressed.
    """
    if playwright is not None:
        try:
            await playwright.stop()
        except Exception:
            # Best-effort cleanup: a failed stop is deliberately ignored.
            pass