Fix build: Bundle schwab_scraper source and use local dependencies
All checks were successful
Build and Push Docker Image / build (push) Successful in 34s

This commit is contained in:
2026-04-24 01:50:20 +00:00
parent 02ac293692
commit 650ea2d087
43 changed files with 10900 additions and 41 deletions

View File

@@ -0,0 +1,426 @@
from __future__ import annotations

import asyncio
import math
import re
from decimal import Decimal, InvalidOperation
from typing import Any, Optional, Sequence

from ...browser.auth import ensure_cookies
from ...browser.client import connect, new_context, new_page
from ...browser.navigation import goto_with_auth_check
from ...core import AccountOverview, AccountSummary, Envelope, ErrorType, fail, ok
from ...core.config import get_playwright_url, load_config
# Schwab account summary page; get_account_overview() navigates here before
# scraping the overview table.
SUMMARY_URL = "https://client.schwab.com/accounts/summary/summary.aspx/"
def _parse_currency(value: str | None) -> Optional[Decimal]:
if not value:
return None
cleaned = value.strip()
if not cleaned or cleaned in {"-", "--"}:
return None
negative = False
if cleaned.startswith("(") and cleaned.endswith(")"):
negative = True
cleaned = cleaned.replace("$", "").replace(",", "")
cleaned = cleaned.replace("(", "").replace(")", "")
cleaned = cleaned.replace("", "-").strip()
if not cleaned:
return None
try:
parsed = Decimal(cleaned)
if negative or parsed < 0:
parsed = -abs(parsed)
return parsed
except InvalidOperation:
return None
def _parse_percentage(value: str | None) -> Optional[float]:
if not value:
return None
cleaned = value.strip()
if not cleaned:
return None
negative = False
if cleaned.startswith("(") and cleaned.endswith(")"):
negative = True
cleaned = cleaned.replace("%", "").replace("(", "").replace(")", "")
cleaned = cleaned.replace("", "-").strip()
if not cleaned:
return None
try:
parsed = float(cleaned)
except ValueError:
return None
if negative or parsed < 0:
parsed = -abs(parsed)
return parsed
def _normalize_account_label(label: str) -> AccountSummary:
    """Derive an ``AccountSummary`` from an account label as shown in the table.

    Whitespace runs are collapsed to single spaces. The account type is the
    leading run of letters, ``&``, ``'``, ``-`` and spaces (spaces become
    underscores), defaulting to ``"Account"``. The trailing digits are the
    first 3-4 digit group that ends on a word boundary once spaces are
    removed. The id is ``"<type>-<digits>"`` when digits were found, else
    just the type. ``is_margin`` is set when the label contains "margin"
    (case-insensitive).
    """
    normalized = re.sub(r"\s+", " ", label).strip()

    digits = re.search(r"(\d{3,4})\b", normalized.replace(" ", ""))
    if digits is None:
        last4 = None
    else:
        last4 = digits.group(1)[-4:]

    leading = re.search(r"^[A-Za-z&'\- ]+", normalized)
    if leading is None:
        account_type = "Account"
    else:
        account_type = re.sub(r"\s+", "_", leading.group(0).strip())

    account_id = account_type if not last4 else f"{account_type}-{last4}"
    has_margin = "margin" in normalized.lower()

    return AccountSummary(
        id=account_id,
        label=normalized,
        type=account_type,
        last4=last4,
        is_margin=has_margin,
    )
def _match_account(candidate: AccountSummary, requested: AccountSummary | str | None) -> bool:
if requested is None:
return True
if isinstance(requested, AccountSummary):
requested_values = {
requested.id.lower(),
requested.label.lower(),
}
if requested.last4:
requested_values.add(requested.last4.lower())
else:
lookup = requested.strip().lower()
requested_values = {lookup}
candidate_values = {candidate.id.lower(), candidate.label.lower()}
if candidate.last4:
candidate_values.add(candidate.last4.lower())
return bool(candidate_values & requested_values)
def _rows_to_dicts(headers: Sequence[str], rows: Sequence[Sequence[str]]) -> list[dict[str, str]]:
normalized_headers = [header.strip().lower() for header in headers]
results: list[dict[str, str]] = []
for row in rows:
row_map: dict[str, str] = {}
for idx, header in enumerate(normalized_headers):
if idx < len(row):
row_map[header] = row[idx].strip()
results.append(row_map)
return results
async def _extract_table(page) -> dict[str, Any] | None:
    """Scrape the first ``.sdps-tables__wrapper`` element on *page*.

    The work is done by a JavaScript function evaluated in the page:

    * Returns ``null`` (``None``) when no ``.sdps-tables__wrapper`` exists.
    * Headers come from ``.sdps-tables__header-text`` elements inside the
      ``.sdps-tables__row--header`` row; if that yields nothing, from
      ``thead th``.
    * Rows come from ``.sdps-tables__row--body`` elements (cells matched by
      ``.sdps-tables__cell``, ``div[role="cell"]`` or ``td``); if that yields
      nothing, from ``tbody tr`` rows that contain at least one ``td``.
    * All text is trimmed in the browser.

    Args:
        page: Object exposing an awaitable ``evaluate(script)``; presumably a
            Playwright ``Page`` -- confirm against ``new_page``.

    Returns:
        ``{"headers": [...], "rows": [[...], ...]}`` or ``None``.
    """
    # The script below is runtime text executed in the browser; keep it
    # byte-for-byte intact when editing this function.
    return await page.evaluate(
        """
() => {
const wrapper = document.querySelector('.sdps-tables__wrapper');
if (!wrapper) {
return null;
}
const headerRow = wrapper.querySelector('.sdps-tables__row--header');
const headers = headerRow
? Array.from(headerRow.querySelectorAll('.sdps-tables__header-text'))
.map((el) => (el.textContent || '').trim())
: [];
if (!headers.length) {
const legacyHeaders = wrapper.querySelectorAll('thead th');
if (legacyHeaders.length) {
for (const th of legacyHeaders) {
headers.push((th.textContent || '').trim());
}
}
}
const bodyRows = wrapper.querySelectorAll('.sdps-tables__row--body');
const rows = [];
if (bodyRows.length) {
bodyRows.forEach((row) => {
const cells = Array.from(
row.querySelectorAll('.sdps-tables__cell, div[role="cell"], td')
).map((cell) => (cell.textContent || '').trim());
rows.push(cells);
});
}
if (!rows.length) {
const fallbackRows = wrapper.querySelectorAll('tbody tr');
fallbackRows.forEach((row) => {
const cells = Array.from(row.querySelectorAll('td')).map((cell) => (cell.textContent || '').trim());
if (cells.length) {
rows.push(cells);
}
});
}
return { headers, rows };
}
"""
    )
async def _extract_totals(page) -> dict[str, str | None]:
    """Scrape the page-level totals shown alongside the account table.

    A JavaScript function evaluated in the page looks up:

    * ``total``: first currency amount in the text of the closest
      ``[class*="sdps-panel"]``, ``h2`` or ``div`` around
      ``#total-value-label``.
    * ``dayChange`` / ``dayChangePct``: first currency amount and first
      percentage in the text of the parent of ``#day-change-label``.
    * ``cash``: first currency amount in the closest ``div`` around the
      ``.sdps-tables__header-text`` element whose text contains
      "cash & cash investments" (case-insensitive).

    Amounts are returned as raw text *including their sign*: a leading ASCII
    hyphen or U+2212 minus directly before ``$``, or accounting parentheses.
    Without the sign a negative day change would be reported as positive
    while its percentage stayed negative.

    Args:
        page: Object exposing an awaitable ``evaluate(script)``; presumably a
            Playwright ``Page`` -- confirm against ``new_page``.

    Returns:
        Dict with keys ``total``, ``dayChange``, ``dayChangePct`` and
        ``cash``; each value is the matched text or ``None``.
    """
    # Raw string: the backslashes belong to the JavaScript regex literals.
    return await page.evaluate(
        r"""
        () => {
            // "($1,234.56)" or an optionally signed "$1,234.56"; the sign must sit
            // directly before the dollar sign so separators like " - " are ignored.
            const CURRENCY = /\(\$[\d,]+\.?\d*\)|[-\u2212]?\$[\d,]+\.?\d*/;
            const PERCENT = /[-\u2212]?\d+(?:\.\d+)?%/;
            const firstMatch = (el, pattern) => {
                const found = el ? el.textContent?.match(pattern) : null;
                return found ? found[0] : null;
            };

            const result = { total: null, dayChange: null, dayChangePct: null, cash: null };

            const totalLabel = document.querySelector('#total-value-label');
            if (totalLabel) {
                const valueEl = totalLabel.closest('[class*="sdps-panel"], h2, div');
                result.total = firstMatch(valueEl, CURRENCY);
            }

            const dayChangeLabel = document.querySelector('#day-change-label');
            if (dayChangeLabel) {
                const container = dayChangeLabel.parentElement;
                result.dayChange = firstMatch(container, CURRENCY);
                result.dayChangePct = firstMatch(container, PERCENT);
            }

            const cashLabel = Array.from(document.querySelectorAll('.sdps-tables__header-text')).find((el) =>
                el.textContent?.toLowerCase().includes('cash & cash investments')
            );
            if (cashLabel) {
                result.cash = firstMatch(cashLabel.closest('div'), CURRENCY);
            }

            return result;
        }
        """
    )
def _row_to_overview(row_map: dict[str, str]) -> tuple[AccountSummary, AccountOverview]:
    """Build the account summary and overview for one scraped table row.

    ``row_map`` is keyed by lower-cased header text. Each field is read from
    the first non-empty cell among a list of alternative header names; the
    account label falls back to ``"Account"`` when no name column has text.

    Returns:
        ``(summary, overview)`` where ``overview.account`` is ``summary``.
    """

    def first_of(*keys: str) -> str | None:
        """Return the first truthy cell among *keys*, else the last lookup's result."""
        cell = None
        for key in keys:
            cell = row_map.get(key)
            if cell:
                break
        return cell

    # The "" key covers a name column whose header cell is blank.
    summary = _normalize_account_label(
        first_of("name", "account", "account name", "") or "Account"
    )

    details = AccountOverview(
        account=summary,
        total_value=_parse_currency(
            first_of("account value", "total value", "market value")
        ),
        day_change=_parse_currency(
            first_of("day change $", "day change", "day change amount")
        ),
        day_change_pct=_parse_percentage(
            first_of("day change %", "day change percent")
        ),
        cash=_parse_currency(first_of("cash & cash investments", "cash")),
        settled_cash=_parse_currency(first_of("settled cash")),
        buying_power=_parse_currency(first_of("buying power", "available to trade")),
        margin_balance=_parse_currency(first_of("margin balance", "margin")),
    )
    return summary, details
async def get_account_overview(
    account: AccountSummary | str | None = None, *, debug: bool = False
) -> Envelope[AccountOverview]:
    """Scrape the Schwab summary page and return an account overview.

    Args:
        account: Account to look up, as an ``AccountSummary`` or a string
            matched against id, label or last4. ``None`` selects every row.
        debug: Forwarded to ``goto_with_auth_check``.

    Returns:
        An ``ok`` envelope holding:

        * the first row matching ``account``; or
        * when ``account`` is ``None`` and several rows exist, an aggregate of
          all rows whose total, day change, day change % and cash are replaced
          by the page-level totals wherever those are present and parseable.

        A ``fail`` envelope is returned when no session cookies are
        available, the page cannot be loaded, the table is missing, no row
        matches, or any exception is raised while scraping.
    """
    cookies = await ensure_cookies()
    if not cookies:
        return fail("Unable to establish Schwab session.", ErrorType.AUTHENTICATION, retryable=False)

    config = load_config()
    playwright_url = get_playwright_url(config)
    # Pre-bind every handle so the ``finally`` block can close whatever was
    # actually opened, even if connecting fails part-way through.
    playwright = browser = context = page = None
    try:
        playwright, browser = await connect(playwright_url)
        context = await new_context(browser, cookies=cookies)
        page = await new_page(context)

        if not await goto_with_auth_check(page, context, SUMMARY_URL, debug=debug):
            return fail("Failed to load Schwab account summary page.", ErrorType.AUTHENTICATION, retryable=True)

        # Fixed pause before scraping; presumably gives the client-side table
        # time to render -- TODO confirm / replace with an explicit wait.
        await asyncio.sleep(1)

        table_data = await _extract_table(page)
        if not table_data:
            return fail("Unable to locate account overview table.", ErrorType.PARSING, retryable=True)

        row_dicts = _rows_to_dicts(table_data["headers"], table_data["rows"])
        matched_overviews: list[AccountOverview] = []
        for row_map in row_dicts:
            # Skip rows whose cells are all empty. (Rows that merely lack
            # numeric data, e.g. a totals row, are NOT filtered here.)
            if not any(row_map.values()):
                continue
            summary, overview = _row_to_overview(row_map)
            if _match_account(summary, account):
                matched_overviews.append(overview)

        if not matched_overviews:
            return fail("Account not found in overview table.", ErrorType.VALIDATION, retryable=False)

        if account is None and len(matched_overviews) > 1:
            aggregated = _aggregate_overviews(matched_overviews)
            totals = await _extract_totals(page)
            if totals:
                overrides = (
                    ("total", "total_value", _parse_currency),
                    ("dayChange", "day_change", _parse_currency),
                    ("dayChangePct", "day_change_pct", _parse_percentage),
                    ("cash", "cash", _parse_currency),
                )
                for key, attr, parse in overrides:
                    raw = totals.get(key)
                    if not raw:
                        continue
                    parsed = parse(raw)
                    # Only trust the page-level figure when it parses; an
                    # unparseable total must not wipe out the value already
                    # summed from the individual rows.
                    if parsed is not None:
                        setattr(aggregated, attr, parsed)
            return ok(aggregated)

        return ok(matched_overviews[0])
    except Exception as exc:
        # Boundary of the scraping operation: report any failure through the
        # envelope instead of raising to the caller.
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
    finally:
        # Release resources innermost-first; each helper tolerates ``None``.
        await _safe_close_page(page)
        await _safe_close_context(context)
        await _safe_close_browser(browser)
        await _safe_stop_playwright(playwright)
def _aggregate_overviews(overviews: Sequence[AccountOverview]) -> AccountOverview:
    """Combine several per-account overviews into one "All Accounts" overview.

    Each monetary field is the sum of the values that are present. A field is
    ``None`` only when *no* account reported it; a genuine sum of zero is
    returned as ``Decimal("0")`` rather than being mistaken for missing data.

    ``day_change_pct`` is ``day_change / total_value * 100`` when both are
    known and the total is non-zero, otherwise ``None``.

    Args:
        overviews: Overviews to combine; may be empty.

    Returns:
        An ``AccountOverview`` whose account has id and type ``"AGGREGATE"``.
    """

    def total_of(field: str) -> Optional[Decimal]:
        """Sum the non-``None`` values of *field*; ``None`` if there are none."""
        present = [
            amount
            for amount in (getattr(item, field) for item in overviews)
            if amount is not None
        ]
        return sum(present, Decimal("0")) if present else None

    total_value = total_of("total_value")
    day_change = total_of("day_change")

    day_change_pct: Optional[float] = None
    if total_value is not None and day_change is not None and total_value != 0:
        # NOTE(review): the denominator is the *current* total, not the prior
        # day's value, so this is an approximation of the true day change %.
        try:
            day_change_pct = float((day_change / total_value) * 100)
        except (InvalidOperation, ZeroDivisionError):
            day_change_pct = None

    aggregated_summary = AccountSummary(
        id="AGGREGATE",
        label="All Accounts",
        type="AGGREGATE",
        last4=None,
        is_margin=False,
    )
    return AccountOverview(
        account=aggregated_summary,
        total_value=total_value,
        day_change=day_change,
        day_change_pct=day_change_pct,
        cash=total_of("cash"),
        settled_cash=total_of("settled_cash"),
        buying_power=total_of("buying_power"),
        margin_balance=total_of("margin_balance"),
    )
async def _safe_close_page(page) -> None:
if page is None:
return
try:
await page.close()
except Exception:
pass
async def _safe_close_context(context) -> None:
if context is None:
return
try:
await context.close()
except Exception:
pass
async def _safe_close_browser(browser) -> None:
if browser is None:
return
try:
await browser.close()
except Exception:
pass
async def _safe_stop_playwright(playwright) -> None:
if playwright is None:
return
try:
await playwright.stop()
except Exception:
pass