Fix build: Bundle schwab_scraper source and use local dependencies
All checks were successful
Build and Push Docker Image / build (push) Successful in 34s
All checks were successful
Build and Push Docker Image / build (push) Successful in 34s
This commit is contained in:
426
schwab_scraper/features/accounts_positions/overview_scraper.py
Normal file
426
schwab_scraper/features/accounts_positions/overview_scraper.py
Normal file
@@ -0,0 +1,426 @@
|
||||
from __future__ import annotations

import asyncio
import logging
import math
import re
from decimal import Decimal, InvalidOperation
from typing import Any, Optional, Sequence

from ...browser.auth import ensure_cookies
from ...browser.client import connect, new_context, new_page
from ...browser.navigation import goto_with_auth_check
from ...core import AccountOverview, AccountSummary, Envelope, ErrorType, fail, ok
from ...core.config import get_playwright_url, load_config
|
||||
|
||||
# Page that get_account_overview() navigates to before scraping the accounts table.
SUMMARY_URL = "https://client.schwab.com/accounts/summary/summary.aspx/"
|
||||
|
||||
|
||||
def _parse_currency(value: str | None) -> Optional[Decimal]:
|
||||
if not value:
|
||||
return None
|
||||
|
||||
cleaned = value.strip()
|
||||
if not cleaned or cleaned in {"-", "--"}:
|
||||
return None
|
||||
|
||||
negative = False
|
||||
if cleaned.startswith("(") and cleaned.endswith(")"):
|
||||
negative = True
|
||||
cleaned = cleaned.replace("$", "").replace(",", "")
|
||||
cleaned = cleaned.replace("(", "").replace(")", "")
|
||||
cleaned = cleaned.replace("−", "-").strip()
|
||||
|
||||
if not cleaned:
|
||||
return None
|
||||
|
||||
try:
|
||||
parsed = Decimal(cleaned)
|
||||
if negative or parsed < 0:
|
||||
parsed = -abs(parsed)
|
||||
return parsed
|
||||
except InvalidOperation:
|
||||
return None
|
||||
|
||||
|
||||
def _parse_percentage(value: str | None) -> Optional[float]:
|
||||
if not value:
|
||||
return None
|
||||
cleaned = value.strip()
|
||||
if not cleaned:
|
||||
return None
|
||||
|
||||
negative = False
|
||||
if cleaned.startswith("(") and cleaned.endswith(")"):
|
||||
negative = True
|
||||
|
||||
cleaned = cleaned.replace("%", "").replace("(", "").replace(")", "")
|
||||
cleaned = cleaned.replace("−", "-").strip()
|
||||
|
||||
if not cleaned:
|
||||
return None
|
||||
|
||||
try:
|
||||
parsed = float(cleaned)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
if negative or parsed < 0:
|
||||
parsed = -abs(parsed)
|
||||
return parsed
|
||||
|
||||
|
||||
def _normalize_account_label(label: str) -> AccountSummary:
    """Build an ``AccountSummary`` from the free-text label of an account row.

    Whitespace runs in the label are collapsed to single spaces. The account
    type is the leading run of letters, spaces, ``&``, ``'`` and ``-`` with
    spaces turned into underscores (``"Account"`` when the label does not
    start with such a run). The digit suffix is the first run of 3-4 digits
    that ends on a word boundary once spaces are removed from the label.

    Args:
        label: Raw account label text.

    Returns:
        A summary whose ``id`` is ``"<type>-<digits>"`` when digits were
        found, otherwise just the type.
    """
    display_label = re.sub(r"\s+", " ", label).strip()

    digits = re.search(r"(\d{3,4})\b", display_label.replace(" ", ""))
    if digits is None:
        last4 = None
    else:
        last4 = digits.group(1)[-4:]

    prefix = re.search(r"^[A-Za-z&'\- ]+", display_label)
    if prefix is None:
        account_type = "Account"
    else:
        account_type = re.sub(r"\s+", "_", prefix.group(0).strip())

    if last4:
        account_id = f"{account_type}-{last4}"
    else:
        account_id = account_type

    return AccountSummary(
        id=account_id,
        label=display_label,
        type=account_type,
        last4=last4,
        is_margin="margin" in display_label.lower(),
    )
|
||||
|
||||
|
||||
def _match_account(candidate: AccountSummary, requested: AccountSummary | str | None) -> bool:
|
||||
if requested is None:
|
||||
return True
|
||||
if isinstance(requested, AccountSummary):
|
||||
requested_values = {
|
||||
requested.id.lower(),
|
||||
requested.label.lower(),
|
||||
}
|
||||
if requested.last4:
|
||||
requested_values.add(requested.last4.lower())
|
||||
else:
|
||||
lookup = requested.strip().lower()
|
||||
requested_values = {lookup}
|
||||
|
||||
candidate_values = {candidate.id.lower(), candidate.label.lower()}
|
||||
if candidate.last4:
|
||||
candidate_values.add(candidate.last4.lower())
|
||||
|
||||
return bool(candidate_values & requested_values)
|
||||
|
||||
|
||||
def _rows_to_dicts(headers: Sequence[str], rows: Sequence[Sequence[str]]) -> list[dict[str, str]]:
|
||||
normalized_headers = [header.strip().lower() for header in headers]
|
||||
results: list[dict[str, str]] = []
|
||||
for row in rows:
|
||||
row_map: dict[str, str] = {}
|
||||
for idx, header in enumerate(normalized_headers):
|
||||
if idx < len(row):
|
||||
row_map[header] = row[idx].strip()
|
||||
results.append(row_map)
|
||||
return results
|
||||
|
||||
|
||||
async def _extract_table(page) -> dict[str, Any] | None:
    """Read the accounts table out of the page as plain text.

    The script runs in the browser via ``page.evaluate``. It looks for the
    ``.sdps-tables__wrapper`` element, reads header texts (falling back to
    ``thead th``), then body rows (falling back to ``tbody tr`` when the
    primary row selector yields nothing).

    Args:
        page: Browser page object exposing an awaitable ``evaluate``.

    Returns:
        ``{"headers": [...], "rows": [[...], ...]}`` with trimmed text, or
        ``None`` when the table wrapper is not present.
    """
    script = """
        () => {
            const wrapper = document.querySelector('.sdps-tables__wrapper');
            if (!wrapper) {
                return null;
            }

            const headerRow = wrapper.querySelector('.sdps-tables__row--header');
            const headers = headerRow
                ? Array.from(headerRow.querySelectorAll('.sdps-tables__header-text'))
                    .map((el) => (el.textContent || '').trim())
                : [];

            if (!headers.length) {
                const legacyHeaders = wrapper.querySelectorAll('thead th');
                if (legacyHeaders.length) {
                    for (const th of legacyHeaders) {
                        headers.push((th.textContent || '').trim());
                    }
                }
            }

            const bodyRows = wrapper.querySelectorAll('.sdps-tables__row--body');
            const rows = [];
            if (bodyRows.length) {
                bodyRows.forEach((row) => {
                    const cells = Array.from(
                        row.querySelectorAll('.sdps-tables__cell, div[role="cell"], td')
                    ).map((cell) => (cell.textContent || '').trim());
                    rows.push(cells);
                });
            }

            if (!rows.length) {
                const fallbackRows = wrapper.querySelectorAll('tbody tr');
                fallbackRows.forEach((row) => {
                    const cells = Array.from(row.querySelectorAll('td')).map((cell) => (cell.textContent || '').trim());
                    if (cells.length) {
                        rows.push(cells);
                    }
                });
            }

            return { headers, rows };
        }
    """
    table = await page.evaluate(script)
    return table
|
||||
|
||||
|
||||
async def _extract_totals(page) -> dict[str, str | None]:
    """Read the page-level totals shown outside the accounts table.

    The script runs in the browser via ``page.evaluate`` and pulls the first
    currency (and, for day change, percentage) token out of the text around
    ``#total-value-label``, ``#day-change-label`` and the
    "cash & cash investments" table header.

    Matched tokens keep a leading sign (``-``, ``+`` or the Unicode minus
    U+2212, before or after the ``$``). Without it a day change rendered as
    ``-$123.45`` would be returned as ``$123.45`` and parsed as a gain.

    Args:
        page: Browser page object exposing an awaitable ``evaluate``.

    Returns:
        A mapping with keys ``total``, ``dayChange``, ``dayChangePct`` and
        ``cash``; each value is the raw matched text or ``None``.
    """
    return await page.evaluate(
        r"""
        () => {
            const result = { total: null, dayChange: null, dayChangePct: null, cash: null };

            // Optional sign before or after the dollar sign; \u2212 is the Unicode minus.
            const currencyRe = /[-\u2212+]?\$[-\u2212]?[\d,]+\.?\d*/;
            const percentRe = /[-\u2212+]?\d+(?:\.\d+)?%/;

            const totalLabel = document.querySelector('#total-value-label');
            if (totalLabel) {
                const valueEl = totalLabel.closest('[class*="sdps-panel"], h2, div');
                if (valueEl) {
                    const currencyMatch = valueEl.textContent?.match(currencyRe);
                    if (currencyMatch) {
                        result.total = currencyMatch[0];
                    }
                }
            }

            const dayChangeLabel = document.querySelector('#day-change-label');
            if (dayChangeLabel) {
                const container = dayChangeLabel.parentElement;
                if (container) {
                    const matchCurrency = container.textContent?.match(currencyRe);
                    const matchPct = container.textContent?.match(percentRe);
                    if (matchCurrency) {
                        result.dayChange = matchCurrency[0];
                    }
                    if (matchPct) {
                        result.dayChangePct = matchPct[0];
                    }
                }
            }

            const cashLabel = Array.from(document.querySelectorAll('.sdps-tables__header-text')).find((el) =>
                el.textContent?.toLowerCase().includes('cash & cash investments')
            );
            if (cashLabel) {
                const container = cashLabel.closest('div');
                if (container) {
                    const matchCurrency = container.textContent?.match(currencyRe);
                    if (matchCurrency) {
                        result.cash = matchCurrency[0];
                    }
                }
            }

            return result;
        }
        """
    )
|
||||
|
||||
|
||||
def _row_to_overview(row_map: dict[str, str]) -> tuple[AccountSummary, AccountOverview]:
    """Convert one table row into an account summary and its overview.

    Each field is looked up under several header spellings; the first
    non-empty cell wins. A row without any usable name is labelled
    ``"Account"``.

    Args:
        row_map: Lower-cased header -> cell text for a single row.

    Returns:
        ``(summary, overview)`` where ``overview.account`` is ``summary``.
    """

    def pick(*aliases: str) -> str | None:
        # First non-empty cell among the given header spellings.
        for alias in aliases:
            text = row_map.get(alias)
            if text:
                return text
        return None

    name = pick('name', 'account', 'account name', '') or "Account"
    summary = _normalize_account_label(name)

    overview = AccountOverview(
        account=summary,
        total_value=_parse_currency(pick('account value', 'total value', 'market value')),
        day_change=_parse_currency(pick('day change $', 'day change', 'day change amount')),
        day_change_pct=_parse_percentage(pick('day change %', 'day change percent')),
        cash=_parse_currency(pick('cash & cash investments', 'cash')),
        settled_cash=_parse_currency(pick('settled cash')),
        buying_power=_parse_currency(pick('buying power', 'available to trade')),
        margin_balance=_parse_currency(pick('margin balance', 'margin')),
    )
    return summary, overview
|
||||
|
||||
|
||||
async def get_account_overview(
    account: AccountSummary | str | None = None, *, debug: bool = False
) -> Envelope[AccountOverview]:
    """Scrape the account summary page and return one account overview.

    Args:
        account: Account to look up, as a summary or as a string compared
            against each row's id, label or trailing digits. ``None`` selects
            every row.
        debug: Forwarded to ``goto_with_auth_check``.

    Returns:
        ``ok(overview)`` on success. With ``account=None`` and more than one
        row, the overview is an aggregate of all rows, with total value, day
        change, day change percent and cash replaced by the page-level totals
        where those parse. Otherwise the first matching row is returned.
        ``fail(...)`` is returned when no session cookies are available, the
        page fails to load, the table is missing, no row matches, or any
        exception is raised while scraping.
    """
    cookies = await ensure_cookies()
    if not cookies:
        return fail("Unable to establish Schwab session.", ErrorType.AUTHENTICATION, retryable=False)

    config = load_config()
    playwright_url = get_playwright_url(config)

    # Initialised up front so the finally block can close whatever was opened.
    playwright = browser = context = page = None
    try:
        playwright, browser = await connect(playwright_url)
        context = await new_context(browser, cookies=cookies)
        page = await new_page(context)

        if not await goto_with_auth_check(page, context, SUMMARY_URL, debug=debug):
            return fail("Failed to load Schwab account summary page.", ErrorType.AUTHENTICATION, retryable=True)

        # Fixed pause before scraping; presumably lets the table finish rendering.
        await asyncio.sleep(1)

        table_data = await _extract_table(page)
        if not table_data:
            return fail("Unable to locate account overview table.", ErrorType.PARSING, retryable=True)

        row_dicts = _rows_to_dicts(table_data["headers"], table_data["rows"])
        matched_overviews: list[AccountOverview] = []

        for row_map in row_dicts:
            # Skip rows whose cells are all empty.
            if not "".join(row_map.values()):
                continue

            summary, overview = _row_to_overview(row_map)
            if _match_account(summary, account):
                matched_overviews.append(overview)

        if not matched_overviews:
            return fail("Account not found in overview table.", ErrorType.VALIDATION, retryable=False)

        if account is None and len(matched_overviews) > 1:
            aggregated = _aggregate_overviews(matched_overviews)
            totals = await _extract_totals(page)
            if totals:
                overrides = (
                    ("total", "total_value", _parse_currency),
                    ("dayChange", "day_change", _parse_currency),
                    ("dayChangePct", "day_change_pct", _parse_percentage),
                    ("cash", "cash", _parse_currency),
                )
                for key, attribute, parse in overrides:
                    parsed = parse(totals.get(key))
                    # Override only when the page value parsed; a failed parse
                    # must not replace the computed aggregate with None.
                    if parsed is not None:
                        setattr(aggregated, attribute, parsed)
            return ok(aggregated)

        return ok(matched_overviews[0])
    except Exception as exc:
        # Boundary handler: report any scraping failure as a retryable error.
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
    finally:
        # Close in reverse order of creation.
        await _safe_close_page(page)
        await _safe_close_context(context)
        await _safe_close_browser(browser)
        await _safe_stop_playwright(playwright)
|
||||
|
||||
|
||||
def _aggregate_overviews(overviews: Sequence[AccountOverview]) -> AccountOverview:
    """Combine several account overviews into one "All Accounts" overview.

    Each monetary field is the sum of the values that are present. A field
    is ``None`` only when no account reported it; a sum that is genuinely
    zero stays ``Decimal`` zero, as it does for a single account whose cell
    reads ``$0.00``.

    Args:
        overviews: Per-account overviews to combine.

    Returns:
        An overview whose account has id and type ``"AGGREGATE"``.
        ``day_change_pct`` is ``day_change / total_value * 100`` when the
        total is non-zero and a day change is available, else ``None``.
    """

    def _total(field: str) -> Optional[Decimal]:
        # Sum of the values present for this field; None if there are none.
        reported = [
            amount
            for amount in (getattr(item, field) for item in overviews)
            if amount is not None
        ]
        return sum(reported, Decimal("0")) if reported else None

    total_value = _total("total_value")
    day_change = _total("day_change")

    day_change_pct: Optional[float] = None
    if total_value and day_change is not None:
        # NOTE(review): this divides by the current total, not the prior-day
        # value (total - change); confirm which definition callers expect.
        try:
            day_change_pct = float((day_change / total_value) * 100)
        except (InvalidOperation, ZeroDivisionError):
            day_change_pct = None

    aggregated_summary = AccountSummary(
        id="AGGREGATE",
        label="All Accounts",
        type="AGGREGATE",
        last4=None,
        is_margin=False,
    )

    return AccountOverview(
        account=aggregated_summary,
        total_value=total_value,
        day_change=day_change,
        day_change_pct=day_change_pct,
        cash=_total("cash"),
        settled_cash=_total("settled_cash"),
        buying_power=_total("buying_power"),
        margin_balance=_total("margin_balance"),
    )
|
||||
|
||||
|
||||
async def _safe_close_page(page) -> None:
|
||||
if page is None:
|
||||
return
|
||||
try:
|
||||
await page.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _safe_close_context(context) -> None:
|
||||
if context is None:
|
||||
return
|
||||
try:
|
||||
await context.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _safe_close_browser(browser) -> None:
|
||||
if browser is None:
|
||||
return
|
||||
try:
|
||||
await browser.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _safe_stop_playwright(playwright) -> None:
|
||||
if playwright is None:
|
||||
return
|
||||
try:
|
||||
await playwright.stop()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
Reference in New Issue
Block a user