Fix build: Bundle schwab_scraper source and use local dependencies
All checks were successful
Build and Push Docker Image / build (push) Successful in 34s
All checks were successful
Build and Push Docker Image / build (push) Successful in 34s
This commit is contained in:
14
schwab_scraper/features/accounts_positions/__init__.py
Normal file
14
schwab_scraper/features/accounts_positions/__init__.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""Unified accounts and positions feature package."""
|
||||
|
||||
from .accounts_scraper import list_accounts
|
||||
from .overview_scraper import get_account_overview
|
||||
from .positions_scraper import get_positions
|
||||
from .portfolio_scraper import get_portfolio_snapshot
|
||||
|
||||
__all__ = [
|
||||
"list_accounts",
|
||||
"get_account_overview",
|
||||
"get_positions",
|
||||
"get_portfolio_snapshot",
|
||||
]
|
||||
|
||||
153
schwab_scraper/features/accounts_positions/accounts_scraper.py
Normal file
153
schwab_scraper/features/accounts_positions/accounts_scraper.py
Normal file
@@ -0,0 +1,153 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
from ...core import AccountSummary, Envelope, ErrorType, fail, ok
|
||||
from ...browser.client import connect, new_context, new_page
|
||||
from ...browser.navigation import goto_with_auth_check
|
||||
from ...browser.auth import ensure_cookies
|
||||
from ...core.config import get_playwright_url, load_config
|
||||
|
||||
# Use the same URL as transactions feature for consistency and reliability
|
||||
TRANSACTION_HISTORY_URL = "https://client.schwab.com/app/accounts/history/#/"
|
||||
|
||||
|
||||
def _normalize_account_option(text: str, value: str) -> Optional[AccountSummary]:
|
||||
text = text.strip()
|
||||
if not text:
|
||||
return None
|
||||
|
||||
normalized_text = re.sub(r"\s+", " ", text)
|
||||
|
||||
last4_match = re.search(r"(\d{3,4})", normalized_text.replace(" ", ""))
|
||||
last4 = last4_match.group(1)[-4:] if last4_match else None
|
||||
|
||||
type_match = re.search(r"^([A-Za-z&'\- ]+)", normalized_text)
|
||||
account_type = (type_match.group(1).strip() if type_match else "Account").replace(" ", "_")
|
||||
|
||||
account_id_candidates = [candidate for candidate in (value.strip(), last4, normalized_text) if candidate]
|
||||
account_id = account_id_candidates[0] if account_id_candidates else normalized_text
|
||||
|
||||
|
||||
label = normalized_text
|
||||
is_margin = "margin" in normalized_text.lower()
|
||||
|
||||
return AccountSummary(
|
||||
id=account_id,
|
||||
label=label,
|
||||
type=account_type,
|
||||
last4=last4,
|
||||
is_margin=is_margin,
|
||||
)
|
||||
|
||||
|
||||
async def list_accounts(debug: bool = False) -> Envelope[list[AccountSummary]]:
    """
    Discover accounts from the Schwab transaction history page.

    Delegates dropdown parsing to ``discover_accounts_from_page`` from the
    transactions feature, then converts each discovered entry into an
    ``AccountSummary``, de-duplicating by account id.

    Args:
        debug: When true, print discovery diagnostics and forward the flag to
            the navigation and discovery helpers.

    Returns:
        An ``ok`` envelope holding the discovered accounts, or a ``fail``
        envelope when the session, navigation, or parsing step fails.
    """
    cookies = await ensure_cookies()
    if not cookies:
        return fail("Unable to establish Schwab session.", ErrorType.AUTHENTICATION, retryable=False)

    config = load_config()
    playwright_url = get_playwright_url(config)

    playwright = browser = context = page = None
    try:
        playwright, browser = await connect(playwright_url)
        context = await new_context(browser, cookies=cookies)
        page = await new_page(context)

        if not await goto_with_auth_check(page, context, TRANSACTION_HISTORY_URL, debug=debug):
            return fail("Failed to load transaction history for account discovery.", ErrorType.AUTHENTICATION, retryable=True)

        # Allow page to fully load
        await asyncio.sleep(2)

        # Use the robust account discovery from transactions feature
        from ..transactions.scraper import discover_accounts_from_page

        discovered_accounts = await discover_accounts_from_page(page, debug=debug)

        if not discovered_accounts:
            return fail("Account dropdown not found on transaction history page.", ErrorType.PARSING, retryable=True)

        # Convert discovered accounts to AccountSummary objects
        accounts: list[AccountSummary] = []
        seen_ids: set[str] = set()

        for entry in discovered_accounts:
            # Fall back to the label whenever 'ending' is falsy, not only when
            # the key is absent; otherwise an entry with an empty 'ending'
            # would be dropped even though it carries a usable label.
            account_id = entry.get('ending') or entry.get('label', '')
            if not account_id or account_id in seen_ids:
                continue

            seen_ids.add(account_id)
            accounts.append(
                AccountSummary(
                    id=account_id,
                    label=entry.get('label', ''),
                    type=entry.get('type', 'Account'),
                    last4=entry.get('ending', ''),
                    is_margin=False,  # Will be enhanced in future if needed
                )
            )

        if not accounts:
            return fail("No accounts discovered from Schwab transaction history.", ErrorType.PARSING, retryable=True)

        if debug:
            print(f"DEBUG: Successfully discovered {len(accounts)} accounts:")
            for summary in accounts:
                print(f"DEBUG: - {summary.label} (type: {summary.type}, last4: {summary.last4})")

        return ok(accounts)
    except Exception as exc:
        # Boundary handler: any failure is reported to the caller as an envelope.
        if debug:
            print(f"DEBUG: Account discovery error: {exc}")
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
    finally:
        # Release resources in reverse order of acquisition.
        await _safe_close_page(page)
        await _safe_close_context(context)
        await _safe_close_browser(browser)
        await _safe_stop_playwright(playwright)
|
||||
|
||||
|
||||
async def _safe_close_page(page) -> None:
|
||||
if page is None:
|
||||
return
|
||||
try:
|
||||
await page.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _safe_close_context(context) -> None:
|
||||
if context is None:
|
||||
return
|
||||
try:
|
||||
await context.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _safe_close_browser(browser) -> None:
|
||||
if browser is None:
|
||||
return
|
||||
try:
|
||||
await browser.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _safe_stop_playwright(playwright) -> None:
|
||||
if playwright is None:
|
||||
return
|
||||
try:
|
||||
await playwright.stop()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
426
schwab_scraper/features/accounts_positions/overview_scraper.py
Normal file
426
schwab_scraper/features/accounts_positions/overview_scraper.py
Normal file
@@ -0,0 +1,426 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Any, Optional, Sequence
|
||||
|
||||
from ...browser.auth import ensure_cookies
|
||||
from ...browser.client import connect, new_context, new_page
|
||||
from ...browser.navigation import goto_with_auth_check
|
||||
from ...core import AccountOverview, AccountSummary, Envelope, ErrorType, fail, ok
|
||||
from ...core.config import get_playwright_url, load_config
|
||||
|
||||
SUMMARY_URL = "https://client.schwab.com/accounts/summary/summary.aspx/"
|
||||
|
||||
|
||||
def _parse_currency(value: str | None) -> Optional[Decimal]:
|
||||
if not value:
|
||||
return None
|
||||
|
||||
cleaned = value.strip()
|
||||
if not cleaned or cleaned in {"-", "--"}:
|
||||
return None
|
||||
|
||||
negative = False
|
||||
if cleaned.startswith("(") and cleaned.endswith(")"):
|
||||
negative = True
|
||||
cleaned = cleaned.replace("$", "").replace(",", "")
|
||||
cleaned = cleaned.replace("(", "").replace(")", "")
|
||||
cleaned = cleaned.replace("−", "-").strip()
|
||||
|
||||
if not cleaned:
|
||||
return None
|
||||
|
||||
try:
|
||||
parsed = Decimal(cleaned)
|
||||
if negative or parsed < 0:
|
||||
parsed = -abs(parsed)
|
||||
return parsed
|
||||
except InvalidOperation:
|
||||
return None
|
||||
|
||||
|
||||
def _parse_percentage(value: str | None) -> Optional[float]:
|
||||
if not value:
|
||||
return None
|
||||
cleaned = value.strip()
|
||||
if not cleaned:
|
||||
return None
|
||||
|
||||
negative = False
|
||||
if cleaned.startswith("(") and cleaned.endswith(")"):
|
||||
negative = True
|
||||
|
||||
cleaned = cleaned.replace("%", "").replace("(", "").replace(")", "")
|
||||
cleaned = cleaned.replace("−", "-").strip()
|
||||
|
||||
if not cleaned:
|
||||
return None
|
||||
|
||||
try:
|
||||
parsed = float(cleaned)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
if negative or parsed < 0:
|
||||
parsed = -abs(parsed)
|
||||
return parsed
|
||||
|
||||
|
||||
def _normalize_account_label(label: str) -> AccountSummary:
    """Derive an ``AccountSummary`` from an account label shown on the page.

    Whitespace is collapsed. The type is the leading run of
    letters/&/'/-/spaces with whitespace turned into underscores, and the id
    is ``"<type>-<digits>"`` when digits were found, else just the type.
    """
    cleaned = re.sub(r"\s+", " ", label).strip()

    # First 3-4 digit run that ends at a word boundary, searched with spaces removed.
    digit_run = re.search(r"(\d{3,4})\b", cleaned.replace(" ", ""))
    last4 = None if digit_run is None else digit_run.group(1)[-4:]

    leading_words = re.search(r"^[A-Za-z&'\- ]+", cleaned)
    if leading_words is None:
        account_type = "Account"
    else:
        account_type = re.sub(r"\s+", "_", leading_words.group(0).strip())

    if last4:
        account_id = f"{account_type}-{last4}"
    else:
        account_id = account_type

    return AccountSummary(
        id=account_id,
        label=cleaned,
        type=account_type,
        last4=last4,
        is_margin="margin" in cleaned.lower(),
    )
|
||||
|
||||
|
||||
def _match_account(candidate: AccountSummary, requested: AccountSummary | str | None) -> bool:
|
||||
if requested is None:
|
||||
return True
|
||||
if isinstance(requested, AccountSummary):
|
||||
requested_values = {
|
||||
requested.id.lower(),
|
||||
requested.label.lower(),
|
||||
}
|
||||
if requested.last4:
|
||||
requested_values.add(requested.last4.lower())
|
||||
else:
|
||||
lookup = requested.strip().lower()
|
||||
requested_values = {lookup}
|
||||
|
||||
candidate_values = {candidate.id.lower(), candidate.label.lower()}
|
||||
if candidate.last4:
|
||||
candidate_values.add(candidate.last4.lower())
|
||||
|
||||
return bool(candidate_values & requested_values)
|
||||
|
||||
|
||||
def _rows_to_dicts(headers: Sequence[str], rows: Sequence[Sequence[str]]) -> list[dict[str, str]]:
|
||||
normalized_headers = [header.strip().lower() for header in headers]
|
||||
results: list[dict[str, str]] = []
|
||||
for row in rows:
|
||||
row_map: dict[str, str] = {}
|
||||
for idx, header in enumerate(normalized_headers):
|
||||
if idx < len(row):
|
||||
row_map[header] = row[idx].strip()
|
||||
results.append(row_map)
|
||||
return results
|
||||
|
||||
|
||||
async def _extract_table(page) -> dict[str, Any] | None:
    """Read the summary table's header texts and row cell texts from the page.

    The script looks inside ``.sdps-tables__wrapper``. It prefers the
    ``sdps-tables`` header/body row classes and falls back to plain
    ``thead th`` / ``tbody tr`` markup when those yield nothing.

    Returns:
        ``{"headers": [...], "rows": [[...], ...]}`` as produced by the
        script, or ``None`` when the wrapper element is not present.
    """
    extraction_script = """
        () => {
            const wrapper = document.querySelector('.sdps-tables__wrapper');
            if (!wrapper) {
                return null;
            }

            const headerRow = wrapper.querySelector('.sdps-tables__row--header');
            const headers = headerRow
                ? Array.from(headerRow.querySelectorAll('.sdps-tables__header-text'))
                    .map((el) => (el.textContent || '').trim())
                : [];

            if (!headers.length) {
                const legacyHeaders = wrapper.querySelectorAll('thead th');
                if (legacyHeaders.length) {
                    for (const th of legacyHeaders) {
                        headers.push((th.textContent || '').trim());
                    }
                }
            }

            const bodyRows = wrapper.querySelectorAll('.sdps-tables__row--body');
            const rows = [];
            if (bodyRows.length) {
                bodyRows.forEach((row) => {
                    const cells = Array.from(
                        row.querySelectorAll('.sdps-tables__cell, div[role="cell"], td')
                    ).map((cell) => (cell.textContent || '').trim());
                    rows.push(cells);
                });
            }

            if (!rows.length) {
                const fallbackRows = wrapper.querySelectorAll('tbody tr');
                fallbackRows.forEach((row) => {
                    const cells = Array.from(row.querySelectorAll('td')).map((cell) => (cell.textContent || '').trim());
                    if (cells.length) {
                        rows.push(cells);
                    }
                });
            }

            return { headers, rows };
        }
        """
    table = await page.evaluate(extraction_script)
    return table
|
||||
|
||||
|
||||
async def _extract_totals(page) -> dict[str, str | None]:
    """Read the page-level totals as raw display strings.

    The script returns an object with the keys ``total``, ``dayChange``,
    ``dayChangePct`` and ``cash``; each is the matched text or ``null`` when
    the corresponding label or pattern was not found.

    The day-change amount is matched together with its sign (leading ``-``,
    Unicode minus, or accounting parentheses). Without that, a loss such as
    ``-$123.45`` would be captured as ``$123.45`` and contradict the signed
    percentage captured next to it.
    """
    return await page.evaluate(
        r"""
        () => {
            const result = { total: null, dayChange: null, dayChangePct: null, cash: null };

            const totalLabel = document.querySelector('#total-value-label');
            if (totalLabel) {
                const valueEl = totalLabel.closest('[class*="sdps-panel"], h2, div');
                if (valueEl) {
                    const currencyMatch = valueEl.textContent?.match(/\$[\d,]+\.?\d*/);
                    if (currencyMatch) {
                        result.total = currencyMatch[0];
                    }
                }
            }

            const dayChangeLabel = document.querySelector('#day-change-label');
            if (dayChangeLabel) {
                const container = dayChangeLabel.parentElement;
                if (container) {
                    // Keep the sign: "-$1.23", "−$1.23" and "($1.23)" are all losses.
                    const matchCurrency = container.textContent?.match(/\(?[-−]?\$[\d,]+\.?\d*\)?/);
                    const matchPct = container.textContent?.match(/[-−]?\d+(?:\.\d+)?%/);
                    if (matchCurrency) {
                        result.dayChange = matchCurrency[0];
                    }
                    if (matchPct) {
                        result.dayChangePct = matchPct[0];
                    }
                }
            }

            const cashLabel = Array.from(document.querySelectorAll('.sdps-tables__header-text')).find((el) =>
                el.textContent?.toLowerCase().includes('cash & cash investments')
            );
            if (cashLabel) {
                const container = cashLabel.closest('div');
                if (container) {
                    const matchCurrency = container.textContent?.match(/\$[\d,]+\.?\d*/);
                    if (matchCurrency) {
                        result.cash = matchCurrency[0];
                    }
                }
            }

            return result;
        }
        """
    )
|
||||
|
||||
|
||||
def _row_to_overview(row_map: dict[str, str]) -> tuple[AccountSummary, AccountOverview]:
    """Convert one header->cell mapping into an account summary and overview.

    Each field is read from the first non-empty cell among several
    alternative lowercased header names. The label defaults to ``"Account"``.
    """

    def first_present(*keys: str) -> str | None:
        """Return the first non-empty cell among ``keys``, or ``None``."""
        for key in keys:
            cell = row_map.get(key)
            if cell:
                return cell
        return None

    # The empty key covers a label column whose header text is blank.
    label = first_present('name', 'account', 'account name', '') or "Account"
    account_summary = _normalize_account_label(label)

    overview = AccountOverview(
        account=account_summary,
        total_value=_parse_currency(first_present('account value', 'total value', 'market value')),
        day_change=_parse_currency(first_present('day change $', 'day change', 'day change amount')),
        day_change_pct=_parse_percentage(first_present('day change %', 'day change percent')),
        cash=_parse_currency(first_present('cash & cash investments', 'cash')),
        settled_cash=_parse_currency(first_present('settled cash')),
        buying_power=_parse_currency(first_present('buying power', 'available to trade')),
        margin_balance=_parse_currency(first_present('margin balance', 'margin')),
    )
    return account_summary, overview
|
||||
|
||||
|
||||
async def get_account_overview(
    account: AccountSummary | str | None = None, *, debug: bool = False
) -> Envelope[AccountOverview]:
    """Scrape the account summary page and return an overview.

    Args:
        account: Account to look up (summary object or identifying string).
            When ``None`` and the table lists several accounts, the rows are
            aggregated and then overridden by the page-level totals.
        debug: Forwarded to the navigation helper.

    Returns:
        An ``ok`` envelope with the matching (or aggregated) overview, or a
        ``fail`` envelope describing the session, navigation, parsing or
        lookup failure.
    """
    cookies = await ensure_cookies()
    if not cookies:
        return fail("Unable to establish Schwab session.", ErrorType.AUTHENTICATION, retryable=False)

    config = load_config()
    playwright_url = get_playwright_url(config)

    playwright = browser = context = page = None
    try:
        playwright, browser = await connect(playwright_url)
        context = await new_context(browser, cookies=cookies)
        page = await new_page(context)

        if not await goto_with_auth_check(page, context, SUMMARY_URL, debug=debug):
            return fail("Failed to load Schwab account summary page.", ErrorType.AUTHENTICATION, retryable=True)

        await asyncio.sleep(1)

        table_data = await _extract_table(page)
        if not table_data:
            return fail("Unable to locate account overview table.", ErrorType.PARSING, retryable=True)

        matched_overviews: list[AccountOverview] = []
        for row_map in _rows_to_dicts(table_data["headers"], table_data["rows"]):
            # Skip rows whose cells are all empty.
            if not "".join(row_map.values()):
                continue

            summary, overview = _row_to_overview(row_map)
            if _match_account(summary, account):
                matched_overviews.append(overview)

        if not matched_overviews:
            return fail("Account not found in overview table.", ErrorType.VALIDATION, retryable=False)

        if account is not None or len(matched_overviews) == 1:
            return ok(matched_overviews[0])

        aggregated = _aggregate_overviews(matched_overviews)
        totals = await _extract_totals(page) or {}

        # Page-level totals take precedence over the summed rows, but only
        # when they actually parse: an unparseable page value must not
        # replace a computed aggregate with None.
        page_overrides = (
            ("total", "total_value", _parse_currency),
            ("dayChange", "day_change", _parse_currency),
            ("dayChangePct", "day_change_pct", _parse_percentage),
            ("cash", "cash", _parse_currency),
        )
        for key, attribute, parse in page_overrides:
            parsed = parse(totals.get(key))
            if parsed is not None:
                setattr(aggregated, attribute, parsed)

        return ok(aggregated)
    except Exception as exc:
        # Boundary handler: any failure is reported to the caller as an envelope.
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
    finally:
        # Release resources in reverse order of acquisition.
        await _safe_close_page(page)
        await _safe_close_context(context)
        await _safe_close_browser(browser)
        await _safe_stop_playwright(playwright)
|
||||
|
||||
|
||||
def _aggregate_overviews(overviews: Sequence[AccountOverview]) -> AccountOverview:
    """Sum per-account overviews into a single "All Accounts" overview.

    Each monetary field is the sum of the non-``None`` values across
    ``overviews``. A field stays ``None`` only when no account supplied a
    value for it, so a genuine zero sum (for example a $0.00 margin balance)
    is reported as zero rather than as missing data.

    ``day_change_pct`` is the summed day change divided by the summed total
    value, times 100, and is ``None`` when either sum is missing or the
    total is zero.
    """
    fields = (
        "total_value",
        "day_change",
        "cash",
        "settled_cash",
        "buying_power",
        "margin_balance",
    )
    sums: dict[str, Optional[Decimal]] = dict.fromkeys(fields)

    for item in overviews:
        for name in fields:
            amount = getattr(item, name)
            if amount is None:
                continue
            running = sums[name]
            sums[name] = amount if running is None else running + amount

    total_value = sums["total_value"]
    day_change = sums["day_change"]

    day_change_pct: Optional[float] = None
    if total_value is not None and day_change is not None and total_value != 0:
        try:
            day_change_pct = float((day_change / total_value) * 100)
        except (InvalidOperation, ZeroDivisionError):
            day_change_pct = None

    aggregated_summary = AccountSummary(
        id="AGGREGATE",
        label="All Accounts",
        type="AGGREGATE",
        last4=None,
        is_margin=False,
    )

    return AccountOverview(
        account=aggregated_summary,
        total_value=total_value,
        day_change=day_change,
        day_change_pct=day_change_pct,
        cash=sums["cash"],
        settled_cash=sums["settled_cash"],
        buying_power=sums["buying_power"],
        margin_balance=sums["margin_balance"],
    )
|
||||
|
||||
|
||||
async def _safe_close_page(page) -> None:
|
||||
if page is None:
|
||||
return
|
||||
try:
|
||||
await page.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _safe_close_context(context) -> None:
|
||||
if context is None:
|
||||
return
|
||||
try:
|
||||
await context.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _safe_close_browser(browser) -> None:
|
||||
if browser is None:
|
||||
return
|
||||
try:
|
||||
await browser.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _safe_stop_playwright(playwright) -> None:
|
||||
if playwright is None:
|
||||
return
|
||||
try:
|
||||
await playwright.stop()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
134
schwab_scraper/features/accounts_positions/portfolio_scraper.py
Normal file
134
schwab_scraper/features/accounts_positions/portfolio_scraper.py
Normal file
@@ -0,0 +1,134 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Iterable, Optional
|
||||
|
||||
from ...core import AccountSummary, Envelope, ErrorType, PortfolioSnapshot, Position, fail, ok
|
||||
from .positions_scraper import get_positions
|
||||
|
||||
|
||||
def _aggregate_positions(positions: Iterable[Position]) -> tuple[list[Position], Optional[Decimal]]:
|
||||
aggregated: dict[str, Position] = {}
|
||||
total_value = Decimal("0")
|
||||
has_value = False
|
||||
|
||||
for position in positions:
|
||||
if position.market_value is not None:
|
||||
total_value += position.market_value
|
||||
has_value = True
|
||||
|
||||
key = position.symbol.upper() if position.symbol else "UNKNOWN"
|
||||
if key not in aggregated:
|
||||
aggregated[key] = Position(
|
||||
symbol=position.symbol,
|
||||
description=position.description,
|
||||
asset_type=position.asset_type,
|
||||
quantity=position.quantity,
|
||||
market_price=position.market_price,
|
||||
market_value=position.market_value,
|
||||
cost_basis_total=position.cost_basis_total,
|
||||
unrealized_gain=position.unrealized_gain,
|
||||
unrealized_gain_pct=position.unrealized_gain_pct,
|
||||
lots=list(position.lots),
|
||||
)
|
||||
continue
|
||||
|
||||
existing = aggregated[key]
|
||||
|
||||
if position.quantity is not None:
|
||||
if existing.quantity is None:
|
||||
existing.quantity = position.quantity
|
||||
else:
|
||||
existing.quantity += position.quantity
|
||||
|
||||
if position.market_value is not None:
|
||||
if existing.market_value is None:
|
||||
existing.market_value = position.market_value
|
||||
else:
|
||||
existing.market_value += position.market_value
|
||||
|
||||
if position.cost_basis_total is not None:
|
||||
if existing.cost_basis_total is None:
|
||||
existing.cost_basis_total = position.cost_basis_total
|
||||
else:
|
||||
existing.cost_basis_total += position.cost_basis_total
|
||||
|
||||
if position.unrealized_gain is not None:
|
||||
if existing.unrealized_gain is None:
|
||||
existing.unrealized_gain = position.unrealized_gain
|
||||
else:
|
||||
existing.unrealized_gain += position.unrealized_gain
|
||||
|
||||
if position.market_price is not None:
|
||||
existing.market_price = position.market_price
|
||||
|
||||
if position.unrealized_gain_pct is not None:
|
||||
existing.unrealized_gain_pct = position.unrealized_gain_pct
|
||||
|
||||
if position.description and not existing.description:
|
||||
existing.description = position.description
|
||||
|
||||
if position.asset_type:
|
||||
existing.asset_type = position.asset_type
|
||||
|
||||
if position.lots:
|
||||
existing.lots.extend(position.lots)
|
||||
|
||||
for item in aggregated.values():
|
||||
if item.unrealized_gain is not None and item.cost_basis_total not in (None, Decimal("0")):
|
||||
try:
|
||||
item.unrealized_gain_pct = float((item.unrealized_gain / item.cost_basis_total) * 100)
|
||||
except (InvalidOperation, ZeroDivisionError):
|
||||
item.unrealized_gain_pct = None
|
||||
|
||||
total_value_out = total_value if has_value else None
|
||||
return list(aggregated.values()), total_value_out
|
||||
|
||||
|
||||
async def get_portfolio_snapshot(
    account: AccountSummary | str | None = None,
    *,
    aggregate_by_symbol: bool = True,
    include_non_equity: bool = False,
    debug: bool = False,
) -> Envelope[PortfolioSnapshot]:
    """Fetch positions and wrap them in a ``PortfolioSnapshot`` envelope.

    Args:
        account: Forwarded to ``get_positions``.
        aggregate_by_symbol: When true, merge positions sharing a symbol via
            ``_aggregate_positions``; otherwise return them as fetched.
        include_non_equity: Forwarded to ``get_positions``.
        debug: Forwarded to ``get_positions``.

    Returns:
        An ``ok`` envelope with the snapshot, or a ``fail`` envelope that
        mirrors the error reported by ``get_positions``.
    """
    positions_envelope = await get_positions(
        account=account,
        include_non_equity=include_non_equity,
        debug=debug,
    )

    if not positions_envelope["success"]:
        message = positions_envelope.get("error") or "Failed to retrieve positions."
        error_type = positions_envelope.get("error_type") or ErrorType.UNKNOWN
        return fail(message, error_type, positions_envelope.get("retryable", True))

    positions = positions_envelope["data"] or []

    if aggregate_by_symbol:
        equities, total_value = _aggregate_positions(positions)
    else:
        equities = positions
        # The total stays None unless at least one position carries a market value.
        values = [item.market_value for item in positions if item.market_value is not None]
        total_value = sum(values, Decimal("0")) if values else None

    return ok(
        PortfolioSnapshot(
            equities=equities,
            total_value=total_value,
            count=len(equities),
        )
    )
|
||||
|
||||
432
schwab_scraper/features/accounts_positions/positions_scraper.py
Normal file
432
schwab_scraper/features/accounts_positions/positions_scraper.py
Normal file
@@ -0,0 +1,432 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Any, Optional, Sequence
|
||||
|
||||
from ...browser.auth import ensure_cookies
|
||||
from ...browser.client import connect, new_context, new_page
|
||||
from ...browser.navigation import goto_with_auth_check
|
||||
from ...core import AccountSummary, Envelope, ErrorType, Lot, Position, fail, ok
|
||||
from ...core.config import get_playwright_url, load_config
|
||||
|
||||
POSITIONS_URL = "https://client.schwab.com/app/accounts/positions/#/"
|
||||
|
||||
|
||||
def _parse_decimal(value: str | None) -> Optional[Decimal]:
|
||||
if not value:
|
||||
return None
|
||||
|
||||
cleaned = value.strip()
|
||||
if not cleaned or cleaned in {"-", "--"}:
|
||||
return None
|
||||
|
||||
negative = False
|
||||
if cleaned.startswith("(") and cleaned.endswith(")"):
|
||||
negative = True
|
||||
|
||||
cleaned = (
|
||||
cleaned.replace("$", "")
|
||||
.replace(",", "")
|
||||
.replace("(", "")
|
||||
.replace(")", "")
|
||||
.replace("−", "-")
|
||||
.replace("%", "")
|
||||
.strip()
|
||||
)
|
||||
|
||||
if not cleaned:
|
||||
return None
|
||||
|
||||
try:
|
||||
parsed = Decimal(cleaned)
|
||||
if negative or parsed < 0:
|
||||
parsed = -abs(parsed)
|
||||
return parsed
|
||||
except InvalidOperation:
|
||||
return None
|
||||
|
||||
|
||||
def _parse_float(value: str | None) -> Optional[float]:
    """Parse a displayed number via ``_parse_decimal`` and return it as a float.

    Returns ``None`` when the text does not parse or the conversion fails.
    """
    parsed = _parse_decimal(value)
    try:
        return None if parsed is None else float(parsed)
    except (ValueError, InvalidOperation):
        return None
|
||||
|
||||
|
||||
def _normalize_account_label(label: str) -> AccountSummary:
    """Derive an ``AccountSummary`` from an account label shown on the page.

    Whitespace is collapsed. The type is the leading run of
    letters/&/'/-/spaces with whitespace turned into underscores, and the id
    is ``"<type>-<digits>"`` when digits were found, else just the type.
    """
    cleaned = re.sub(r"\s+", " ", label).strip()

    # First 3-4 digit run that ends at a word boundary, searched with spaces removed.
    digit_run = re.search(r"(\d{3,4})\b", cleaned.replace(" ", ""))
    last4 = None if digit_run is None else digit_run.group(1)[-4:]

    leading_words = re.search(r"^[A-Za-z&'\- ]+", cleaned)
    if leading_words is None:
        account_type = "Account"
    else:
        account_type = re.sub(r"\s+", "_", leading_words.group(0).strip())

    if last4:
        account_id = f"{account_type}-{last4}"
    else:
        account_id = account_type

    return AccountSummary(
        id=account_id,
        label=cleaned,
        type=account_type,
        last4=last4,
        is_margin="margin" in cleaned.lower(),
    )
|
||||
|
||||
|
||||
def _match_account(candidate: AccountSummary, requested: AccountSummary | str | None) -> bool:
|
||||
if requested is None:
|
||||
return True
|
||||
|
||||
if isinstance(requested, AccountSummary):
|
||||
requested_values = {
|
||||
requested.id.lower(),
|
||||
requested.label.lower(),
|
||||
}
|
||||
if requested.last4:
|
||||
requested_values.add(requested.last4.lower())
|
||||
else:
|
||||
lookup = requested.strip().lower()
|
||||
requested_values = {lookup}
|
||||
|
||||
candidate_values = {candidate.id.lower(), candidate.label.lower()}
|
||||
if candidate.last4:
|
||||
candidate_values.add(candidate.last4.lower())
|
||||
|
||||
return bool(candidate_values & requested_values)
|
||||
|
||||
|
||||
def classify_asset(symbol: str | None, description: str | None) -> str:
    """Classify a position from its symbol and description.

    Args:
        symbol: Ticker or instrument symbol; may be empty or ``None``.
        description: Free-text description; may be empty or ``None``.

    Returns:
        One of ``"CASH"``, ``"ETF"``, ``"MUTUAL_FUND"``, ``"EQUITY"``,
        ``"OPTION"``, ``"BOND"`` or ``"OTHER"``.
    """
    sym = (symbol or "").strip().upper()
    desc = (description or "").strip().upper()

    # The cash pseudo-symbols must be tested before the equity pattern: each
    # is 1-5 uppercase letters, so the pattern below would otherwise claim
    # them and this check could never fire.
    # NOTE(review): a listed ticker spelled exactly like one of these would
    # also be classified as cash - confirm against the symbols the page emits.
    if sym in {"CASH", "MMDA", "SWEEP"}:
        return "CASH"

    if re.fullmatch(r"[A-Z]{1,5}", sym):
        if "ETF" in desc:
            return "ETF"
        if "FUND" in desc or "MUTUAL" in desc:
            return "MUTUAL_FUND"
        return "EQUITY"

    # Long symbols that contain digits are treated as option contracts.
    if len(sym) > 5 and re.search(r"\d", sym):
        return "OPTION"

    # "CD" is matched as a whole word so that it no longer fires inside
    # unrelated words that merely contain the letters "CD".
    if "BOND" in desc or "TREASURY" in desc or re.search(r"\bCDS?\b", desc):
        return "BOND"

    if "CASH" in desc:
        return "CASH"
    if "ETF" in desc:
        return "ETF"
    if "FUND" in desc:
        return "MUTUAL_FUND"

    return "OTHER"
|
||||
|
||||
|
||||
async def _evaluate_table(page) -> dict[str, Any] | None:
    """Read headers and position rows from the ``#positionsDetails`` table.

    The script walks the ``tbody`` rows in order. An account-header row sets
    the account name applied to the position rows that follow it. A lot row
    is attached to the most recent position. A row whose class contains
    ``position-row`` starts a new entry carrying its cell texts and its
    ``data-symbol`` attribute.

    Returns:
        ``{"headers": [...], "rows": [{type, cells, lots, symbol, account}, ...]}``
        as produced by the script, or ``None`` when the table is not present.
    """
    extraction_script = """
        () => {
            const table = document.querySelector('#positionsDetails');
            if (!table) {
                return null;
            }

            const headers = Array.from(table.querySelectorAll('thead tr th')).map((th) =>
                (th.innerText || th.textContent || '').trim()
            );

            const rowElements = Array.from(table.querySelectorAll('tbody tr'));
            const rows = [];
            let current = null;
            let currentAccount = null;

            const isLotRow = (row) => {
                const klass = (row.className || '').toLowerCase();
                if (klass.includes('lot') || klass.includes('sub') || klass.includes('child')) {
                    return true;
                }
                const dataRole = (row.getAttribute('data-row-type') || '').toLowerCase();
                return dataRole.includes('lot');
            };

            const isPositionRow = (row) => {
                const klass = (row.className || '').toLowerCase();
                return klass.includes('position-row');
            };

            const isAccountHeader = (row) => {
                const klass = (row.className || '').toLowerCase();
                const text = (row.textContent || '').trim();
                return !klass.includes('position-row') &&
                    (klass.includes('highlight-row') || klass.includes('border-top-dark')) &&
                    text.includes('account panel');
            };

            for (const row of rowElements) {
                // Check if this is an account header row
                if (isAccountHeader(row)) {
                    const text = row.textContent.trim();
                    // Extract account name from account panel text
                    const match = text.match(/account panel[\\s\\n]+([^\\n]+)/);
                    if (match) {
                        currentAccount = match[1].trim();
                    }
                    continue;
                }

                const cells = Array.from(row.querySelectorAll('td')).map((cell) =>
                    (cell.innerText || cell.textContent || '').trim()
                );

                if (!cells.length) {
                    continue;
                }

                if (isLotRow(row)) {
                    if (current) {
                        current.lots.push(cells);
                    }
                } else if (isPositionRow(row)) {
                    // Extract symbol from data-symbol attribute
                    const symbol = row.getAttribute('data-symbol') || '';
                    current = {
                        type: 'position',
                        cells: cells,
                        lots: [],
                        symbol: symbol,
                        account: currentAccount
                    };
                    rows.push(current);
                }
            }

            return { headers, rows };
        }
        """
    table = await page.evaluate(extraction_script)
    return table
|
||||
|
||||
|
||||
def _map_row(headers: Sequence[str], cells: Sequence[str]) -> dict[str, str]:
|
||||
result: dict[str, str] = {}
|
||||
|
||||
# Special handling: The table has columns in headers that don't correspond to cells
|
||||
# Headers: ['', 'Symbol', 'Description', 'Qty', 'Price', ...]
|
||||
# Cells: ['VANGUARD...', '192.5', '$328.17', ...]
|
||||
# The first two headers (empty checkbox and Symbol) have no corresponding cells
|
||||
# So: Cell 0 → 'Description', Cell 1 → 'Qty', Cell 2 → 'Price', etc.
|
||||
|
||||
# Find the symbol header index to know where the offset starts
|
||||
symbol_header_idx = None
|
||||
for idx, header in enumerate(headers):
|
||||
key = header.strip().lower()
|
||||
if 'symbol' in key and 'description' not in key:
|
||||
symbol_header_idx = idx
|
||||
break
|
||||
|
||||
# Calculate offset - typically 2 (empty column + symbol column)
|
||||
offset = symbol_header_idx + 1 if symbol_header_idx is not None else 0
|
||||
|
||||
for idx, header in enumerate(headers):
|
||||
# Normalize header: take first line, strip, lowercase
|
||||
# Headers often have format "Label\nsort\nfieldname"
|
||||
header_parts = header.strip().split('\n')
|
||||
key = header_parts[0].strip().lower() if header_parts else ""
|
||||
if not key:
|
||||
key = f"column_{idx}"
|
||||
|
||||
# Map header to cell with offset
|
||||
if idx < offset:
|
||||
# These headers (empty, symbol) have no corresponding cells
|
||||
value = ""
|
||||
else:
|
||||
cell_idx = idx - offset
|
||||
value = cells[cell_idx].strip() if cell_idx < len(cells) else ""
|
||||
|
||||
result[key] = value
|
||||
return result
|
||||
|
||||
|
||||
def _parse_lots(lot_rows: Sequence[Sequence[str]]) -> list[Lot]:
    """Convert raw lot rows into ``Lot`` objects.

    Each row is read positionally: cell 0 is the acquired date, cell 1 the
    quantity, cell 2 the cost basis and cell 3 the lot id.  Missing trailing
    cells are passed on as ``None``; empty rows are skipped.

    Args:
        lot_rows: Cell texts for every lot row attached to one position.

    Returns:
        One ``Lot`` per non-empty row, in input order.
    """

    def cell_or_none(row: Sequence[str], position: int):
        # Bounds-checked positional access.
        return row[position] if len(row) > position else None

    parsed: list[Lot] = []
    for row in lot_rows:
        if not row:
            continue

        # The row is non-empty here, so cell 0 always exists.
        acquired = row[0].strip()
        qty = _parse_float(cell_or_none(row, 1))
        basis = _parse_decimal(cell_or_none(row, 2))
        raw_id = cell_or_none(row, 3)
        identifier = raw_id.strip() if raw_id is not None else None

        parsed.append(
            Lot(
                acquired_date=acquired or None,
                quantity=qty,
                cost_basis=basis,
                lot_id=identifier or None,
            )
        )
    return parsed
|
||||
|
||||
|
||||
def _row_to_position(row_map: dict[str, str], lots_rows: Sequence[Sequence[str]], symbol: str = "") -> Position:
    """Build a ``Position`` from one mapped table row.

    Args:
        row_map: Header-label -> cell-text mapping for the row.
        lots_rows: Raw cell lists of the lot rows that belong to this position.
        symbol: Ticker symbol, supplied by the caller rather than read from
            ``row_map``.

    Returns:
        A ``Position`` whose numeric fields come from the first populated
        alias of each column label.
    """

    def first_filled(*labels: str):
        # Same result as ``row_map.get(a) or row_map.get(b) or ...``: the first
        # truthy value, otherwise whatever the last lookup returned.
        found = None
        for label in labels:
            found = row_map.get(label)
            if found:
                break
        return found

    description = first_filled('description', 'name', 'column_1') or ""

    market_price = _parse_decimal(first_filled('price', 'market price', 'last price'))
    quantity = _parse_float(first_filled('quantity', 'qty'))
    market_value = _parse_decimal(first_filled('market value', 'mkt val'))
    cost_basis_total = _parse_decimal(first_filled('cost basis', 'total cost'))
    unrealized_gain = _parse_decimal(
        first_filled('gain/loss $', 'unrealized gain', 'gain/loss')
    )
    unrealized_gain_pct = _parse_float(
        first_filled('gain/loss %', 'unrealized gain %')
    )

    asset_type = classify_asset(symbol, description)
    lots = _parse_lots(lots_rows)

    return Position(
        symbol=symbol or "",
        description=description or None,
        asset_type=asset_type,
        quantity=quantity,
        market_price=market_price,
        market_value=market_value,
        cost_basis_total=cost_basis_total,
        unrealized_gain=unrealized_gain,
        unrealized_gain_pct=unrealized_gain_pct,
        lots=lots,
    )
|
||||
|
||||
|
||||
async def get_positions(
    account: AccountSummary | str | None = None,
    *,
    include_non_equity: bool = False,
    debug: bool = False,
) -> Envelope[list[Position]]:
    """Scrape the positions page and return the positions it lists.

    Args:
        account: When not ``None``, keep only rows whose account label matches
            it (via ``_match_account``); rows without an account label are
            dropped.
        include_non_equity: When ``False``, keep only positions whose
            ``asset_type`` is ``"EQUITY"`` or ``"ETF"``.
        debug: Forwarded to ``goto_with_auth_check``.

    Returns:
        ``ok(positions)`` on success.  A ``fail(...)`` envelope when no session
        cookies are available, the page cannot be loaded, the table or its
        headers are missing, nothing matches the filters, or any exception is
        raised while scraping.
    """
    cookies = await ensure_cookies()
    if not cookies:
        return fail("Unable to establish Schwab session.", ErrorType.AUTHENTICATION, retryable=False)

    playwright_url = get_playwright_url(load_config())

    # Pre-bind every handle so the ``finally`` block can close whatever was opened.
    playwright = browser = context = page = None
    try:
        playwright, browser = await connect(playwright_url)
        context = await new_context(browser, cookies=cookies)
        page = await new_page(context)

        page_loaded = await goto_with_auth_check(page, context, POSITIONS_URL, debug=debug)
        if not page_loaded:
            return fail("Failed to load Schwab positions page.", ErrorType.AUTHENTICATION, retryable=True)

        # Wait for the positions container, then scroll to the bottom of the
        # page with short pauses before reading the table.
        await page.wait_for_selector('#positionsDetails', timeout=45000)
        await page.wait_for_timeout(1000)
        await page.evaluate('window.scrollTo(0, document.body.scrollHeight)')
        await page.wait_for_timeout(1500)

        table_data = await _evaluate_table(page)
        if not table_data:
            return fail("Unable to locate positions table.", ErrorType.PARSING, retryable=True)

        headers = [header.strip().lower() for header in table_data.get('headers') or []]
        if not headers:
            return fail("Positions table headers not found.", ErrorType.PARSING, retryable=True)

        filter_by_account = account is not None
        positions: list[Position] = []

        for row in table_data.get('rows', []):
            if row.get('type') != 'position':
                continue

            row_map = _map_row(headers, row.get('cells') or [])
            position = _row_to_position(
                row_map,
                row.get('lots') or [],
                symbol=row.get('symbol') or "",
            )

            if filter_by_account:
                account_label = row.get('account') or ""
                if not account_label:
                    # Cannot attribute this row to any account, so it cannot match.
                    continue
                if not _match_account(_normalize_account_label(account_label), account):
                    continue

            if include_non_equity or position.asset_type in {"EQUITY", "ETF"}:
                positions.append(position)

        if not positions:
            return fail("No positions matched the requested criteria.", ErrorType.VALIDATION, retryable=False)

        return ok(positions)
    except Exception as exc:
        # Top-level boundary: report any scraping failure as a retryable error envelope.
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
    finally:
        # Tear down in reverse order of creation.
        await _safe_close_page(page)
        await _safe_close_context(context)
        await _safe_close_browser(browser)
        await _safe_stop_playwright(playwright)
|
||||
|
||||
|
||||
async def _safe_close_page(page) -> None:
|
||||
if page is None:
|
||||
return
|
||||
try:
|
||||
await page.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _safe_close_context(context) -> None:
|
||||
if context is None:
|
||||
return
|
||||
try:
|
||||
await context.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _safe_close_browser(browser) -> None:
|
||||
if browser is None:
|
||||
return
|
||||
try:
|
||||
await browser.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
async def _safe_stop_playwright(playwright) -> None:
|
||||
if playwright is None:
|
||||
return
|
||||
try:
|
||||
await playwright.stop()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
Reference in New Issue
Block a user