Fix build: Bundle schwab_scraper source and use local dependencies
All checks were successful
Build and Push Docker Image / build (push) Successful in 34s

This commit is contained in:
2026-04-24 01:50:20 +00:00
parent 02ac293692
commit 650ea2d087
43 changed files with 10900 additions and 41 deletions

View File

@@ -0,0 +1,20 @@
from .contracts import ( # noqa: F401
Envelope,
ErrorType,
AccountOverview,
AccountSummary,
Lot,
MorningstarData,
PortfolioSnapshot,
Position,
SessionStatus,
Transaction,
# Phase 1 data structures
QuoteData,
EnhancedDividends,
EarningsData,
CalculatedMetrics,
EquityPhase1Data,
fail,
ok,
)

View File

@@ -0,0 +1,134 @@
import json
import logging
import os
from typing import Optional
# Module-level state for runtime path overrides.
# Written by set_config_path() / set_cookies_path() and consulted first by
# get_config_path() / get_cookies_path(). None means "no override installed".
_config_path_override: Optional[str] = None
_cookies_path_override: Optional[str] = None
def set_config_path(path: Optional[str]) -> None:
    """Install or clear the runtime override for the config.json location.

    While an override is installed it wins over the SCHWAB_CONFIG_PATH
    environment variable and over the default lookup locations.

    The value is kept in a module-level variable, so this is not
    thread-safe. It is meant for single-threaded CLI usage or a single
    async operation.

    Args:
        path: Absolute or relative path to the config file, or None to
            remove a previously installed override.
    """
    global _config_path_override
    _config_path_override = path
def set_cookies_path(path: Optional[str]) -> None:
    """Install or clear the runtime override for the cookies.json location.

    While an override is installed it wins over the SCHWAB_COOKIES_PATH
    environment variable and over the default location.

    The value is kept in a module-level variable, so this is not
    thread-safe. It is meant for single-threaded CLI usage or a single
    async operation.

    Args:
        path: Absolute or relative path to the cookies file, or None to
            remove a previously installed override.
    """
    global _cookies_path_override
    _cookies_path_override = path
def get_config_path() -> str:
    """Work out which configuration file should be read.

    Candidates are tried in this order and the first usable one wins:
        1. The runtime override installed via set_config_path()
        2. The SCHWAB_CONFIG_PATH environment variable
        3. ``../config.json`` relative to this module, if that file exists
        4. ``config.json`` in the current working directory

    An empty override or an empty environment variable counts as "not set".

    Returns:
        str: Path to the configuration file. Existence is only checked for
        candidate 3; the other candidates are returned as given.
    """
    # An explicit choice (override first, then environment) short-circuits
    # the default lookup entirely.
    explicit = _config_path_override or os.environ.get('SCHWAB_CONFIG_PATH')
    if explicit:
        return explicit

    # Package root, used for development checkouts and installed packages.
    bundled = os.path.join(os.path.dirname(__file__), '..', 'config.json')
    return bundled if os.path.exists(bundled) else 'config.json'
def get_cookies_path() -> str:
    """Work out which cookies file should be used.

    The first non-empty candidate wins:
        1. The runtime override installed via set_cookies_path()
        2. The SCHWAB_COOKIES_PATH environment variable
        3. ``cookies.json`` in the current working directory

    Returns:
        str: Path to the cookies file. The path is not checked for existence.
    """
    return (
        _cookies_path_override
        or os.environ.get('SCHWAB_COOKIES_PATH')
        or 'cookies.json'
    )
def load_config():
    """Load configuration from config.json (or a custom path if configured).

    The path comes from get_config_path(). The file is always decoded as
    UTF-8, so the result does not depend on the platform's locale encoding.

    Returns:
        The parsed JSON value (normally a dict), or None when the file is
        missing, is not valid UTF-8, or is not valid JSON. Each failure is
        logged at error level before None is returned.
    """
    logger = logging.getLogger(__name__)
    config_path = get_config_path()
    try:
        # JSON is UTF-8 by specification; the locale default used by a bare
        # open() can mis-decode non-ASCII values such as passwords.
        with open(config_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        logger.error(
            "config.json not found at %s. Please create one based on config.json.sample",
            config_path,
        )
        return None
    except (json.JSONDecodeError, UnicodeDecodeError):
        # An undecodable file is reported the same way as malformed JSON.
        logger.error("Invalid JSON in config file at %s", config_path)
        return None
def get_playwright_url(config=None):
    """Get the Playwright browserless URL.

    Resolution order:
        1. The SCHWAB_PLAYWRIGHT_URL environment variable, if non-empty
        2. ``config['playwright']['url']``, if that key is present
        3. A built-in default URL

    Args:
        config: Already-loaded configuration mapping. When None, the
            configuration is loaded via load_config(). This happens only if
            the environment variable did not supply a URL.

    Returns:
        The websocket URL to connect to.
    """
    env_url = os.environ.get('SCHWAB_PLAYWRIGHT_URL')
    if env_url:
        return env_url

    if config is None:
        config = load_config()

    # Tolerate a missing or malformed section (e.g. "playwright": null)
    # rather than raising on the membership test.
    section = config.get('playwright') if isinstance(config, dict) else None
    if isinstance(section, dict) and 'url' in section:
        return section['url']

    # Default fallback URL
    return "ws://browser.local.ben.io:3000/playwright/chromium"
def get_schwab_credentials(config=None):
    """Get Schwab credentials from config.

    Args:
        config: Already-loaded configuration mapping. When None, the
            configuration is loaded via load_config().

    Returns:
        A ``(username, password)`` tuple read from the ``schwab`` section.
        Either element is None when its key is absent. Both are None when
        the configuration or the section is missing or is not a mapping.
    """
    if config is None:
        config = load_config()

    # Tolerate a missing or malformed section (e.g. "schwab": null) rather
    # than raising on the .get() calls.
    section = config.get('schwab') if isinstance(config, dict) else None
    if not isinstance(section, dict):
        return None, None
    return section.get('username'), section.get('password')

View File

@@ -0,0 +1,271 @@
from __future__ import annotations
from dataclasses import dataclass, field
from datetime import datetime
from decimal import Decimal
from enum import Enum
from typing import Generic, Optional, TypeVar
from typing_extensions import TypedDict
T = TypeVar("T")
class ErrorType(str, Enum):
    """Categorisation for envelope failures.

    Members also subclass ``str`` and each value is identical to its member
    name, so a member compares equal to the matching plain string.
    """

    AUTHENTICATION = "AUTHENTICATION"
    NETWORK = "NETWORK"
    PARSING = "PARSING"
    VALIDATION = "VALIDATION"
    UNKNOWN = "UNKNOWN"  # fail() falls back to this for unrecognised values
class Envelope(TypedDict, Generic[T]):
    """Standard response envelope for unified API operations.

    Generic over ``T``, the type of the payload carried in ``data``. The
    ok() and fail() helpers in this module build dictionaries of this shape.
    """

    success: bool  # True from ok(), False from fail()
    data: Optional[T]  # payload from ok(); None from fail()
    error: Optional[str]  # message from fail(); None from ok()
    error_type: Optional[ErrorType]  # category from fail(); None from ok()
    retryable: bool  # always False from ok(); caller-supplied in fail()
def ok(data: T) -> Envelope[T]:
    """Wrap ``data`` in a success envelope.

    Args:
        data: Payload to return to the caller; stored unchanged.

    Returns:
        An envelope with ``success`` True, the payload under ``data``, no
        error information, and ``retryable`` False.
    """
    envelope: Envelope[T] = {
        "success": True,
        "data": data,
        "error": None,
        "error_type": None,
        "retryable": False,
    }
    return envelope
def fail(
    error: str,
    error_type: ErrorType | str = ErrorType.UNKNOWN,
    retryable: bool = False,
) -> Envelope[None]:
    """Build a failure envelope describing what went wrong.

    Args:
        error: Human-readable error message.
        error_type: Failure category, given as an ``ErrorType`` member or as
            its exact string value. A string that matches no member is
            recorded as ``ErrorType.UNKNOWN``.
        retryable: Whether the caller may reasonably retry the operation.

    Returns:
        An envelope with ``success`` False, ``data`` None, and the error
        message, resolved category and retry flag filled in.
    """
    # Calling the enum with an existing member returns that member, so a
    # single lookup covers both members and raw strings.
    try:
        category = ErrorType(error_type)
    except ValueError:
        category = ErrorType.UNKNOWN

    envelope: Envelope[None] = {
        "success": False,
        "data": None,
        "error": error,
        "error_type": category,
        "retryable": retryable,
    }
    return envelope
@dataclass(slots=True)
class SessionStatus:
    """Represents the current authentication session state.

    Only ``logged_in`` is required; every other field has a default.
    """

    logged_in: bool
    session_age_minutes: Optional[int] = None
    last_refresh: Optional[datetime] = None
    needs_mfa: bool = False
    cookies_valid: bool = True  # note: defaults to True, unlike needs_mfa
@dataclass(slots=True)
class AccountSummary:
    """Summary details for a Schwab account.

    ``id``, ``label`` and ``type`` are required; ``last4`` and ``is_margin``
    have defaults.
    """

    id: str
    label: str
    type: str
    last4: Optional[str] = None  # presumably the last four account digits - TODO confirm
    is_margin: bool = False
@dataclass(slots=True)
class AccountOverview:
    """Aggregated balance snapshot for an account.

    Pairs a required ``AccountSummary`` with balance figures. Every figure
    is optional and defaults to None. Amounts are typed ``Decimal``, while
    ``day_change_pct`` is a ``float``.
    """

    account: AccountSummary
    total_value: Optional[Decimal] = None
    day_change: Optional[Decimal] = None
    day_change_pct: Optional[float] = None
    cash: Optional[Decimal] = None
    settled_cash: Optional[Decimal] = None
    buying_power: Optional[Decimal] = None
    margin_balance: Optional[Decimal] = None
@dataclass(slots=True)
class Lot:
    """Individual lot information within a position.

    All fields are optional and default to None.
    """

    acquired_date: Optional[str] = None  # kept as text, not a date object
    quantity: Optional[float] = None
    cost_basis: Optional[Decimal] = None
    lot_id: Optional[str] = None
@dataclass(slots=True)
class Position:
    """Holding data for a specific security.

    Only ``symbol`` is required. Monetary fields are typed ``Decimal``;
    ``quantity`` and ``unrealized_gain_pct`` are ``float``.
    """

    symbol: str
    description: Optional[str] = None
    asset_type: Optional[str] = None
    quantity: Optional[float] = None
    market_price: Optional[Decimal] = None
    market_value: Optional[Decimal] = None
    cost_basis_total: Optional[Decimal] = None
    unrealized_gain: Optional[Decimal] = None
    unrealized_gain_pct: Optional[float] = None
    # default_factory gives each instance its own empty list
    lots: list[Lot] = field(default_factory=list)
@dataclass(slots=True)
class PortfolioSnapshot:
    """Aggregated view of equity holdings across accounts.

    ``equities`` is required. ``total_value`` and ``count`` are plain stored
    fields: this class does not derive them from ``equities``.
    """

    equities: list[Position]
    total_value: Optional[Decimal] = None
    count: int = 0  # not computed here; the creator must supply it
@dataclass(slots=True)
class MorningstarData:
    """Unified Morningstar data payload (existing equity fields).

    Only ``ticker`` is required. Every other field defaults to None and is
    typed as text, except ``rating``, which is an ``int``.
    """

    ticker: str
    company_name: Optional[str] = None
    previous_dividend_payment: Optional[str] = None
    previous_pay_date: Optional[str] = None
    previous_ex_date: Optional[str] = None
    frequency: Optional[str] = None
    annual_dividend_rate: Optional[str] = None
    annual_dividend_yield: Optional[str] = None
    fair_value: Optional[str] = None
    economic_moat: Optional[str] = None
    capital_allocation: Optional[str] = None
    rating: Optional[int] = None
    one_star_price: Optional[str] = None
    five_star_price: Optional[str] = None
    assessment: Optional[str] = None
    range_52_week: Optional[str] = None
    dividend_yield: Optional[str] = None
    investment_style: Optional[str] = None
    report_url: Optional[str] = None
    report_date: Optional[str] = None
    source: Optional[str] = None
@dataclass(slots=True)
class Transaction:
    """Normalized transaction record matching transactions feature.

    No field has a default, so all eight must be passed to the constructor.
    The ``Optional`` fields accept None but still have to be supplied.
    Numeric-looking fields (``quantity``, ``price``, ``fees_comm``,
    ``amount``) are typed as text here.
    """

    date: str
    action: str
    symbol: Optional[str]
    description: str
    quantity: Optional[str]
    price: Optional[str]
    fees_comm: Optional[str]
    amount: Optional[str]
# Phase 1 Data Structures
@dataclass(slots=True)
class QuoteData:
    """Quote and price data from symbol bar.

    Every field is optional and defaults to None. Prices and changes are
    typed ``float`` and ``volume`` is an ``int``. ``bid_ask_size``,
    ``volume_vs_avg``, ``market_cap``, ``sector`` and ``exchange`` are
    kept as text.
    """

    price: Optional[float] = None
    change: Optional[float] = None
    change_percent: Optional[float] = None
    after_hours_price: Optional[float] = None
    after_hours_change: Optional[float] = None
    after_hours_change_percent: Optional[float] = None
    bid: Optional[float] = None
    ask: Optional[float] = None
    bid_ask_size: Optional[str] = None
    previous_close: Optional[float] = None
    open: Optional[float] = None
    volume: Optional[int] = None
    volume_vs_avg: Optional[str] = None
    day_range_low: Optional[float] = None
    day_range_high: Optional[float] = None
    week_52_low: Optional[float] = None
    week_52_high: Optional[float] = None
    market_cap: Optional[str] = None
    sector: Optional[str] = None
    exchange: Optional[str] = None
@dataclass(slots=True)
class EnhancedDividends:
    """Enhanced dividend data including forward-looking information.

    Every field is optional and defaults to None. Payments, rate and yield
    are typed ``float``; dates and ``frequency`` are kept as text.
    """

    # Forward-looking data (Phase 1)
    next_payment: Optional[float] = None
    next_pay_date: Optional[str] = None
    next_ex_date: Optional[str] = None
    # Existing data
    frequency: Optional[str] = None
    annual_rate: Optional[float] = None
    annual_yield: Optional[float] = None
    previous_payment: Optional[float] = None
    previous_pay_date: Optional[str] = None
    previous_ex_date: Optional[str] = None
@dataclass(slots=True)
class EarningsData:
    """Core earnings metrics and forecasts.

    Every scalar field is optional and defaults to None. The two list
    fields default to a fresh empty list per instance. The keys of the
    dictionaries they hold are not defined in this class.
    """

    # Upcoming earnings
    next_announcement_date: Optional[str] = None
    announcement_timing: Optional[str] = None
    analysts_covering: Optional[int] = None
    consensus_estimate: Optional[float] = None
    estimate_high: Optional[float] = None
    estimate_low: Optional[float] = None
    # Historical earnings
    eps_ttm: Optional[float] = None
    revenue_ttm: Optional[float] = None  # Stored in dollars
    pe_ttm: Optional[float] = None
    forward_pe: Optional[float] = None
    peg_ratio: Optional[float] = None
    # Beat/miss history (simplified for Phase 1)
    recent_beats: list[dict] = field(default_factory=list)
    future_estimates: list[dict] = field(default_factory=list)
@dataclass(slots=True)
class CalculatedMetrics:
    """Calculated metrics derived from other data.

    Holds the result only; the calculation itself is not part of this class.
    """

    payout_ratio: Optional[float] = None
@dataclass(slots=True)
class EquityPhase1Data:
    """Complete Phase 1 enhanced equity data.

    ``ticker`` is required. Each of the four sections is optional and
    defaults to None.
    """

    ticker: str
    quote: Optional[QuoteData] = None
    dividends: Optional[EnhancedDividends] = None
    earnings: Optional[EarningsData] = None
    calculated_metrics: Optional[CalculatedMetrics] = None

View File

@@ -0,0 +1,30 @@
class ScraperError(Exception):
    """Base class for scraper-related errors."""


class SessionExpiredError(ScraperError):
    """Error category: the session has expired."""


class LoginError(ScraperError):
    """Error category: login failure."""


class InvalidTickerError(ScraperError):
    """Error category: invalid ticker."""


class NoDataError(ScraperError):
    """Error category: no data available."""


class DownloadError(ScraperError):
    """Error category: download failure."""


class PdfParseError(ScraperError):
    """Error category: PDF parsing failure."""


class NavigationError(ScraperError):
    """Error category: navigation failure."""

View File

@@ -0,0 +1,66 @@
from dataclasses import dataclass
from typing import Optional, List
@dataclass
class DividendsData:
    # Dividend fields, all kept as optional text and defaulting to None.
    previous_payment: Optional[str] = None
    previous_pay_date: Optional[str] = None
    previous_ex_date: Optional[str] = None
    frequency: Optional[str] = None
    annual_dividend_rate: Optional[str] = None
    annual_dividend_yield: Optional[str] = None
@dataclass
class MorningstarPdfData:
    # Every field is optional and defaults to None. All are text except
    # `rating`, which is an int. Presumably populated from a parsed
    # Morningstar PDF report - TODO confirm against the parser.
    fair_value: Optional[str] = None
    economic_moat: Optional[str] = None
    capital_allocation: Optional[str] = None
    rating: Optional[int] = None
    one_star_price: Optional[str] = None
    five_star_price: Optional[str] = None
    assessment: Optional[str] = None
    range_52_week: Optional[str] = None
    dividend_yield: Optional[str] = None
    investment_style: Optional[str] = None
    report_url: Optional[str] = None
    report_date: Optional[str] = None
@dataclass
class ScrapeResult:
    # Combined result for one ticker. No field has a default, so all five
    # must be passed. `company_name` may be None but still has to be given.
    ticker: str
    company_name: Optional[str]
    dividends: DividendsData
    morningstar: MorningstarPdfData
    source: str  # "live" | "cache"
# -------------------- Transactions Feature --------------------
@dataclass
class AccountInfo:
    # Describes one account. The three text fields are required;
    # `is_selected` defaults to False.
    account_type: str  # e.g., "Joint", "IRA", "Individual"
    account_ending: str  # e.g., "604", "197", "873"
    full_description: str  # e.g., "Joint …604 (Account ending in 6 0 4)"
    is_selected: bool = False
@dataclass
class TransactionRecord:
    # One transaction row. No field has a default, so all eight must be
    # passed. The Optional ones accept None. Numeric-looking values
    # (quantity, price, fees_comm, amount) are typed as text.
    date: str
    action: str
    symbol: Optional[str]
    description: str
    quantity: Optional[str]
    price: Optional[str]
    fees_comm: Optional[str]
    amount: Optional[str]
@dataclass
class TransactionData:
    # Transactions for one account plus metadata. All fields are required.
    account_info: AccountInfo
    transactions: List[TransactionRecord]
    date_range: str
    export_date: str
    # Stored as given; this class does not derive it from `transactions`.
    total_transactions: int
    source: str  # "live" | "cache"