Fix build: Bundle schwab_scraper source and use local dependencies
All checks were successful
Build and Push Docker Image / build (push) Successful in 34s
All checks were successful
Build and Push Docker Image / build (push) Successful in 34s
This commit is contained in:
20
schwab_scraper/core/__init__.py
Normal file
20
schwab_scraper/core/__init__.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from .contracts import ( # noqa: F401
|
||||
Envelope,
|
||||
ErrorType,
|
||||
AccountOverview,
|
||||
AccountSummary,
|
||||
Lot,
|
||||
MorningstarData,
|
||||
PortfolioSnapshot,
|
||||
Position,
|
||||
SessionStatus,
|
||||
Transaction,
|
||||
# Phase 1 data structures
|
||||
QuoteData,
|
||||
EnhancedDividends,
|
||||
EarningsData,
|
||||
CalculatedMetrics,
|
||||
EquityPhase1Data,
|
||||
fail,
|
||||
ok,
|
||||
)
|
||||
134
schwab_scraper/core/config.py
Normal file
134
schwab_scraper/core/config.py
Normal file
@@ -0,0 +1,134 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
# Module-level state for runtime path overrides.
# set_config_path() / set_cookies_path() write these values, and
# get_config_path() / get_cookies_path() check them before looking at the
# environment or the defaults. None (or any falsy value) means "no override".
_config_path_override: Optional[str] = None
_cookies_path_override: Optional[str] = None
|
||||
|
||||
|
||||
def set_config_path(path: Optional[str]) -> None:
    """Install or clear the runtime override for the config file location.

    The value is kept in module-level state and is the first thing
    ``get_config_path()`` consults, so it wins over the
    ``SCHWAB_CONFIG_PATH`` environment variable and the default locations.

    The state is a plain module global with no locking, so this is not
    thread-safe; it is meant for single-threaded CLI use or a single async
    operation.

    Args:
        path: Absolute or relative path to the config file. Pass ``None`` to
            remove the override again.
    """
    global _config_path_override
    _config_path_override = path
|
||||
|
||||
|
||||
def set_cookies_path(path: Optional[str]) -> None:
    """Install or clear the runtime override for the cookies file location.

    The value is kept in module-level state and is the first thing
    ``get_cookies_path()`` consults, so it wins over the
    ``SCHWAB_COOKIES_PATH`` environment variable and the default location.

    The state is a plain module global with no locking, so this is not
    thread-safe; it is meant for single-threaded CLI use or a single async
    operation.

    Args:
        path: Absolute or relative path to the cookies file. Pass ``None`` to
            remove the override again.
    """
    global _cookies_path_override
    _cookies_path_override = path
|
||||
|
||||
|
||||
def get_config_path() -> str:
    """Work out which configuration file should be read.

    Candidates are tried in this order and the first usable one wins:

    1. The runtime override installed with ``set_config_path()``.
    2. The ``SCHWAB_CONFIG_PATH`` environment variable.
    3. ``config.json`` one directory above this module, if that file exists.
    4. ``config.json`` relative to the current working directory.

    Empty values in steps 1 and 2 count as "not set". The path returned by
    step 4 is not checked for existence.

    Returns:
        str: Path to the configuration file.
    """
    # Steps 1 and 2: an explicit choice, override first, then environment.
    explicit = _config_path_override or os.environ.get('SCHWAB_CONFIG_PATH')
    if explicit:
        return explicit

    # Step 3: package root (development checkout / installed package).
    packaged = os.path.join(os.path.dirname(__file__), '..', 'config.json')

    # Step 4: fall back to the current working directory.
    return packaged if os.path.exists(packaged) else 'config.json'
|
||||
|
||||
|
||||
def get_cookies_path() -> str:
    """Work out which cookies file should be used.

    The first non-empty candidate wins:

    1. The runtime override installed with ``set_cookies_path()``.
    2. The ``SCHWAB_COOKIES_PATH`` environment variable.
    3. ``cookies.json`` relative to the current working directory.

    Returns:
        str: Path to the cookies file (not checked for existence).
    """
    return (
        _cookies_path_override
        or os.environ.get('SCHWAB_COOKIES_PATH')
        or 'cookies.json'
    )
|
||||
|
||||
|
||||
def load_config():
    """Load the JSON configuration file.

    The file location comes from ``get_config_path()``. The file is always
    decoded as UTF-8 (the encoding JSON interchange uses) rather than the
    platform's locale encoding, so the same file parses the same way on
    every machine.

    Returns:
        The parsed JSON document (normally a dict), or ``None`` when the
        file is missing, is not valid UTF-8, or is not valid JSON. Every
        failure is logged at ERROR level.
    """
    logger = logging.getLogger(__name__)
    config_path = get_config_path()

    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        logger.error(
            "config.json not found at %s. Please create one based on config.json.sample",
            config_path,
        )
        return None
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Undecodable bytes mean the file is not valid JSON text either, so
        # they are reported the same way instead of escaping as a crash.
        logger.error("Invalid JSON in config file at %s", config_path)
        return None
|
||||
|
||||
|
||||
def get_playwright_url(config=None):
    """Return the Playwright browserless URL to connect to.

    Resolution order:

    1. The ``SCHWAB_PLAYWRIGHT_URL`` environment variable, when non-empty.
    2. ``config["playwright"]["url"]``, when present and non-empty.
    3. A built-in default URL.

    A malformed configuration (for example ``"playwright": null`` or a
    non-mapping section) is treated as "no URL configured" instead of
    raising.

    Args:
        config: An already-loaded configuration mapping. When ``None``, the
            configuration is read with ``load_config()``, but only if the
            environment variable did not already decide the result.

    Returns:
        The URL to use.
    """
    from collections.abc import Mapping

    default_url = "ws://browser.local.ben.io:3000/playwright/chromium"

    env_url = os.environ.get('SCHWAB_PLAYWRIGHT_URL')
    if env_url:
        return env_url

    if config is None:
        config = load_config()

    # Walk the two levels defensively: either one may be missing, null, or
    # of the wrong type in a hand-edited config file.
    section = config.get('playwright') if isinstance(config, Mapping) else None
    url = section.get('url') if isinstance(section, Mapping) else None
    return url or default_url
|
||||
|
||||
|
||||
def get_schwab_credentials(config=None):
    """Return the Schwab login credentials stored in the configuration.

    A malformed configuration (for example ``"schwab": null`` or a
    non-mapping section) is treated as "no credentials configured" instead
    of raising.

    Args:
        config: An already-loaded configuration mapping. When ``None``, the
            configuration is read with ``load_config()``.

    Returns:
        tuple: ``(username, password)``. Each element is ``None`` when the
        corresponding key, or the whole ``schwab`` section, is absent.
    """
    from collections.abc import Mapping

    if config is None:
        config = load_config()

    section = config.get('schwab') if isinstance(config, Mapping) else None
    if not isinstance(section, Mapping):
        return None, None
    return section.get('username'), section.get('password')
|
||||
271
schwab_scraper/core/contracts.py
Normal file
271
schwab_scraper/core/contracts.py
Normal file
@@ -0,0 +1,271 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from enum import Enum
|
||||
from typing import Generic, Optional, TypeVar
|
||||
|
||||
from typing_extensions import TypedDict
|
||||
|
||||
|
||||
# Type of the payload carried in Envelope.data (see Envelope and ok()).
T = TypeVar("T")
|
||||
|
||||
|
||||
class ErrorType(str, Enum):
    """Categorisation for envelope failures.

    The ``str`` mixin makes every member a string equal to its value, and
    each value is spelled exactly like the member name.
    """

    AUTHENTICATION = "AUTHENTICATION"
    NETWORK = "NETWORK"
    PARSING = "PARSING"
    VALIDATION = "VALIDATION"
    # Default category in fail(), and its fallback for unrecognised values.
    UNKNOWN = "UNKNOWN"
|
||||
|
||||
|
||||
class Envelope(TypedDict, Generic[T]):
    """Standard response envelope for unified API operations.

    ``ok()`` builds the success form and ``fail()`` the failure form; ``T``
    is the type of the ``data`` payload.
    """

    success: bool  # True from ok(), False from fail()
    data: Optional[T]  # payload on success; None in failure envelopes
    error: Optional[str]  # message given to fail(); None on success
    error_type: Optional[ErrorType]  # failure category; None on success
    retryable: bool  # ok() always sets False; fail() passes its argument through
|
||||
|
||||
|
||||
def ok(data: T) -> Envelope[T]:
    """Wrap *data* in a success envelope.

    Args:
        data: The payload to return to the caller; stored as-is.

    Returns:
        An envelope with ``success`` set to True, the payload under
        ``data``, both error fields set to None and ``retryable`` False.
    """
    envelope: Envelope[T] = {
        "success": True,
        "data": data,
        "error": None,
        "error_type": None,
        "retryable": False,
    }
    return envelope
|
||||
|
||||
|
||||
def fail(
    error: str,
    error_type: ErrorType | str = ErrorType.UNKNOWN,
    retryable: bool = False,
) -> Envelope[None]:
    """Build a failure envelope.

    Args:
        error: Human-readable description of what went wrong.
        error_type: Failure category, either an ``ErrorType`` member or the
            string value of one. Values that do not name a member are
            recorded as ``ErrorType.UNKNOWN``.
        retryable: Stored unchanged in the envelope.

    Returns:
        An envelope with ``success`` set to False and ``data`` set to None.
    """
    category = error_type
    if not isinstance(category, ErrorType):
        # Coerce plain strings; anything unrecognised degrades to UNKNOWN
        # rather than raising.
        try:
            category = ErrorType(category)
        except ValueError:
            category = ErrorType.UNKNOWN

    envelope: Envelope[None] = {
        "success": False,
        "data": None,
        "error": error,
        "error_type": category,
        "retryable": retryable,
    }
    return envelope
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SessionStatus:
|
||||
"""Represents the current authentication session state."""
|
||||
|
||||
logged_in: bool
|
||||
session_age_minutes: Optional[int] = None
|
||||
last_refresh: Optional[datetime] = None
|
||||
needs_mfa: bool = False
|
||||
cookies_valid: bool = True
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class AccountSummary:
|
||||
"""Summary details for a Schwab account."""
|
||||
|
||||
id: str
|
||||
label: str
|
||||
type: str
|
||||
last4: Optional[str] = None
|
||||
is_margin: bool = False
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class AccountOverview:
|
||||
"""Aggregated balance snapshot for an account."""
|
||||
|
||||
account: AccountSummary
|
||||
total_value: Optional[Decimal] = None
|
||||
day_change: Optional[Decimal] = None
|
||||
day_change_pct: Optional[float] = None
|
||||
cash: Optional[Decimal] = None
|
||||
settled_cash: Optional[Decimal] = None
|
||||
buying_power: Optional[Decimal] = None
|
||||
margin_balance: Optional[Decimal] = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Lot:
|
||||
"""Individual lot information within a position."""
|
||||
|
||||
acquired_date: Optional[str] = None
|
||||
quantity: Optional[float] = None
|
||||
cost_basis: Optional[Decimal] = None
|
||||
lot_id: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Position:
|
||||
"""Holding data for a specific security."""
|
||||
|
||||
symbol: str
|
||||
description: Optional[str] = None
|
||||
asset_type: Optional[str] = None
|
||||
quantity: Optional[float] = None
|
||||
market_price: Optional[Decimal] = None
|
||||
market_value: Optional[Decimal] = None
|
||||
cost_basis_total: Optional[Decimal] = None
|
||||
unrealized_gain: Optional[Decimal] = None
|
||||
unrealized_gain_pct: Optional[float] = None
|
||||
lots: list[Lot] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class PortfolioSnapshot:
|
||||
"""Aggregated view of equity holdings across accounts."""
|
||||
|
||||
equities: list[Position]
|
||||
total_value: Optional[Decimal] = None
|
||||
count: int = 0
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class MorningstarData:
|
||||
"""Unified Morningstar data payload (existing equity fields)."""
|
||||
|
||||
ticker: str
|
||||
company_name: Optional[str] = None
|
||||
previous_dividend_payment: Optional[str] = None
|
||||
previous_pay_date: Optional[str] = None
|
||||
previous_ex_date: Optional[str] = None
|
||||
frequency: Optional[str] = None
|
||||
annual_dividend_rate: Optional[str] = None
|
||||
annual_dividend_yield: Optional[str] = None
|
||||
fair_value: Optional[str] = None
|
||||
economic_moat: Optional[str] = None
|
||||
capital_allocation: Optional[str] = None
|
||||
rating: Optional[int] = None
|
||||
one_star_price: Optional[str] = None
|
||||
five_star_price: Optional[str] = None
|
||||
assessment: Optional[str] = None
|
||||
range_52_week: Optional[str] = None
|
||||
dividend_yield: Optional[str] = None
|
||||
investment_style: Optional[str] = None
|
||||
report_url: Optional[str] = None
|
||||
report_date: Optional[str] = None
|
||||
source: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class Transaction:
|
||||
"""Normalized transaction record matching transactions feature."""
|
||||
|
||||
date: str
|
||||
action: str
|
||||
symbol: Optional[str]
|
||||
description: str
|
||||
quantity: Optional[str]
|
||||
price: Optional[str]
|
||||
fees_comm: Optional[str]
|
||||
amount: Optional[str]
|
||||
|
||||
|
||||
# Phase 1 Data Structures
|
||||
|
||||
@dataclass(slots=True)
|
||||
class QuoteData:
|
||||
"""Quote and price data from symbol bar."""
|
||||
|
||||
price: Optional[float] = None
|
||||
change: Optional[float] = None
|
||||
change_percent: Optional[float] = None
|
||||
after_hours_price: Optional[float] = None
|
||||
after_hours_change: Optional[float] = None
|
||||
after_hours_change_percent: Optional[float] = None
|
||||
bid: Optional[float] = None
|
||||
ask: Optional[float] = None
|
||||
bid_ask_size: Optional[str] = None
|
||||
previous_close: Optional[float] = None
|
||||
open: Optional[float] = None
|
||||
volume: Optional[int] = None
|
||||
volume_vs_avg: Optional[str] = None
|
||||
day_range_low: Optional[float] = None
|
||||
day_range_high: Optional[float] = None
|
||||
week_52_low: Optional[float] = None
|
||||
week_52_high: Optional[float] = None
|
||||
market_cap: Optional[str] = None
|
||||
sector: Optional[str] = None
|
||||
exchange: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class EnhancedDividends:
|
||||
"""Enhanced dividend data including forward-looking information."""
|
||||
|
||||
# Forward-looking data (Phase 1)
|
||||
next_payment: Optional[float] = None
|
||||
next_pay_date: Optional[str] = None
|
||||
next_ex_date: Optional[str] = None
|
||||
|
||||
# Existing data
|
||||
frequency: Optional[str] = None
|
||||
annual_rate: Optional[float] = None
|
||||
annual_yield: Optional[float] = None
|
||||
previous_payment: Optional[float] = None
|
||||
previous_pay_date: Optional[str] = None
|
||||
previous_ex_date: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class EarningsData:
|
||||
"""Core earnings metrics and forecasts."""
|
||||
|
||||
# Upcoming earnings
|
||||
next_announcement_date: Optional[str] = None
|
||||
announcement_timing: Optional[str] = None
|
||||
analysts_covering: Optional[int] = None
|
||||
consensus_estimate: Optional[float] = None
|
||||
estimate_high: Optional[float] = None
|
||||
estimate_low: Optional[float] = None
|
||||
|
||||
# Historical earnings
|
||||
eps_ttm: Optional[float] = None
|
||||
revenue_ttm: Optional[float] = None # Stored in dollars
|
||||
pe_ttm: Optional[float] = None
|
||||
forward_pe: Optional[float] = None
|
||||
peg_ratio: Optional[float] = None
|
||||
|
||||
# Beat/miss history (simplified for Phase 1)
|
||||
recent_beats: list[dict] = field(default_factory=list)
|
||||
future_estimates: list[dict] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class CalculatedMetrics:
|
||||
"""Calculated metrics derived from other data."""
|
||||
|
||||
payout_ratio: Optional[float] = None
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class EquityPhase1Data:
|
||||
"""Complete Phase 1 enhanced equity data."""
|
||||
|
||||
ticker: str
|
||||
quote: Optional[QuoteData] = None
|
||||
dividends: Optional[EnhancedDividends] = None
|
||||
earnings: Optional[EarningsData] = None
|
||||
calculated_metrics: Optional[CalculatedMetrics] = None
|
||||
|
||||
|
||||
30
schwab_scraper/core/errors.py
Normal file
30
schwab_scraper/core/errors.py
Normal file
@@ -0,0 +1,30 @@
|
||||
class ScraperError(Exception):
    """Common base of all scraper exceptions; catch it to handle any of them."""


# NOTE(review): the subclasses below add no behavior of their own. Their
# one-line summaries are inferred from the class names; confirm them against
# the raise sites, which are not part of this module.


class SessionExpiredError(ScraperError):
    """Presumably signals that the authenticated session is no longer valid."""


class LoginError(ScraperError):
    """Presumably signals that logging in did not succeed."""


class InvalidTickerError(ScraperError):
    """Presumably signals that a ticker symbol was not recognised."""


class NoDataError(ScraperError):
    """Presumably signals that no data was available for the request."""


class DownloadError(ScraperError):
    """Presumably signals that a download did not complete."""


class PdfParseError(ScraperError):
    """Presumably signals that a PDF could not be parsed."""


class NavigationError(ScraperError):
    """Presumably signals that browser navigation did not reach its target."""
|
||||
66
schwab_scraper/core/models.py
Normal file
66
schwab_scraper/core/models.py
Normal file
@@ -0,0 +1,66 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, List
|
||||
|
||||
@dataclass
class DividendsData:
    """Dividend fields for one security.

    Every field is an optional string that defaults to None.
    """

    # Previous payment
    previous_payment: Optional[str] = None
    previous_pay_date: Optional[str] = None
    previous_ex_date: Optional[str] = None
    # Schedule and annualised figures
    frequency: Optional[str] = None
    annual_dividend_rate: Optional[str] = None
    annual_dividend_yield: Optional[str] = None
|
||||
|
||||
@dataclass
class MorningstarPdfData:
    """Morningstar report fields.

    Every field is optional and defaults to None; all are strings except
    ``rating``, which is an int.

    NOTE(review): the class name suggests the values come from a Morningstar
    PDF report; the extraction code is not in this module, so confirm there.
    """

    fair_value: Optional[str] = None
    economic_moat: Optional[str] = None
    capital_allocation: Optional[str] = None
    rating: Optional[int] = None  # the only non-string field
    one_star_price: Optional[str] = None
    five_star_price: Optional[str] = None
    assessment: Optional[str] = None
    range_52_week: Optional[str] = None
    dividend_yield: Optional[str] = None
    investment_style: Optional[str] = None
    report_url: Optional[str] = None
    report_date: Optional[str] = None
|
||||
|
||||
@dataclass
class ScrapeResult:
    """Combined scrape result for one ticker.

    All five fields are required (none has a default).
    """

    ticker: str
    company_name: Optional[str]  # may be None, but must still be passed
    dividends: DividendsData
    morningstar: MorningstarPdfData
    source: str  # "live" | "cache"
|
||||
|
||||
|
||||
# -------------------- Transactions Feature --------------------
|
||||
|
||||
@dataclass
class AccountInfo:
    """Description of one account as used by the transactions feature.

    The three string fields are required; ``is_selected`` defaults to False.
    """

    account_type: str  # e.g., "Joint", "IRA", "Individual"
    account_ending: str  # e.g., "604", "197", "873"
    full_description: str  # e.g., "Joint …604 (Account ending in 6 0 4)"
    is_selected: bool = False
|
||||
|
||||
|
||||
@dataclass
class TransactionRecord:
    """One transaction row.

    All eight fields are required (none has a default) and are carried as
    strings. ``symbol`` and the four trailing columns may be None.
    """

    date: str
    action: str
    symbol: Optional[str]
    description: str
    quantity: Optional[str]
    price: Optional[str]
    fees_comm: Optional[str]
    amount: Optional[str]
|
||||
|
||||
|
||||
@dataclass
class TransactionData:
    """Transactions for one account together with export metadata.

    All six fields are required (none has a default).
    """

    account_info: AccountInfo
    transactions: List[TransactionRecord]
    date_range: str
    export_date: str
    # Ordinary field supplied by the producer; not derived from ``transactions``.
    total_transactions: int
    source: str  # "live" | "cache"
|
||||
Reference in New Issue
Block a user