Fix build: Bundle schwab_scraper source and use local dependencies

2026-04-24 01:50:20 +00:00
parent 02ac293692
commit 650ea2d087
43 changed files with 10900 additions and 41 deletions
--- a/.gitea/workflows/build.yaml
+++ b/.gitea/workflows/build.yaml
@@ -28,5 +28,3 @@ jobs:
          context: .
          push: true
          tags: gitea.ext.ben.io/${{ gitea.repository }}:latest
          build-args: |
            GITEA_TOKEN=${{ secrets.CR_PAT }}
--- a/28
+++ b/28
@@ -4,29 +4,13 @@ ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
 WORKDIR /app
-# Install git for dependency installation
+# Copy dependency files and install
-RUN apt-get update && apt-get install -y --no-install-recommends \
+COPY pyproject.toml uv.lock ./
-    git \
+RUN uv sync --frozen --no-dev --no-install-project
    && rm -rf /var/lib/apt/lists/*
-# Use Gitea PAT for private dependencies if provided
+# Copy project files
-ARG GITEA_TOKEN
+COPY . .
-RUN if [ -n "$GITEA_TOKEN" ]; then \
+RUN uv sync --frozen --no-dev
    git config --global url."https://${GITEA_TOKEN}@gitea.ext.ben.io/".insteadOf "https://gitea.ext.ben.io/"; \
    fi
 # Install dependencies
 RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    uv sync --frozen --no-install-project --no-dev
 # Copy the rest of the application
 COPY . /app
 # Install the project
 RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-dev
 FROM python:3.12-slim-bookworm
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,13 @@ dependencies = [
    "fastmcp>=0.4.1",
    "starlette>=0.41.0",
    "uvicorn>=0.32.0",
-    "schwab-scraper @ git+https://gitea.ext.ben.io/b3nw/schwab-scraper.git",
+    "aiohttp>=3.9.0",
    "fastapi>=0.136.1",
    "greenlet>=3.2.3",
    "pdfplumber>=0.11.4",
    "playwright==1.54.0",
    "pyee>=13.0.0",
    "typing-extensions>=4.14.0",
 ]
 [build-system]
@@ -20,4 +26,4 @@ build-backend = "hatchling.build"
 allow-direct-references = true
 [tool.hatch.build.targets.wheel]
-packages = []
+packages = ["schwab_scraper"]
--- a/schwab_scraper/init.py
+++ b/schwab_scraper/init.py
@@ -0,0 +1,37 @@
 """Public package exports sync wrappers and unified API references."""
 from .api import (
    get_morningstar_data,
    get_transaction_history,
    get_transaction_history_enhanced,
    list_accounts,
    get_account_overview,
    get_positions,
    get_portfolio_snapshot,
    refresh_session,
    check_session_health,
    get_session_status,
    get_session_info,
    ensure_valid_session,
    export_cookies,
    set_cookies,
    list_available_accounts,
 )
 __all__ = [
    "get_morningstar_data",
    "get_transaction_history",
    "get_transaction_history_enhanced",
    "list_accounts",
    "get_account_overview",
    "get_positions",
    "get_portfolio_snapshot",
    "refresh_session",
    "check_session_health",
    "get_session_status",
    "get_session_info",
    "ensure_valid_session",
    "export_cookies",
    "set_cookies",
    "list_available_accounts",
 ]
--- a/schwab_scraper/main.py
+++ b/schwab_scraper/main.py
@@ -0,0 +1,7 @@
 #!/usr/bin/env python3
 """Main entry point for the schwab-morningstar-scraper package when run with python3 -m."""
 from .cli import main
 # Invoke the CLI entry point only when this module is executed as a script,
 # not when it is imported.
 if __name__ == "__main__":
    main()
--- a/schwab_scraper/api.py
+++ b/schwab_scraper/api.py
@@ -0,0 +1,102 @@
 import asyncio
 from . import unified_api
 from .browser.session import get_session_info as _session_info
 def get_morningstar_data(ticker: str, debug: bool = False):
    """Blocking wrapper around `unified_api.get_morningstar_data`.

    The coroutine is driven to completion with `asyncio.run` and its result
    is returned unchanged.
    """
    pending = unified_api.get_morningstar_data(ticker, debug=debug)
    return asyncio.run(pending)
 def get_transaction_history(account=None, start_date=None, end_date=None, time_period=None, debug=False):
    """Blocking wrapper around `unified_api.get_transaction_history`.

    Every argument is forwarded unchanged as a keyword argument and the
    coroutine's result is returned.
    """
    query = {
        "account": account,
        "start_date": start_date,
        "end_date": end_date,
        "time_period": time_period,
        "debug": debug,
    }
    return asyncio.run(unified_api.get_transaction_history(**query))
 def get_transaction_history_enhanced(account=None, start_date=None, end_date=None, time_period=None, debug=False):
    """Blocking wrapper around `unified_api.get_transaction_history_enhanced`.

    Every argument is forwarded unchanged as a keyword argument and the
    coroutine's result is returned.
    """
    query = {
        "account": account,
        "start_date": start_date,
        "end_date": end_date,
        "time_period": time_period,
        "debug": debug,
    }
    return asyncio.run(unified_api.get_transaction_history_enhanced(**query))
 def list_accounts(debug: bool = False):
    """Blocking wrapper around `unified_api.list_accounts`."""
    pending = unified_api.list_accounts(debug=debug)
    return asyncio.run(pending)
 def get_account_overview(account=None, debug: bool = False):
    """Blocking wrapper around `unified_api.get_account_overview`."""
    pending = unified_api.get_account_overview(account=account, debug=debug)
    return asyncio.run(pending)
 def get_positions(account=None, include_non_equity: bool = False, debug: bool = False):
    """Blocking wrapper around `unified_api.get_positions`.

    Arguments are forwarded unchanged as keyword arguments.
    """
    options = {
        "account": account,
        "include_non_equity": include_non_equity,
        "debug": debug,
    }
    return asyncio.run(unified_api.get_positions(**options))
 def get_portfolio_snapshot(account=None, aggregate_by_symbol: bool = True, include_non_equity: bool = False, debug: bool = False):
    """Blocking wrapper around `unified_api.get_portfolio_snapshot`.

    Arguments are forwarded unchanged as keyword arguments.
    """
    options = {
        "account": account,
        "aggregate_by_symbol": aggregate_by_symbol,
        "include_non_equity": include_non_equity,
        "debug": debug,
    }
    return asyncio.run(unified_api.get_portfolio_snapshot(**options))
 def refresh_session(debug: bool = False):
    """Blocking wrapper around `unified_api.refresh_session`."""
    pending = unified_api.refresh_session(debug=debug)
    return asyncio.run(pending)
 def check_session_health(debug: bool = False):
    """Return the "success" entry of the `unified_api.get_session_status` result."""
    status_envelope = asyncio.run(unified_api.get_session_status(debug=debug))
    succeeded = status_envelope["success"]
    return succeeded
 def get_session_status(debug: bool = False):
    """Blocking wrapper around `unified_api.get_session_status`."""
    pending = unified_api.get_session_status(debug=debug)
    return asyncio.run(pending)
 def get_session_info(debug: bool = False):
    """Return the result of `browser.session.get_session_info`.

    `debug` is accepted for signature parity with the other wrappers but is
    not used.
    """
    info = _session_info()
    return info
 def ensure_valid_session(debug: bool = False):
    """Run `unified_api.refresh_session` and return its "success" entry."""
    refresh_envelope = asyncio.run(unified_api.refresh_session(debug=debug))
    succeeded = refresh_envelope["success"]
    return succeeded
 def export_cookies(cookies_path: str, debug: bool = False):
    """Blocking wrapper around `unified_api.export_cookies`."""
    pending = unified_api.export_cookies(cookies_path, debug=debug)
    return asyncio.run(pending)
 def set_cookies(cookies_path: str, debug: bool = False):
    """Blocking wrapper around `unified_api.set_cookies`."""
    pending = unified_api.set_cookies(cookies_path, debug=debug)
    return asyncio.run(pending)
 def list_available_accounts(debug: bool = False):
    """Blocking wrapper around `unified_api.list_available_accounts`."""
    pending = unified_api.list_available_accounts(debug=debug)
    return asyncio.run(pending)
--- a/schwab_scraper/browser/init.py
+++ b/schwab_scraper/browser/init.py
@@ -0,0 +1,20 @@
 from .client import connect, new_context, new_page
 from .navigation import goto_with_auth_check
 from .session import (
    export_cookies,
    get_session_status,
    refresh_session,
    set_cookies_from_file,
 )
 __all__ = [
    "connect",
    "new_context",
    "new_page",
    "goto_with_auth_check",
    "get_session_status",
    "refresh_session",
    "set_cookies_from_file",
    "export_cookies",
 ]
--- a/schwab_scraper/browser/auth.py
+++ b/schwab_scraper/browser/auth.py
--- a/schwab_scraper/browser/client.py
+++ b/schwab_scraper/browser/client.py
@@ -0,0 +1,30 @@
 from typing import Any
 from playwright.async_api import async_playwright
 async def connect(playwright_url: str):
    """Start Playwright and connect to a Chromium instance at `playwright_url`.

    Args:
        playwright_url: Endpoint handed to `chromium.connect`.

    Returns:
        Tuple `(playwright, browser)`. Nothing here closes either object on
        success, so cleanup is left to the caller.

    Raises:
        Whatever `chromium.connect` raises. The Playwright instance started
        here is stopped before the error propagates so that a failed
        connection does not leave it running.
    """
    p = await async_playwright().start()
    try:
        browser = await p.chromium.connect(playwright_url)
    except BaseException:
        # The caller never receives `p` on failure, so it must be stopped here.
        await p.stop()
        raise
    return p, browser
 async def new_context(browser, cookies: list[dict] | None = None, user_agent: str | None = None):
    context = await browser.new_context(
        user_agent=user_agent or 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'
    )
    if cookies:
        valid_same_site_values = ['Strict', 'Lax', 'None']
        for cookie in cookies:
            if cookie.get('sameSite') not in valid_same_site_values:
                if cookie.get('sameSite') == 'no_restriction':
                    cookie['sameSite'] = 'None'
                else:
                    cookie['sameSite'] = 'Lax'
        await context.add_cookies(cookies)  # type: ignore
    return context
 async def new_page(context):
    """Open a new page on `context` and return it."""
    page = await context.new_page()
    return page
--- a/schwab_scraper/browser/navigation.py
+++ b/schwab_scraper/browser/navigation.py
@@ -0,0 +1,38 @@
 async def ensure_authenticated_page(page, context, debug: bool = False) -> bool:
    """Re-authenticate when `page` has landed on a login or timeout URL.

    A URL containing 'login' or 'sessiontimeout=y' (case-insensitive) is
    treated as an expired session. In that case the configured credentials
    are used to log in again and the resulting cookies replace those held by
    `context`.

    Returns:
        True when no re-authentication was needed or it succeeded; False when
        credentials are missing or the login returned no cookies.
    """
    current_url = page.url.lower()
    if 'login' not in current_url and 'sessiontimeout=y' not in current_url:
        return True

    if debug:
        print("DEBUG: Detected session timeout, attempting re-authentication...")
    # Imported lazily, as in the original layout after the refactor.
    from ..core.config import load_config, get_schwab_credentials
    from .auth import login_to_schwab

    username, password = get_schwab_credentials(load_config())
    if not (username and password):
        if debug:
            print("DEBUG: No credentials available for re-authentication")
        return False

    fresh_cookies = await login_to_schwab(username, password)
    if not fresh_cookies:
        if debug:
            print("DEBUG: Re-authentication failed")
        return False

    await context.clear_cookies()
    await context.add_cookies(fresh_cookies)
    if debug:
        print("DEBUG: Re-authentication successful")
    return True
 async def goto_with_auth_check(page, context, url: str, debug: bool = False, timeout: int = 60000):
    """Navigate to `url`, re-authenticating and retrying once if needed.

    Returns:
        False when `ensure_authenticated_page` reports failure, True
        otherwise. After a successful re-authentication the page is loaded a
        second time if it is still on a login or timeout URL.
    """
    async def _load():
        await page.goto(url, timeout=timeout)
        await page.wait_for_load_state('domcontentloaded')

    await _load()
    authenticated = await ensure_authenticated_page(page, context, debug=debug)
    if not authenticated:
        return False

    landed_url = page.url.lower()
    if 'login' in landed_url or 'sessiontimeout=y' in landed_url:
        await _load()
    return True
--- a/schwab_scraper/browser/session.py
+++ b/schwab_scraper/browser/session.py
@@ -0,0 +1,470 @@
 """
 Session management module for maintaining Schwab authenticated sessions.
 This module provides functionality to refresh session state through browser navigation
 without requiring 2FA approval for active sessions.
 """
 import json
 import logging
 import time
 from typing import List, Dict, Any, Optional
 from datetime import datetime
 from playwright.async_api import async_playwright
 from ..core.config import load_config, get_playwright_url, get_cookies_path
 from .client import new_context, new_page
 from ..core import ErrorType, Envelope, fail, ok
 async def refresh_session_state(cookies: Optional[List[Dict[str, Any]]] = None) -> bool:
    """
    Refresh the stored session by loading a Schwab page with the current cookies.

    The cookies are added to a fresh browser context, a research page is
    opened, and the cookies held by the context afterwards are written back
    to the cookies file.

    Args:
        cookies: Cookies to use. If None, they are read from the path returned
            by `get_cookies_path()`.

    Returns:
        bool: True if the page loaded without a login redirect, the critical
        session cookies passed the checks below and the updated cookies were
        saved; False otherwise. Exceptions are logged and reported as False.
    """
    logger = logging.getLogger(__name__)
    try:
        logger.info("Starting session refresh through navigation")
        # Load cookies if not provided
        if cookies is None:
            cookies_path = get_cookies_path()
            try:
                with open(cookies_path, 'r') as f:
                    cookies = json.load(f)
                logger.info(f"Loaded {len(cookies) if cookies else 0} cookies from {cookies_path}")
            except (FileNotFoundError, json.JSONDecodeError) as e:
                logger.error(f"Could not load cookies: {e}")
                return False
        if not cookies:
            logger.error("No cookies available for session refresh")
            return False
        config = load_config()
        playwright_url = get_playwright_url(config)
        async with async_playwright() as p:
            try:
                browser = await p.chromium.connect(playwright_url)
            except Exception as e:
                logger.error(f"Failed to connect to browser: {e}")
                return False
            # Stays None until the context exists so cleanup knows whether
            # there is anything to close.
            context = None
            try:
                # Create context with existing cookies
                context = await new_context(browser, cookies=cookies)
                page = await new_page(context)
                # Navigate to refresh session state
                logger.info("Navigating to Schwab research page to refresh session")
                await page.goto("https://client.schwab.com/app/research/#/stocks/AAPL", timeout=30000)
                await page.wait_for_timeout(2000)  # Let page settle and cookies update
                # Check if navigation was successful (no redirect to login)
                current_url = page.url
                is_redirected = any(pattern in current_url for pattern in [
                    '/login', '/signin', '/auth', '/Access/'
                ])
                if is_redirected:
                    logger.warning("Session refresh failed: redirected to login page")
                    logger.debug(f"Current URL: {current_url}")
                    return False
                # Get updated cookies after navigation
                new_cookies = await context.cookies()
                logger.info(f"Retrieved {len(new_cookies)} cookies after navigation")
                # Check if we still have critical session cookies
                critical_session_cookies = ['LVAL', 'NS2', 'sstate']
                missing_critical_cookies = []
                for cookie_name in critical_session_cookies:
                    old_cookie = next((c for c in cookies if c['name'] == cookie_name), None)
                    new_cookie = next((c for c in new_cookies if c['name'] == cookie_name), None)
                    if not new_cookie:
                        missing_critical_cookies.append(cookie_name)
                    elif old_cookie and new_cookie.get('expires') != -1:
                        # Session cookies should have expires = -1
                        missing_critical_cookies.append(f"{cookie_name} (invalid session cookie)")
                if missing_critical_cookies:
                    logger.warning(f"Session refresh failed: missing critical session cookies: {missing_critical_cookies}")
                    return False
                # Compare cookie states to detect changes (used for logging only)
                old_dict = {c['name']: c for c in cookies}
                new_dict = {c['name']: c for c in new_cookies}
                changes = []
                for name, old_cookie in old_dict.items():
                    if name not in new_dict:
                        continue
                    old_expires = old_cookie.get('expires', -1)
                    new_expires = new_dict[name].get('expires', -1)
                    if old_expires != new_expires:
                        changes.append({
                            'type': 'expiration_changed',
                            'name': name,
                            'old_expires': old_expires,
                            'new_expires': new_expires
                        })
                if changes:
                    logger.info(f"Detected {len(changes)} cookie changes (session refreshed)")
                    for change in changes[:3]:  # Show first 3
                        logger.debug(f"  {change['name']}: expiration updated")
                else:
                    logger.info("No cookie changes detected (session maintained)")
                # Save updated cookies
                cookies_path = get_cookies_path()
                with open(cookies_path, 'w') as f:
                    json.dump(new_cookies, f, indent=2)
                logger.info(f"Saved {len(new_cookies)} updated cookies")
                return True
            except Exception as e:
                logger.error(f"Error during session refresh: {e}")
                return False
            finally:
                # Single cleanup point for every exit path above.
                if context is not None:
                    try:
                        await context.close()
                    except Exception as close_error:
                        # Best effort: a failed context close must not hide the result.
                        logger.debug(f"Ignoring error while closing context: {close_error}")
                await browser.close()
    except Exception as e:
        logger.error(f"Session refresh failed: {e}")
        return False
 async def maintain_session_health() -> bool:
    """
    Check whether the stored cookies still give an authenticated session.

    The cookies file is read, a quick check confirms that at least one
    known session cookie is unexpired, and then a Schwab page is loaded
    with those cookies. The resulting URL (and, if the URL is inconclusive,
    the presence of login form elements) decides the outcome.

    Returns:
        bool: True if the session looks healthy, False if it does not or if
        any step fails. Exceptions are logged and reported as False.
    """
    logger = logging.getLogger(__name__)
    try:
        logger.info("Checking session health")
        # Load current cookies
        cookies_path = get_cookies_path()
        try:
            with open(cookies_path, 'r') as f:
                cookies = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            logger.error("No valid cookies found")
            return False
        if not cookies:
            logger.error("No cookies available")
            return False
        # First, check if we have valid session cookies (basic check)
        current_time = int(time.time())
        has_valid_session_cookies = False
        for cookie in cookies:
            name = cookie.get('name', '')
            expires = cookie.get('expires', -1)
            # Check for actual Schwab session cookies
            if name in ['auth', 'ASP.NET_SessionId', 'SessionInfo', '__RequestVerificationToken']:
                # Session cookies (expires=-1) are valid until browser closes
                # Other cookies must not be expired
                if expires == -1 or (expires and expires > current_time):
                    has_valid_session_cookies = True
                    break
        if not has_valid_session_cookies:
            logger.warning("Session health check: FAILED - no valid session cookies found")
            return False
        config = load_config()
        playwright_url = get_playwright_url(config)
        async with async_playwright() as p:
            browser = await p.chromium.connect(playwright_url)
            # Stays None until the context exists so cleanup knows whether
            # there is anything to close.
            context = None
            try:
                context = await new_context(browser, cookies=cookies)
                page = await new_page(context)
                # Navigate to a simple page to test session
                await page.goto("https://client.schwab.com/app/research/#/stocks/AAPL", timeout=30000)
                # Check if we're still authenticated by URL pattern
                current_url = page.url
                logger.debug(f"Current URL after navigation: {current_url}")
                is_authenticated_by_url = any(pattern in current_url for pattern in [
                    '/app/', '/Apps/', '/accounts/', '/Areas/Accounts', '/summary'
                ])
                # Check for login redirect patterns
                is_redirected = any(pattern in current_url for pattern in [
                    '/login', '/signin', '/auth', '/Access/'
                ])
                logger.debug(f"Authenticated by URL pattern: {is_authenticated_by_url}")
                logger.debug(f"Redirected to login: {is_redirected}")
                # Primary check: If we're not redirected and have a good URL pattern, we're authenticated
                if is_authenticated_by_url and not is_redirected:
                    logger.info("Session health check: PASSED - authenticated URL detected")
                    result = True
                elif is_redirected:
                    logger.warning("Session health check: FAILED - redirect to login detected")
                    result = False
                else:
                    # Secondary check: Look for any page content that indicates we're not on a login page
                    try:
                        # Check for login form elements
                        login_indicators = [
                            'input[type="password"]',
                            'input[name*="login"]',
                            'input[name*="user"]',
                            'input[id*="login"]',
                            'input[id*="user"]',
                            'button:has-text("Log In")',
                            'button:has-text("Sign In")'
                        ]
                        login_found = False
                        for selector in login_indicators:
                            if await page.query_selector(selector):
                                login_found = True
                                break
                        if login_found:
                            logger.warning("Session health check: FAILED - login form detected")
                            result = False
                        else:
                            logger.info("Session health check: PASSED - no login form detected")
                            result = True
                    except Exception as e:
                        logger.debug(f"Login form check error: {e}")
                        # If we can't check, assume healthy if we have valid cookies and no redirect
                        logger.info("Session health check: PASSED - based on cookies and URL")
                        result = True
                return result
            except Exception as e:
                logger.error(f"Session health check error: {e}")
                return False
            finally:
                # Single cleanup point for every exit path above.
                if context is not None:
                    try:
                        await context.close()
                    except Exception as close_error:
                        # Best effort: a failed context close must not hide the result.
                        logger.debug(f"Ignoring error while closing context: {close_error}")
                await browser.close()
    except Exception as e:
        logger.error(f"Session health check failed: {e}")
        return False
 def get_session_info() -> Dict[str, Any]:
    """
    Summarise the cookies stored at the configured cookies path.

    A cookie counts as session-related when its name contains 'session',
    'auth' or 'token' (case-insensitive). Among those, cookies with a
    positive expiry less than seven days away are reported as expiring.

    Returns:
        Dict with the total cookie count, the number of session-related and
        expiring cookies, the list of expiring cookies and a
        'session_status' string. When the file is missing or is not valid
        JSON, the dict carries an 'error' entry and zeroed counts.
    """
    cookies_path = get_cookies_path()
    try:
        with open(cookies_path, 'r') as handle:
            cookies = json.load(handle)
    except (FileNotFoundError, json.JSONDecodeError):
        return {
            'error': 'No valid cookies found',
            'total_cookies': 0,
            'session_cookies': 0,
            'expiring_cookies': 0,
            'expiring_soon': [],
            'session_status': 'missing_cookies'
        }

    seconds_per_day = 24 * 3600
    now_ts = datetime.now().timestamp()
    session_related = []
    expiring_soon = []
    for entry in cookies:
        cookie_name = entry.get('name', '')
        expiry = entry.get('expires', -1)
        # Skip cookies whose name does not look session-related.
        if not any(keyword in cookie_name.lower() for keyword in ['session', 'auth', 'token']):
            continue
        session_related.append({
            'name': cookie_name,
            'domain': entry.get('domain', ''),
            'expires': expiry,
            'is_session_cookie': expiry == -1
        })
        if expiry != -1 and expiry > 0:
            days_left = (expiry - now_ts) / seconds_per_day
            if days_left < 7:  # Expiring within a week
                expiring_soon.append({
                    'name': cookie_name,
                    'days_until_expire': days_left
                })

    status = 'active' if session_related else 'no_session_cookies'
    return {
        'total_cookies': len(cookies),
        'session_cookies': len(session_related),
        'expiring_cookies': len(expiring_soon),
        'expiring_soon': expiring_soon,
        'session_status': status
    }
 async def ensure_valid_session() -> bool:
    """
    Make sure a usable session exists, refreshing it when the health check fails.

    Returns:
        bool: False when the cookies file is missing, invalid or empty; True
        when `maintain_session_health` passes; otherwise the result of
        `refresh_session_state` run with the loaded cookies.
    """
    log = logging.getLogger(__name__)
    cookies_path = get_cookies_path()
    # Without stored cookies there is nothing to check or refresh.
    try:
        with open(cookies_path, 'r') as handle:
            cookies = json.load(handle)
    except (FileNotFoundError, json.JSONDecodeError):
        log.error("No valid cookies found")
        return False
    if not cookies:
        log.error("No cookies available")
        return False

    healthy = await maintain_session_health()
    if healthy:
        log.info("Session is healthy")
        return True

    log.info("Session needs refresh, attempting navigation refresh")
    refreshed = await refresh_session_state(cookies)
    return refreshed
 async def get_session_status(debug: bool = False) -> Envelope[dict]:
    """Return the session info wrapped in an envelope.

    When the stored cookies look active, the session is additionally
    validated with `maintain_session_health`; on failure the info is marked
    'invalid' and a 'validation_error' message is attached. Any exception is
    turned into a retryable `ErrorType.UNKNOWN` failure envelope.
    """
    log = logging.getLogger(__name__)
    try:
        info = get_session_info()
        cookies_look_active = info.get('session_status') == 'active'
        if cookies_look_active:
            log.debug("Session cookies found, validating with Schwab...")
            # Cookie presence alone is not proof; test the session for real.
            if await maintain_session_health():
                log.debug("Session validation succeeded")
            else:
                info['session_status'] = 'invalid'
                info['validation_error'] = 'Session cookies exist but Schwab authentication failed'
                log.warning("Session validation failed: cookies present but not accepted by Schwab")
        log.debug("Session status info: %s", info)
        return ok(info)
    except Exception as exc:
        log.exception("Failed to gather session status")
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
 async def refresh_session(debug: bool = False) -> Envelope[None]:
    """Run `refresh_session_state` and report the outcome as an envelope.

    Returns `ok(None)` on success, a retryable `ErrorType.AUTHENTICATION`
    failure when the refresh reports False, and a retryable
    `ErrorType.UNKNOWN` failure when an exception is raised.
    """
    log = logging.getLogger(__name__)
    try:
        if not await refresh_session_state():
            log.warning("Session refresh failed")
            return fail("Session refresh failed", ErrorType.AUTHENTICATION, retryable=True)
        log.info("Session refresh succeeded")
        return ok(None)
    except Exception as exc:
        log.exception("Exception during session refresh")
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
 async def set_cookies_from_file(path: str, debug: bool = False) -> Envelope[None]:
    """Copy the JSON cookies stored at `path` into the configured cookies file.

    Returns `ok(None)` on success, a non-retryable `ErrorType.VALIDATION`
    failure when `path` is missing or not valid JSON, and a retryable
    `ErrorType.UNKNOWN` failure for any other exception.
    """
    log = logging.getLogger(__name__)
    try:
        with open(path, "r") as source:
            imported = json.load(source)
        destination_path = get_cookies_path()
        with open(destination_path, "w") as destination:
            json.dump(imported, destination, indent=2)
        log.info("Imported %s cookies from %s", len(imported), path)
        return ok(None)
    except (FileNotFoundError, json.JSONDecodeError) as exc:
        log.error("Failed to load cookies from %s: %s", path, exc)
        return fail(str(exc), ErrorType.VALIDATION, retryable=False)
    except Exception as exc:
        log.exception("Unexpected error importing cookies from %s", path)
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
 async def export_cookies(path: str, debug: bool = False) -> Envelope[None]:
    """Copy the configured cookies file to `path` as indented JSON.

    Returns `ok(None)` on success, a non-retryable
    `ErrorType.AUTHENTICATION` failure when the stored cookies are missing
    or not valid JSON, and a retryable `ErrorType.UNKNOWN` failure for any
    other exception.
    """
    log = logging.getLogger(__name__)
    source_path = get_cookies_path()
    try:
        with open(source_path, "r") as source:
            stored = json.load(source)
        with open(path, "w") as destination:
            json.dump(stored, destination, indent=2)
        log.info("Exported %s cookies to %s", len(stored), path)
        return ok(None)
    except (FileNotFoundError, json.JSONDecodeError) as exc:
        log.error("Failed to read cookies for export: %s", exc)
        return fail(str(exc), ErrorType.AUTHENTICATION, retryable=False)
    except Exception as exc:
        log.exception("Unexpected error exporting cookies to %s", path)
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
--- a/schwab_scraper/cli.py
+++ b/schwab_scraper/cli.py
@@ -0,0 +1,190 @@
 import asyncio
 import argparse
 import json
 import os
 from dataclasses import asdict, is_dataclass
 from typing import Any
 from . import unified_api
 from .browser.auth import login_to_schwab
 from .core.config import load_config, get_schwab_credentials, set_config_path, set_cookies_path
 def _to_serializable(obj: Any) -> Any:
    if is_dataclass(obj):
        return asdict(obj)
    if isinstance(obj, list):
        return [_to_serializable(item) for item in obj]
    if isinstance(obj, dict):
        return {key: _to_serializable(value) for key, value in obj.items()}
    return obj
 def _print_envelope(envelope):
    payload = dict(envelope)
    payload["data"] = _to_serializable(payload.get("data"))
    print(json.dumps(payload, indent=2, default=str))
 async def test_scraper(ticker: str, debug: bool):
    """Fetch Morningstar data for ``ticker`` and print the resulting envelope.

    Announces the ticker on stdout, awaits
    ``unified_api.get_morningstar_data`` and hands the result to
    ``_print_envelope``.
    """
    print(f"Running scraper test for ticker: {ticker}")
    envelope = await unified_api.get_morningstar_data(ticker, debug=debug)
    _print_envelope(envelope)
 async def async_main():
    """Parse the command line and run exactly one CLI action.

    After applying the optional --config-path / --cookies-path overrides,
    the flags are checked in a fixed order and the first match wins:
    --login, --session-status, --set-cookies, --export-cookies,
    --list-accounts, --account-overview, --positions, --portfolio-snapshot,
    --transactions, and finally the positional ticker. With no action the
    help text is printed. Results are printed as JSON envelopes.
    """
    parser = argparse.ArgumentParser(description="Schwab Morningstar Scraper CLI")
    parser.add_argument("ticker", nargs='?', help="Stock ticker to scrape")
    parser.add_argument("--debug", action="store_true", help="Enable debug output")
    parser.add_argument("--login", action="store_true", help="Login only (don't scrape)")
    parser.add_argument("--test", action="store_true", help="Test mode")
    parser.add_argument("--phase1", action="store_true", help="Extract Phase 1 enhanced equity data (quote, dividends, earnings, valuation ratios)")
    # Configuration file paths
    parser.add_argument("--config-path", metavar="PATH", help="Custom path for config.json file")
    parser.add_argument("--cookies-path", metavar="PATH", help="Custom path for cookies.json file")
    # Session commands
    parser.add_argument("--session-status", action="store_true", help="Display current session status")
    parser.add_argument("--export-cookies", metavar="PATH", help="Export cookies to file")
    parser.add_argument("--set-cookies", metavar="PATH", help="Load cookies from file")
    # Transactions + accounts
    parser.add_argument("--transactions", action="store_true", help="Export and parse transaction history")
    parser.add_argument("--list-accounts", action="store_true", help="List available accounts")
    parser.add_argument("--account", help="Account identifier (ending digits like 604 or name like Joint)")
    parser.add_argument("--start-date", help="Start date for custom range (YYYY-MM-DD)")
    parser.add_argument("--end-date", help="End date for custom range (YYYY-MM-DD)")
    parser.add_argument("--time-period", help="Preset period (e.g., 'Current Month', 'Last 6 Months')")
    # Accounts & positions
    # nargs='?' with const="" distinguishes "flag absent" (None) from
    # "flag given without a value" (""), which means "aggregate".
    parser.add_argument("--account-overview", nargs='?', const="", help="Show balances for account or aggregate if omitted")
    parser.add_argument("--positions", nargs='?', const="", help="Show positions for account or aggregate if omitted")
    parser.add_argument("--portfolio-snapshot", nargs='?', const="", help="Show portfolio snapshot for account or aggregate if omitted")
    parser.add_argument("--include-non-equity", action="store_true", help="Include non-equity positions")
    parser.add_argument("--no-aggregate", action="store_true", help="Disable symbol aggregation in portfolio snapshot")
    args = parser.parse_args()
    # Apply custom path overrides if provided
    if args.config_path:
        # The config file must already exist; bail out before doing any work.
        if not os.path.exists(args.config_path):
            print(f"Error: Config file not found: {args.config_path}")
            return
        set_config_path(args.config_path)
    if args.cookies_path:
        # Note: cookies.json may not exist yet (created on first login)
        # so we don't validate existence, only that parent directory exists
        cookies_dir = os.path.dirname(args.cookies_path)
        # An empty dirname means a bare filename in the current directory.
        if cookies_dir and not os.path.exists(cookies_dir):
            print(f"Error: Directory for cookies file does not exist: {cookies_dir}")
            return
        set_cookies_path(args.cookies_path)
    if args.login:
        # Set up debug logging when --debug is used
        # (this is the only branch that configures logging itself).
        if args.debug:
            import logging
            logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(levelname)s %(name)s: %(message)s')
            print("Debug logging enabled")
        config = load_config()
        username, password = get_schwab_credentials(config)
        if username and password:
            print("Attempting to log in...")
            if args.debug:
                print(f"Using browserless server: {config.get('playwright', {}).get('url', 'default')}")
            cookies = await login_to_schwab(username, password)
            if cookies:
                print("Login successful and cookies saved.")
                # NOTE(review): the message names cookies.json even when
                # --cookies-path selected a different file.
                print(f"Saved {len(cookies)} cookies to cookies.json")
            else:
                print("Login failed.")
        else:
            print("Schwab username and password not found in config.json.")
        return
    if args.session_status:
        envelope = await unified_api.get_session_status(debug=args.debug)
        _print_envelope(envelope)
        return
    if args.set_cookies:
        envelope = await unified_api.set_cookies(args.set_cookies, debug=args.debug)
        _print_envelope(envelope)
        return
    if args.export_cookies:
        envelope = await unified_api.export_cookies(args.export_cookies, debug=args.debug)
        _print_envelope(envelope)
        return
    if args.list_accounts:
        envelope = await unified_api.list_accounts(debug=args.debug)
        _print_envelope(envelope)
        return
    # For the three optional-value flags below, "" (flag without a value) is
    # mapped to None before being passed as the account selector.
    if args.account_overview is not None:
        account_arg = args.account_overview or None
        envelope = await unified_api.get_account_overview(account=account_arg, debug=args.debug)
        _print_envelope(envelope)
        return
    if args.positions is not None:
        account_arg = args.positions or None
        envelope = await unified_api.get_positions(
            account=account_arg,
            include_non_equity=args.include_non_equity,
            debug=args.debug,
        )
        _print_envelope(envelope)
        return
    if args.portfolio_snapshot is not None:
        account_arg = args.portfolio_snapshot or None
        envelope = await unified_api.get_portfolio_snapshot(
            account=account_arg,
            aggregate_by_symbol=not args.no_aggregate,
            include_non_equity=args.include_non_equity,
            debug=args.debug,
        )
        _print_envelope(envelope)
        return
    if args.transactions:
        envelope = await unified_api.get_transaction_history(
            account=args.account,
            start_date=args.start_date,
            end_date=args.end_date,
            time_period=args.time_period,
            debug=args.debug,
        )
        _print_envelope(envelope)
        return
    if args.ticker:
        # --test takes precedence over --phase1; the default is a plain
        # Morningstar scrape.
        if args.test:
            await test_scraper(args.ticker, args.debug)
        elif args.phase1:
            print(f"Extracting Phase 1 enhanced equity data for {args.ticker}...")
            envelope = await unified_api.get_equity_phase1_data(args.ticker, debug=args.debug)
            _print_envelope(envelope)
        else:
            print(f"Scraping Morningstar data for {args.ticker}...")
            envelope = await unified_api.get_morningstar_data(args.ticker, debug=args.debug)
            _print_envelope(envelope)
        return
    # No action flag and no ticker: show usage.
    parser.print_help()
 def main():
    """Console-script entry point: run the async CLI to completion."""
    cli_run = async_main()
    asyncio.run(cli_run)
 if __name__ == "__main__":
    main()
--- a/schwab_scraper/core/__init__.py
+++ b/schwab_scraper/core/__init__.py
@@ -0,0 +1,20 @@
 from .contracts import (  # noqa: F401
    Envelope,
    ErrorType,
    AccountOverview,
    AccountSummary,
    Lot,
    MorningstarData,
    PortfolioSnapshot,
    Position,
    SessionStatus,
    Transaction,
    # Phase 1 data structures
    QuoteData,
    EnhancedDividends,
    EarningsData,
    CalculatedMetrics,
    EquityPhase1Data,
    fail,
    ok,
 )
--- a/schwab_scraper/core/config.py
+++ b/schwab_scraper/core/config.py
@@ -0,0 +1,134 @@
 import json
 import logging
 import os
 from typing import Optional
 # Module-level state for runtime path overrides
 _config_path_override: Optional[str] = None
 _cookies_path_override: Optional[str] = None
 def set_config_path(path: Optional[str]) -> None:
    """
    Set a custom path for config.json at runtime.
    This override takes precedence over environment variables and defaults.
    The value is stored in the module-level ``_config_path_override``; no
    validation of the path is performed here.
    Note: This uses module-level state and is not thread-safe. Suitable for
    single-threaded CLI usage or single async operations.
    Args:
        path: Absolute or relative path to config file, or None to reset
    """
    global _config_path_override
    _config_path_override = path
 def set_cookies_path(path: Optional[str]) -> None:
    """
    Set a custom path for cookies.json at runtime.
    This override takes precedence over environment variables and defaults.
    The value is stored in the module-level ``_cookies_path_override``; no
    validation of the path is performed here.
    Note: This uses module-level state and is not thread-safe. Suitable for
    single-threaded CLI usage or single async operations.
    Args:
        path: Absolute or relative path to cookies file, or None to reset
    """
    global _cookies_path_override
    _cookies_path_override = path
 def get_config_path() -> str:
    """
    Work out which configuration file to use.
    Sources are consulted in this order and the first non-empty one wins:
    1. The runtime override installed via set_config_path
    2. The SCHWAB_CONFIG_PATH environment variable
    3. config.json one directory above this module, if that file exists
    4. config.json in the current working directory
    Returns:
        str: Path to configuration file
    """
    # Explicit choices (override, then environment) beat any default.
    for explicit in (_config_path_override, os.environ.get('SCHWAB_CONFIG_PATH')):
        if explicit:
            return explicit
    # Package-root default, used only when the file is actually there.
    bundled = os.path.join(os.path.dirname(__file__), '..', 'config.json')
    return bundled if os.path.exists(bundled) else 'config.json'
 def get_cookies_path() -> str:
    """
    Work out which cookies file to use.
    The first non-empty source wins:
    1. The runtime override installed via set_cookies_path
    2. The SCHWAB_COOKIES_PATH environment variable
    3. cookies.json in the current working directory
    Returns:
        str: Path to cookies file
    """
    from_environment = os.environ.get('SCHWAB_COOKIES_PATH')
    return _cookies_path_override or from_environment or 'cookies.json'
 def load_config():
    """Read and parse the JSON configuration file.

    The file location comes from ``get_config_path()``.

    Returns:
        The parsed JSON document, or None (after logging an error) when the
        file is missing or is not valid JSON.
    """
    log = logging.getLogger(__name__)
    config_path = get_config_path()
    try:
        with open(config_path, 'r') as handle:
            parsed = json.loads(handle.read())
    except FileNotFoundError:
        log.error(f"config.json not found at {config_path}. Please create one based on config.json.sample")
        return None
    except json.JSONDecodeError:
        log.error(f"Invalid JSON in config file at {config_path}")
        return None
    return parsed
 def get_playwright_url(config=None):
    """Resolve the Playwright (browserless) endpoint URL.

    Lookup order: the SCHWAB_PLAYWRIGHT_URL environment variable, then
    ``config['playwright']['url']``, then a built-in default URL. When
    ``config`` is None it is loaded with ``load_config()``.
    """
    url_from_env = os.environ.get('SCHWAB_PLAYWRIGHT_URL')
    if url_from_env:
        return url_from_env
    settings = load_config() if config is None else config
    has_configured_url = (
        bool(settings)
        and 'playwright' in settings
        and 'url' in settings['playwright']
    )
    if not has_configured_url:
        # Default fallback URL
        return "ws://browser.local.ben.io:3000/playwright/chromium"
    return settings['playwright']['url']
 def get_schwab_credentials(config=None):
    """Return ``(username, password)`` from the ``schwab`` config section.

    When ``config`` is None it is loaded with ``load_config()``. If there is
    no usable config or no ``schwab`` section, ``(None, None)`` is returned;
    a key missing inside the section yields None for that element.
    """
    settings = load_config() if config is None else config
    if not (settings and 'schwab' in settings):
        return None, None
    section = settings['schwab']
    return section.get('username'), section.get('password')
--- a/schwab_scraper/core/contracts.py
+++ b/schwab_scraper/core/contracts.py
@@ -0,0 +1,271 @@
 from __future__ import annotations
 from dataclasses import dataclass, field
 from datetime import datetime
 from decimal import Decimal
 from enum import Enum
 from typing import Generic, Optional, TypeVar
 from typing_extensions import TypedDict
 T = TypeVar("T")
 class ErrorType(str, Enum):
    """Categorisation for envelope failures.

    Members are also ``str`` instances (the class mixes in ``str``), and
    each member's value is identical to its name.
    """
    AUTHENTICATION = "AUTHENTICATION"
    NETWORK = "NETWORK"
    PARSING = "PARSING"
    VALIDATION = "VALIDATION"
    # Catch-all category.
    UNKNOWN = "UNKNOWN"
 class Envelope(TypedDict, Generic[T]):
    """Standard response envelope for unified API operations.

    A plain dict at runtime, generic in the payload type ``T``.
    """
    # True for a successful result, False for a failure.
    success: bool
    # Payload of the operation; None when there is nothing to return.
    data: Optional[T]
    # Human-readable error message; None when there is no error.
    error: Optional[str]
    # Failure category; None when there is no error.
    error_type: Optional[ErrorType]
    # Whether the failure is flagged as retryable.
    retryable: bool
 def ok(data: T) -> Envelope[T]:
    """Wrap ``data`` in a success envelope.

    The envelope has ``success`` True, no error information and
    ``retryable`` False.
    """
    envelope = dict(
        success=True,
        data=data,
        error=None,
        error_type=None,
        retryable=False,
    )
    return envelope
 def fail(
    error: str,
    error_type: ErrorType | str = ErrorType.UNKNOWN,
    retryable: bool = False,
 ) -> Envelope[None]:
    """Build a failure envelope.

    Args:
        error: Human-readable error message.
        error_type: An ``ErrorType`` member or its string value. Values that
            do not name a member are mapped to ``ErrorType.UNKNOWN``.
        retryable: Whether the caller may reasonably retry the operation.

    Returns:
        An envelope with ``success`` False and ``data`` None.
    """
    # The enum lookup returns an existing member unchanged and converts a
    # matching string value, so one call handles both accepted input kinds.
    try:
        category = ErrorType(error_type)
    except ValueError:
        category = ErrorType.UNKNOWN
    return dict(
        success=False,
        data=None,
        error=error,
        error_type=category,
        retryable=retryable,
    )
@dataclass(slots=True)
 class SessionStatus:
    """Represents the current authentication session state.

    Only ``logged_in`` is required; every other field has a default.
    """
    logged_in: bool
    # Both default to None.
    session_age_minutes: Optional[int] = None
    last_refresh: Optional[datetime] = None
    needs_mfa: bool = False
    # Defaults to True, so a producer must set it explicitly to report
    # invalid cookies.
    cookies_valid: bool = True
@dataclass(slots=True)
 class AccountSummary:
    """Summary details for a Schwab account.

    ``id``, ``label`` and ``type`` are required; ``last4`` and ``is_margin``
    are optional.
    """
    # `id` and `type` shadow builtins only as attribute names; they are part
    # of the public shape and must not be renamed.
    id: str
    label: str
    type: str
    last4: Optional[str] = None
    is_margin: bool = False
@dataclass(slots=True)
 class AccountOverview:
    """Aggregated balance snapshot for an account.

    Only ``account`` is required. All balance fields default to None.
    """
    account: AccountSummary
    # Monetary amounts are Decimal; the percentage is a float.
    total_value: Optional[Decimal] = None
    day_change: Optional[Decimal] = None
    day_change_pct: Optional[float] = None
    cash: Optional[Decimal] = None
    settled_cash: Optional[Decimal] = None
    buying_power: Optional[Decimal] = None
    margin_balance: Optional[Decimal] = None
@dataclass(slots=True)
 class Lot:
    """Individual lot information within a position.

    Every field is optional and defaults to None.
    """
    # Kept as a string rather than a date object.
    acquired_date: Optional[str] = None
    quantity: Optional[float] = None
    cost_basis: Optional[Decimal] = None
    lot_id: Optional[str] = None
@dataclass(slots=True)
 class Position:
    """Holding data for a specific security.

    Only ``symbol`` is required; the remaining fields default to None, and
    ``lots`` defaults to an empty list.
    """
    symbol: str
    description: Optional[str] = None
    asset_type: Optional[str] = None
    quantity: Optional[float] = None
    # Monetary amounts are Decimal; the gain percentage is a float.
    market_price: Optional[Decimal] = None
    market_value: Optional[Decimal] = None
    cost_basis_total: Optional[Decimal] = None
    unrealized_gain: Optional[Decimal] = None
    unrealized_gain_pct: Optional[float] = None
    # default_factory gives each instance its own list (no shared default).
    lots: list[Lot] = field(default_factory=list)
@dataclass(slots=True)
 class PortfolioSnapshot:
    """Aggregated view of equity holdings across accounts."""
    # Required list of positions.
    equities: list[Position]
    total_value: Optional[Decimal] = None
    # Plain field defaulting to 0: this class does not derive it from
    # `equities`, so whoever builds the snapshot must set it.
    count: int = 0
@dataclass(slots=True)
 class MorningstarData:
    """Unified Morningstar data payload (existing equity fields).

    Only ``ticker`` is required. Apart from the integer ``rating``, every
    optional value is carried as a string and defaults to None.
    """
    ticker: str
    company_name: Optional[str] = None
    # Dividend fields.
    previous_dividend_payment: Optional[str] = None
    previous_pay_date: Optional[str] = None
    previous_ex_date: Optional[str] = None
    frequency: Optional[str] = None
    annual_dividend_rate: Optional[str] = None
    annual_dividend_yield: Optional[str] = None
    # The remaining fields mirror those of core.models.MorningstarPdfData.
    fair_value: Optional[str] = None
    economic_moat: Optional[str] = None
    capital_allocation: Optional[str] = None
    rating: Optional[int] = None
    one_star_price: Optional[str] = None
    five_star_price: Optional[str] = None
    assessment: Optional[str] = None
    range_52_week: Optional[str] = None
    dividend_yield: Optional[str] = None
    investment_style: Optional[str] = None
    report_url: Optional[str] = None
    report_date: Optional[str] = None
    source: Optional[str] = None
@dataclass(slots=True)
 class Transaction:
    """Normalized transaction record matching transactions feature.

    No field has a default: all eight must be supplied, although the
    Optional ones may be None. All values are carried as strings.
    """
    date: str
    action: str
    symbol: Optional[str]
    description: str
    quantity: Optional[str]
    price: Optional[str]
    fees_comm: Optional[str]
    amount: Optional[str]
 # Phase 1 Data Structures
@dataclass(slots=True)
 class QuoteData:
    """Quote and price data from symbol bar.

    Every field is optional and defaults to None. Prices and changes are
    floats, ``volume`` is an int, and the remaining values are strings.
    """
    price: Optional[float] = None
    change: Optional[float] = None
    change_percent: Optional[float] = None
    after_hours_price: Optional[float] = None
    after_hours_change: Optional[float] = None
    after_hours_change_percent: Optional[float] = None
    bid: Optional[float] = None
    ask: Optional[float] = None
    # Kept as text, not parsed into numbers.
    bid_ask_size: Optional[str] = None
    previous_close: Optional[float] = None
    open: Optional[float] = None
    volume: Optional[int] = None
    volume_vs_avg: Optional[str] = None
    day_range_low: Optional[float] = None
    day_range_high: Optional[float] = None
    week_52_low: Optional[float] = None
    week_52_high: Optional[float] = None
    # Kept as text, not parsed into a number.
    market_cap: Optional[str] = None
    sector: Optional[str] = None
    exchange: Optional[str] = None
@dataclass(slots=True)
 class EnhancedDividends:
    """Enhanced dividend data including forward-looking information.

    Every field is optional and defaults to None. Amounts, rates and yields
    are floats; dates and frequency are strings.
    """
    # Forward-looking data (Phase 1)
    next_payment: Optional[float] = None
    next_pay_date: Optional[str] = None
    next_ex_date: Optional[str] = None
    # Existing data
    frequency: Optional[str] = None
    annual_rate: Optional[float] = None
    annual_yield: Optional[float] = None
    previous_payment: Optional[float] = None
    previous_pay_date: Optional[str] = None
    previous_ex_date: Optional[str] = None
@dataclass(slots=True)
 class EarningsData:
    """Core earnings metrics and forecasts.

    Scalar fields are optional and default to None; the two history lists
    default to empty lists.
    """
    # Upcoming earnings
    next_announcement_date: Optional[str] = None
    announcement_timing: Optional[str] = None
    analysts_covering: Optional[int] = None
    consensus_estimate: Optional[float] = None
    estimate_high: Optional[float] = None
    estimate_low: Optional[float] = None
    # Historical earnings
    eps_ttm: Optional[float] = None
    revenue_ttm: Optional[float] = None  # Stored in dollars
    pe_ttm: Optional[float] = None
    forward_pe: Optional[float] = None
    peg_ratio: Optional[float] = None
    # Beat/miss history (simplified for Phase 1)
    # Untyped dicts whose keys are not defined here; default_factory gives
    # each instance its own list.
    recent_beats: list[dict] = field(default_factory=list)
    future_estimates: list[dict] = field(default_factory=list)
@dataclass(slots=True)
 class CalculatedMetrics:
    """Calculated metrics derived from other data.

    The class only stores the value; the calculation happens elsewhere.
    """
    # Defaults to None.
    payout_ratio: Optional[float] = None
@dataclass(slots=True)
 class EquityPhase1Data:
    """Complete Phase 1 enhanced equity data.

    Groups the Phase 1 sections for one ticker. Only ``ticker`` is
    required; each section defaults to None.
    """
    ticker: str
    quote: Optional[QuoteData] = None
    dividends: Optional[EnhancedDividends] = None
    earnings: Optional[EarningsData] = None
    calculated_metrics: Optional[CalculatedMetrics] = None
--- a/schwab_scraper/core/errors.py
+++ b/schwab_scraper/core/errors.py
@@ -0,0 +1,30 @@
 class ScraperError(Exception):
    """Base class for scraper-related errors.

    Every other exception in this module derives from it, so
    ``except ScraperError`` catches them all.
    """
 class SessionExpiredError(ScraperError):
    # Presumably signals an expired session (inferred from the name; the
    # raise sites are outside this module).
    pass
 class LoginError(ScraperError):
    # Presumably signals a failed login (inferred from the name; the raise
    # sites are outside this module).
    pass
 class InvalidTickerError(ScraperError):
    # Presumably signals an unrecognised ticker (inferred from the name; the
    # raise sites are outside this module).
    pass
 class NoDataError(ScraperError):
    # Presumably signals that no data was found (inferred from the name; the
    # raise sites are outside this module).
    pass
 class DownloadError(ScraperError):
    # Presumably signals a failed download (inferred from the name; the
    # raise sites are outside this module).
    pass
 class PdfParseError(ScraperError):
    # Presumably signals a PDF that could not be parsed (inferred from the
    # name; the raise sites are outside this module).
    pass
 class NavigationError(ScraperError):
    # Presumably signals a failed page navigation (inferred from the name;
    # the raise sites are outside this module).
    pass
--- a/schwab_scraper/core/models.py
+++ b/schwab_scraper/core/models.py
@@ -0,0 +1,66 @@
 from dataclasses import dataclass
 from typing import Optional, List
@dataclass
 class DividendsData:
    # Dividend fields, all carried as optional strings defaulting to None.
    previous_payment: Optional[str] = None
    previous_pay_date: Optional[str] = None
    previous_ex_date: Optional[str] = None
    frequency: Optional[str] = None
    annual_dividend_rate: Optional[str] = None
    annual_dividend_yield: Optional[str] = None
@dataclass
 class MorningstarPdfData:
    # Presumably values extracted from a Morningstar PDF report (inferred
    # from the class name). Every field is optional and defaults to None;
    # all are strings except the integer `rating`.
    fair_value: Optional[str] = None
    economic_moat: Optional[str] = None
    capital_allocation: Optional[str] = None
    rating: Optional[int] = None
    one_star_price: Optional[str] = None
    five_star_price: Optional[str] = None
    assessment: Optional[str] = None
    range_52_week: Optional[str] = None
    dividend_yield: Optional[str] = None
    investment_style: Optional[str] = None
    report_url: Optional[str] = None
    report_date: Optional[str] = None
@dataclass
 class ScrapeResult:
    # Combines the dividend and Morningstar sections for one ticker.
    # No field has a default, so all five must be supplied
    # (company_name may be None).
    ticker: str
    company_name: Optional[str]
    dividends: DividendsData
    morningstar: MorningstarPdfData
    source: str  # "live" | "cache"
 # -------------------- Transactions Feature --------------------
@dataclass
 class AccountInfo:
    # Describes one account; only is_selected has a default.
    account_type: str  # e.g., "Joint", "IRA", "Individual"
    account_ending: str  # e.g., "604", "197", "873"
    full_description: str  # e.g., "Joint …604 (Account ending in 6 0 4)"
    is_selected: bool = False
@dataclass
 class TransactionRecord:
    # One transaction row. No field has a default: all eight must be
    # supplied, although the Optional ones may be None. All values are
    # carried as strings.
    date: str
    action: str
    symbol: Optional[str]
    description: str
    quantity: Optional[str]
    price: Optional[str]
    fees_comm: Optional[str]
    amount: Optional[str]
@dataclass
 class TransactionData:
    # A set of transactions for one account. All fields are required.
    account_info: AccountInfo
    transactions: List[TransactionRecord]
    date_range: str
    export_date: str
    # Stored as given; this class does not compute it from `transactions`.
    total_transactions: int
    source: str  # "live" | "cache"
--- a/schwab_scraper/features/__init__.py
+++ b/schwab_scraper/features/__init__.py
--- a/schwab_scraper/features/accounts_positions/__init__.py
+++ b/schwab_scraper/features/accounts_positions/__init__.py
@@ -0,0 +1,14 @@
 """Unified accounts and positions feature package."""
 from .accounts_scraper import list_accounts
 from .overview_scraper import get_account_overview
 from .positions_scraper import get_positions
 from .portfolio_scraper import get_portfolio_snapshot
 __all__ = [
    "list_accounts",
    "get_account_overview",
    "get_positions",
    "get_portfolio_snapshot",
 ]
--- a/schwab_scraper/features/accounts_positions/accounts_scraper.py
+++ b/schwab_scraper/features/accounts_positions/accounts_scraper.py
@@ -0,0 +1,153 @@
 from __future__ import annotations
 import asyncio
 import re
 from typing import Optional
 from ...core import AccountSummary, Envelope, ErrorType, fail, ok
 from ...browser.client import connect, new_context, new_page
 from ...browser.navigation import goto_with_auth_check
 from ...browser.auth import ensure_cookies
 from ...core.config import get_playwright_url, load_config
 # Use the same URL as transactions feature for consistency and reliability
 TRANSACTION_HISTORY_URL = "https://client.schwab.com/app/accounts/history/#/"
 def _normalize_account_option(text: str, value: str) -> Optional[AccountSummary]:
    text = text.strip()
    if not text:
        return None
    normalized_text = re.sub(r"\s+", " ", text)
    last4_match = re.search(r"(\d{3,4})", normalized_text.replace(" ", ""))
    last4 = last4_match.group(1)[-4:] if last4_match else None
    type_match = re.search(r"^([A-Za-z&'\- ]+)", normalized_text)
    account_type = (type_match.group(1).strip() if type_match else "Account").replace(" ", "_")
    account_id_candidates = [candidate for candidate in (value.strip(), last4, normalized_text) if candidate]
    account_id = account_id_candidates[0] if account_id_candidates else normalized_text
    label = normalized_text
    is_margin = "margin" in normalized_text.lower()
    return AccountSummary(
        id=account_id,
        label=label,
        type=account_type,
        last4=last4,
        is_margin=is_margin,
    )
 async def list_accounts(debug: bool = False) -> Envelope[list[AccountSummary]]:
    """
    Discover accounts from Schwab transaction history page.
    Uses the robust account discovery logic from the transactions feature
    which handles multiple selector patterns and has enhanced reliability.

    Args:
        debug: When True, print discovery details and errors to stdout and
            forward the flag to the navigation/discovery helpers.

    Returns:
        ``ok([...])`` with one AccountSummary per distinct account id, or a
        failure envelope: AUTHENTICATION when no session cookies are
        available or the page cannot be loaded, PARSING when no accounts are
        found, UNKNOWN for any unexpected exception.
    """
    cookies = await ensure_cookies()
    if not cookies:
        return fail("Unable to establish Schwab session.", ErrorType.AUTHENTICATION, retryable=False)
    config = load_config()
    playwright_url = get_playwright_url(config)
    # Pre-bind to None so the finally block can clean up whatever was
    # created before a failure.
    playwright = browser = context = page = None
    try:
        playwright, browser = await connect(playwright_url)
        context = await new_context(browser, cookies=cookies)
        page = await new_page(context)
        if not await goto_with_auth_check(page, context, TRANSACTION_HISTORY_URL, debug=debug):
            return fail("Failed to load transaction history for account discovery.", ErrorType.AUTHENTICATION, retryable=True)
        # Allow page to fully load
        await asyncio.sleep(2)
        # Use the robust account discovery from transactions feature
        from ..transactions.scraper import discover_accounts_from_page
        discovered_accounts = await discover_accounts_from_page(page, debug=debug)
        if not discovered_accounts:
            return fail("Account dropdown not found on transaction history page.", ErrorType.PARSING, retryable=True)
        # Convert discovered accounts to AccountSummary objects
        accounts: list[AccountSummary] = []
        seen_ids: set[str] = set()
        for acc in discovered_accounts:
            # Create AccountSummary from discovered account info.
            # dict.get's default only applies when the key is missing, so
            # `or` is used to also fall back to the label when 'ending' is
            # present but empty/None (such accounts were dropped before).
            account_id = acc.get('ending') or acc.get('label', '')
            if account_id and account_id not in seen_ids:
                summary = AccountSummary(
                    id=account_id,
                    label=acc.get('label', ''),
                    type=acc.get('type', 'Account'),
                    last4=acc.get('ending', ''),
                    is_margin=False,  # Will be enhanced in future if needed
                )
                accounts.append(summary)
                seen_ids.add(account_id)
        if not accounts:
            return fail("No accounts discovered from Schwab transaction history.", ErrorType.PARSING, retryable=True)
        if debug:
            print(f"DEBUG: Successfully discovered {len(accounts)} accounts:")
            for acc in accounts:
                print(f"DEBUG: - {acc.label} (type: {acc.type}, last4: {acc.last4})")
        return ok(accounts)
    except Exception as exc:
        if debug:
            print(f"DEBUG: Account discovery error: {exc}")
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
    finally:
        # Tear down in reverse order of creation; each helper tolerates None
        # and swallows its own errors.
        await _safe_close_page(page)
        await _safe_close_context(context)
        await _safe_close_browser(browser)
        await _safe_stop_playwright(playwright)
 async def _safe_close_page(page) -> None:
    if page is None:
        return
    try:
        await page.close()
    except Exception:
        pass
 async def _safe_close_context(context) -> None:
    if context is None:
        return
    try:
        await context.close()
    except Exception:
        pass
 async def _safe_close_browser(browser) -> None:
    if browser is None:
        return
    try:
        await browser.close()
    except Exception:
        pass
 async def _safe_stop_playwright(playwright) -> None:
    if playwright is None:
        return
    try:
        await playwright.stop()
    except Exception:
        pass
--- a/schwab_scraper/features/accounts_positions/overview_scraper.py
+++ b/schwab_scraper/features/accounts_positions/overview_scraper.py
@@ -0,0 +1,426 @@
 from __future__ import annotations
 import asyncio
 import re
 from decimal import Decimal, InvalidOperation
 from typing import Any, Optional, Sequence
 from ...browser.auth import ensure_cookies
 from ...browser.client import connect, new_context, new_page
 from ...browser.navigation import goto_with_auth_check
 from ...core import AccountOverview, AccountSummary, Envelope, ErrorType, fail, ok
 from ...core.config import get_playwright_url, load_config
 # Schwab account summary page; get_account_overview navigates here before scraping.
 SUMMARY_URL = "https://client.schwab.com/accounts/summary/summary.aspx/"
 def _parse_currency(value: str | None) -> Optional[Decimal]:
    if not value:
        return None
    cleaned = value.strip()
    if not cleaned or cleaned in {"-", "--"}:
        return None
    negative = False
    if cleaned.startswith("(") and cleaned.endswith(")"):
        negative = True
    cleaned = cleaned.replace("$", "").replace(",", "")
    cleaned = cleaned.replace("(", "").replace(")", "")
    cleaned = cleaned.replace("−", "-").strip()
    if not cleaned:
        return None
    try:
        parsed = Decimal(cleaned)
        if negative or parsed < 0:
            parsed = -abs(parsed)
        return parsed
    except InvalidOperation:
        return None
 def _parse_percentage(value: str | None) -> Optional[float]:
    if not value:
        return None
    cleaned = value.strip()
    if not cleaned:
        return None
    negative = False
    if cleaned.startswith("(") and cleaned.endswith(")"):
        negative = True
    cleaned = cleaned.replace("%", "").replace("(", "").replace(")", "")
    cleaned = cleaned.replace("−", "-").strip()
    if not cleaned:
        return None
    try:
        parsed = float(cleaned)
    except ValueError:
        return None
    if negative or parsed < 0:
        parsed = -abs(parsed)
    return parsed
 def _normalize_account_label(label: str) -> AccountSummary:
    """Build an ``AccountSummary`` from a raw account label.

    Whitespace runs are collapsed to single spaces. The trailing digits are
    taken from the first 3-4 digit group that ends at a word boundary
    (searched with spaces removed), the type from the leading run of
    letters/``&``/``'``/``-``/spaces (spaces become underscores), and the id is
    ``"<type>-<last4>"`` or just the type when no digits were found.
    """
    collapsed = re.sub(r"\s+", " ", label).strip()

    digits_found = re.search(r"(\d{3,4})\b", collapsed.replace(" ", ""))
    if digits_found:
        trailing_digits = digits_found.group(1)[-4:]
    else:
        trailing_digits = None

    prefix_found = re.search(r"^[A-Za-z&'\- ]+", collapsed)
    if prefix_found:
        kind = re.sub(r"\s+", "_", prefix_found.group(0).strip())
    else:
        kind = "Account"

    if trailing_digits:
        identifier = f"{kind}-{trailing_digits}"
    else:
        identifier = kind

    return AccountSummary(
        id=identifier,
        label=collapsed,
        type=kind,
        last4=trailing_digits,
        is_margin="margin" in collapsed.lower(),
    )
 def _match_account(candidate: AccountSummary, requested: AccountSummary | str | None) -> bool:
    if requested is None:
        return True
    if isinstance(requested, AccountSummary):
        requested_values = {
            requested.id.lower(),
            requested.label.lower(),
        }
        if requested.last4:
            requested_values.add(requested.last4.lower())
    else:
        lookup = requested.strip().lower()
        requested_values = {lookup}
    candidate_values = {candidate.id.lower(), candidate.label.lower()}
    if candidate.last4:
        candidate_values.add(candidate.last4.lower())
    return bool(candidate_values & requested_values)
 def _rows_to_dicts(headers: Sequence[str], rows: Sequence[Sequence[str]]) -> list[dict[str, str]]:
    normalized_headers = [header.strip().lower() for header in headers]
    results: list[dict[str, str]] = []
    for row in rows:
        row_map: dict[str, str] = {}
        for idx, header in enumerate(normalized_headers):
            if idx < len(row):
                row_map[header] = row[idx].strip()
        results.append(row_map)
    return results
 async def _extract_table(page) -> dict[str, Any] | None:
    """Read the account table out of the page via in-browser JavaScript.

    The script looks up the first ``.sdps-tables__wrapper`` element and yields
    ``null`` (``None`` here) when it is absent. Otherwise the result has two keys:

    * ``headers`` - trimmed text of ``.sdps-tables__header-text`` elements in
      the header row, falling back to ``thead th`` when none are found.
    * ``rows`` - one list of trimmed cell texts per ``.sdps-tables__row--body``
      row, falling back to non-empty ``tbody tr`` rows when none are found.
    """
    return await page.evaluate(
        """
        () => {
            const wrapper = document.querySelector('.sdps-tables__wrapper');
            if (!wrapper) {
                return null;
            }
            const headerRow = wrapper.querySelector('.sdps-tables__row--header');
            const headers = headerRow
                ? Array.from(headerRow.querySelectorAll('.sdps-tables__header-text'))
                      .map((el) => (el.textContent || '').trim())
                : [];
            if (!headers.length) {
                const legacyHeaders = wrapper.querySelectorAll('thead th');
                if (legacyHeaders.length) {
                    for (const th of legacyHeaders) {
                        headers.push((th.textContent || '').trim());
                    }
                }
            }
            const bodyRows = wrapper.querySelectorAll('.sdps-tables__row--body');
            const rows = [];
            if (bodyRows.length) {
                bodyRows.forEach((row) => {
                    const cells = Array.from(
                        row.querySelectorAll('.sdps-tables__cell, div[role="cell"], td')
                    ).map((cell) => (cell.textContent || '').trim());
                    rows.push(cells);
                });
            }
            if (!rows.length) {
                const fallbackRows = wrapper.querySelectorAll('tbody tr');
                fallbackRows.forEach((row) => {
                    const cells = Array.from(row.querySelectorAll('td')).map((cell) => (cell.textContent || '').trim());
                    if (cells.length) {
                        rows.push(cells);
                    }
                });
            }
            return { headers, rows };
        }
        """
    )
 async def _extract_totals(page) -> dict[str, str | None]:
    """Read the page-level totals via in-browser JavaScript.

    Returns a dict with the keys ``total``, ``dayChange``, ``dayChangePct`` and
    ``cash``; each value is the raw matched text or ``None`` when the element
    or a matching amount was not found:

    * ``total`` - first currency amount near ``#total-value-label``.
    * ``dayChange`` / ``dayChangePct`` - first currency amount and first
      percentage inside the parent of ``#day-change-label``.
    * ``cash`` - first currency amount in the ``div`` enclosing the header
      whose text contains "cash & cash investments".

    The currency pattern keeps a leading minus sign (ASCII or U+2212) and
    surrounding accounting parentheses, so negative amounts are not silently
    turned into positive ones when the text is parsed later.
    """
    return await page.evaluate(
        r"""
        () => {
            // Sign-aware: "-$1.23", "−$1.23" and "($1.23)" all keep their negative marker.
            const currencyPattern = /\(?[-−]?\$[\d,]+\.?\d*\)?/;
            const result = { total: null, dayChange: null, dayChangePct: null, cash: null };
            const totalLabel = document.querySelector('#total-value-label');
            if (totalLabel) {
                const valueEl = totalLabel.closest('[class*="sdps-panel"], h2, div');
                if (valueEl) {
                    const currencyMatch = valueEl.textContent?.match(currencyPattern);
                    if (currencyMatch) {
                        result.total = currencyMatch[0];
                    }
                }
            }
            const dayChangeLabel = document.querySelector('#day-change-label');
            if (dayChangeLabel) {
                const container = dayChangeLabel.parentElement;
                if (container) {
                    const matchCurrency = container.textContent?.match(currencyPattern);
                    const matchPct = container.textContent?.match(/-?\d+(?:\.\d+)?%/);
                    if (matchCurrency) {
                        result.dayChange = matchCurrency[0];
                    }
                    if (matchPct) {
                        result.dayChangePct = matchPct[0];
                    }
                }
            }
            const cashLabel = Array.from(document.querySelectorAll('.sdps-tables__header-text')).find((el) =>
                el.textContent?.toLowerCase().includes('cash & cash investments')
            );
            if (cashLabel) {
                const container = cashLabel.closest('div');
                if (container) {
                    const matchCurrency = container.textContent?.match(currencyPattern);
                    if (matchCurrency) {
                        result.cash = matchCurrency[0];
                    }
                }
            }
            return result;
        }
        """
    )
 def _row_to_overview(row_map: dict[str, str]) -> tuple[AccountSummary, AccountOverview]:
    """Turn one header-keyed table row into an account summary and overview.

    Each field is read from the first non-empty column among a list of known
    header spellings. The label falls back to ``"Account"`` when no name column
    has a value.
    """
    def pick(*column_names: str) -> Optional[str]:
        # First non-empty cell among the candidate column names, else None.
        for column_name in column_names:
            cell = row_map.get(column_name)
            if cell:
                return cell
        return None

    summary = _normalize_account_label(
        pick('name', 'account', 'account name', '') or "Account"
    )
    overview = AccountOverview(
        account=summary,
        total_value=_parse_currency(pick('account value', 'total value', 'market value')),
        day_change=_parse_currency(pick('day change $', 'day change', 'day change amount')),
        day_change_pct=_parse_percentage(pick('day change %', 'day change percent')),
        cash=_parse_currency(pick('cash & cash investments', 'cash')),
        settled_cash=_parse_currency(row_map.get('settled cash')),
        buying_power=_parse_currency(pick('buying power', 'available to trade')),
        margin_balance=_parse_currency(pick('margin balance', 'margin')),
    )
    return summary, overview
 async def get_account_overview(
    account: AccountSummary | str | None = None, *, debug: bool = False
 ) -> Envelope[AccountOverview]:
    """Scrape the Schwab summary page and return an account overview envelope.

    With *account* given, the first table row matching it is returned. With
    ``account=None`` and several rows, the rows are aggregated and any totals
    found on the page override the aggregated total, day change, day change
    percentage and cash. Failures are returned as ``fail(...)`` envelopes; the
    page, context, browser and Playwright handle are always released.
    """
    session_cookies = await ensure_cookies()
    if not session_cookies:
        return fail("Unable to establish Schwab session.", ErrorType.AUTHENTICATION, retryable=False)

    playwright_url = get_playwright_url(load_config())
    playwright = browser = context = page = None
    try:
        playwright, browser = await connect(playwright_url)
        context = await new_context(browser, cookies=session_cookies)
        page = await new_page(context)

        reached = await goto_with_auth_check(page, context, SUMMARY_URL, debug=debug)
        if not reached:
            return fail("Failed to load Schwab account summary page.", ErrorType.AUTHENTICATION, retryable=True)
        await asyncio.sleep(1)

        table = await _extract_table(page)
        if not table:
            return fail("Unable to locate account overview table.", ErrorType.PARSING, retryable=True)

        selected: list[AccountOverview] = []
        for row_map in _rows_to_dicts(table["headers"], table["rows"]):
            # Rows whose cells are all empty carry no data.
            if not any(row_map.values()):
                continue
            summary, overview = _row_to_overview(row_map)
            if _match_account(summary, account):
                selected.append(overview)

        if not selected:
            return fail("Account not found in overview table.", ErrorType.VALIDATION, retryable=False)
        if account is not None or len(selected) == 1:
            return ok(selected[0])

        combined = _aggregate_overviews(selected)
        page_totals = await _extract_totals(page)
        if page_totals:
            # Values shown on the page take precedence over the computed sums.
            overrides = (
                ("total", "total_value", _parse_currency),
                ("dayChange", "day_change", _parse_currency),
                ("dayChangePct", "day_change_pct", _parse_percentage),
                ("cash", "cash", _parse_currency),
            )
            for totals_key, attribute, parse in overrides:
                raw_text = page_totals.get(totals_key)
                if raw_text:
                    setattr(combined, attribute, parse(raw_text))
        return ok(combined)
    except Exception as exc:
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
    finally:
        await _safe_close_page(page)
        await _safe_close_context(context)
        await _safe_close_browser(browser)
        await _safe_stop_playwright(playwright)
 def _aggregate_overviews(overviews: Sequence[AccountOverview]) -> AccountOverview:
    """Combine per-account overviews into a single "All Accounts" overview.

    Every monetary field is the sum of the values that are present across
    *overviews*. A field is ``None`` only when no account supplied a value for
    it, so a genuine total of zero is reported as ``Decimal("0")`` instead of
    being mistaken for missing data (the same presence tracking that the
    portfolio aggregation uses).

    ``day_change_pct`` is the summed day change divided by the summed total
    value, times 100; it stays ``None`` when either figure is unavailable or
    the total is zero.
    """
    def _sum_known(field_name: str) -> Optional[Decimal]:
        # Sum the non-None values of one field; None when nothing was reported.
        known = [
            amount
            for amount in (getattr(item, field_name) for item in overviews)
            if amount is not None
        ]
        return sum(known, Decimal("0")) if known else None

    total_value = _sum_known("total_value")
    day_change = _sum_known("day_change")

    day_change_pct: Optional[float] = None
    # NOTE(review): the denominator is the current total, not the prior-day
    # value (total - change); kept as in the original - confirm intent.
    if total_value and day_change is not None:
        try:
            day_change_pct = float((day_change / total_value) * 100)
        except (InvalidOperation, ZeroDivisionError):
            day_change_pct = None

    return AccountOverview(
        account=AccountSummary(
            id="AGGREGATE",
            label="All Accounts",
            type="AGGREGATE",
            last4=None,
            is_margin=False,
        ),
        total_value=total_value,
        day_change=day_change,
        day_change_pct=day_change_pct,
        cash=_sum_known("cash"),
        settled_cash=_sum_known("settled_cash"),
        buying_power=_sum_known("buying_power"),
        margin_balance=_sum_known("margin_balance"),
    )
 async def _safe_close_page(page) -> None:
    if page is None:
        return
    try:
        await page.close()
    except Exception:
        pass
 async def _safe_close_context(context) -> None:
    if context is None:
        return
    try:
        await context.close()
    except Exception:
        pass
 async def _safe_close_browser(browser) -> None:
    if browser is None:
        return
    try:
        await browser.close()
    except Exception:
        pass
 async def _safe_stop_playwright(playwright) -> None:
    if playwright is None:
        return
    try:
        await playwright.stop()
    except Exception:
        pass
--- a/schwab_scraper/features/accounts_positions/portfolio_scraper.py
+++ b/schwab_scraper/features/accounts_positions/portfolio_scraper.py
@@ -0,0 +1,134 @@
 from __future__ import annotations
 from decimal import Decimal, InvalidOperation
 from typing import Iterable, Optional
 from ...core import AccountSummary, Envelope, ErrorType, PortfolioSnapshot, Position, fail, ok
 from .positions_scraper import get_positions
 def _aggregate_positions(positions: Iterable[Position]) -> tuple[list[Position], Optional[Decimal]]:
    aggregated: dict[str, Position] = {}
    total_value = Decimal("0")
    has_value = False
    for position in positions:
        if position.market_value is not None:
            total_value += position.market_value
            has_value = True
        key = position.symbol.upper() if position.symbol else "UNKNOWN"
        if key not in aggregated:
            aggregated[key] = Position(
                symbol=position.symbol,
                description=position.description,
                asset_type=position.asset_type,
                quantity=position.quantity,
                market_price=position.market_price,
                market_value=position.market_value,
                cost_basis_total=position.cost_basis_total,
                unrealized_gain=position.unrealized_gain,
                unrealized_gain_pct=position.unrealized_gain_pct,
                lots=list(position.lots),
            )
            continue
        existing = aggregated[key]
        if position.quantity is not None:
            if existing.quantity is None:
                existing.quantity = position.quantity
            else:
                existing.quantity += position.quantity
        if position.market_value is not None:
            if existing.market_value is None:
                existing.market_value = position.market_value
            else:
                existing.market_value += position.market_value
        if position.cost_basis_total is not None:
            if existing.cost_basis_total is None:
                existing.cost_basis_total = position.cost_basis_total
            else:
                existing.cost_basis_total += position.cost_basis_total
        if position.unrealized_gain is not None:
            if existing.unrealized_gain is None:
                existing.unrealized_gain = position.unrealized_gain
            else:
                existing.unrealized_gain += position.unrealized_gain
        if position.market_price is not None:
            existing.market_price = position.market_price
        if position.unrealized_gain_pct is not None:
            existing.unrealized_gain_pct = position.unrealized_gain_pct
        if position.description and not existing.description:
            existing.description = position.description
        if position.asset_type:
            existing.asset_type = position.asset_type
        if position.lots:
            existing.lots.extend(position.lots)
    for item in aggregated.values():
        if item.unrealized_gain is not None and item.cost_basis_total not in (None, Decimal("0")):
            try:
                item.unrealized_gain_pct = float((item.unrealized_gain / item.cost_basis_total) * 100)
            except (InvalidOperation, ZeroDivisionError):
                item.unrealized_gain_pct = None
    total_value_out = total_value if has_value else None
    return list(aggregated.values()), total_value_out
 async def get_portfolio_snapshot(
    account: AccountSummary | str | None = None,
    *,
    aggregate_by_symbol: bool = True,
    include_non_equity: bool = False,
    debug: bool = False,
 ) -> Envelope[PortfolioSnapshot]:
    """Fetch positions and wrap them in a ``PortfolioSnapshot`` envelope.

    A failed positions lookup is passed on as a failure with the same error,
    error type and retryable flag (with defaults when they are missing). With
    ``aggregate_by_symbol`` the positions are merged per symbol first; otherwise
    they are returned as fetched. ``total_value`` is the sum of the available
    market values, or ``None`` when no position has one.
    """
    fetched = await get_positions(
        account=account,
        include_non_equity=include_non_equity,
        debug=debug,
    )
    if not fetched["success"]:
        return fail(
            fetched.get("error") or "Failed to retrieve positions.",
            fetched.get("error_type") or ErrorType.UNKNOWN,
            fetched.get("retryable", True),
        )

    positions = fetched["data"] or []
    if not aggregate_by_symbol:
        market_values = [
            position.market_value
            for position in positions
            if position.market_value is not None
        ]
        return ok(
            PortfolioSnapshot(
                equities=positions,
                total_value=sum(market_values, Decimal("0")) if market_values else None,
                count=len(positions),
            )
        )

    merged_positions, merged_total = _aggregate_positions(positions)
    return ok(
        PortfolioSnapshot(
            equities=merged_positions,
            total_value=merged_total,
            count=len(merged_positions),
        )
    )
--- a/schwab_scraper/features/accounts_positions/positions_scraper.py
+++ b/schwab_scraper/features/accounts_positions/positions_scraper.py
@@ -0,0 +1,432 @@
 from __future__ import annotations
 import re
 from decimal import Decimal, InvalidOperation
 from typing import Any, Optional, Sequence
 from ...browser.auth import ensure_cookies
 from ...browser.client import connect, new_context, new_page
 from ...browser.navigation import goto_with_auth_check
 from ...core import AccountSummary, Envelope, ErrorType, Lot, Position, fail, ok
 from ...core.config import get_playwright_url, load_config
 # Schwab positions page; get_positions navigates here before scraping the table.
 POSITIONS_URL = "https://client.schwab.com/app/accounts/positions/#/"
 def _parse_decimal(value: str | None) -> Optional[Decimal]:
    if not value:
        return None
    cleaned = value.strip()
    if not cleaned or cleaned in {"-", "--"}:
        return None
    negative = False
    if cleaned.startswith("(") and cleaned.endswith(")"):
        negative = True
    cleaned = (
        cleaned.replace("$", "")
        .replace(",", "")
        .replace("(", "")
        .replace(")", "")
        .replace("−", "-")
        .replace("%", "")
        .strip()
    )
    if not cleaned:
        return None
    try:
        parsed = Decimal(cleaned)
        if negative or parsed < 0:
            parsed = -abs(parsed)
        return parsed
    except InvalidOperation:
        return None
 def _parse_float(value: str | None) -> Optional[float]:
    """Parse *value* with ``_parse_decimal`` and return it as a ``float``.

    Returns ``None`` when the text cannot be parsed or converted.
    """
    as_decimal = _parse_decimal(value)
    try:
        return None if as_decimal is None else float(as_decimal)
    except (ValueError, InvalidOperation):
        return None
 def _normalize_account_label(label: str) -> AccountSummary:
    """Build an ``AccountSummary`` from a raw account label.

    Whitespace runs are collapsed to single spaces. The trailing digits are
    taken from the first 3-4 digit group that ends at a word boundary
    (searched with spaces removed), the type from the leading run of
    letters/``&``/``'``/``-``/spaces (spaces become underscores), and the id is
    ``"<type>-<last4>"`` or just the type when no digits were found.
    """
    collapsed = re.sub(r"\s+", " ", label).strip()

    digits_found = re.search(r"(\d{3,4})\b", collapsed.replace(" ", ""))
    if digits_found:
        trailing_digits = digits_found.group(1)[-4:]
    else:
        trailing_digits = None

    prefix_found = re.search(r"^[A-Za-z&'\- ]+", collapsed)
    if prefix_found:
        kind = re.sub(r"\s+", "_", prefix_found.group(0).strip())
    else:
        kind = "Account"

    if trailing_digits:
        identifier = f"{kind}-{trailing_digits}"
    else:
        identifier = kind

    return AccountSummary(
        id=identifier,
        label=collapsed,
        type=kind,
        last4=trailing_digits,
        is_margin="margin" in collapsed.lower(),
    )
 def _match_account(candidate: AccountSummary, requested: AccountSummary | str | None) -> bool:
    if requested is None:
        return True
    if isinstance(requested, AccountSummary):
        requested_values = {
            requested.id.lower(),
            requested.label.lower(),
        }
        if requested.last4:
            requested_values.add(requested.last4.lower())
    else:
        lookup = requested.strip().lower()
        requested_values = {lookup}
    candidate_values = {candidate.id.lower(), candidate.label.lower()}
    if candidate.last4:
        candidate_values.add(candidate.last4.lower())
    return bool(candidate_values & requested_values)
 def classify_asset(symbol: str | None, description: str | None) -> str:
    """Guess an asset class from a symbol and its description.

    Returns one of ``"EQUITY"``, ``"ETF"``, ``"MUTUAL_FUND"``, ``"OPTION"``,
    ``"BOND"``, ``"CASH"`` or ``"OTHER"``. Matching is case-insensitive and
    purely heuristic:

    * A ticker of 1-5 letters, optionally followed by a share-class suffix
      (``BRK.B``, ``BRK/B``), is an ETF or mutual fund when the description
      says so, otherwise an equity.
    * A symbol longer than five characters that contains a digit is an option.
    * Otherwise the description decides: bond keywords, then cash, then
      ETF / fund, and finally ``"OTHER"``.
    """
    sym = symbol.strip().upper() if symbol else ""
    desc = (description or "").strip().upper()

    # Share-class suffixes are accepted so that e.g. BRK.B is not pushed to
    # "OTHER" and then dropped by equity-only filters.
    if re.fullmatch(r"[A-Z]{1,5}(?:[./][A-Z]{1,2})?", sym):
        if "ETF" in desc:
            return "ETF"
        if "FUND" in desc or "MUTUAL" in desc:
            return "MUTUAL_FUND"
        return "EQUITY"

    if len(sym) > 5 and re.search(r"\d", sym):
        return "OPTION"

    # "CD" must be a standalone word; a plain substring test would flag any
    # description that merely contains the letters (e.g. "MCDONALDS").
    if "BOND" in desc or "TREASURY" in desc or re.search(r"\bCDS?\b", desc):
        return "BOND"

    # NOTE(review): the symbols below are all 1-5 letters, so the ticker branch
    # above returns before this symbol test is reached; only the description
    # test is effective here. Left as is - confirm intended precedence.
    if sym in {"CASH", "MMDA", "SWEEP"} or "CASH" in desc:
        return "CASH"
    if "ETF" in desc:
        return "ETF"
    if "FUND" in desc:
        return "MUTUAL_FUND"
    return "OTHER"
 async def _evaluate_table(page) -> dict[str, Any] | None:
    """Read the positions table out of the page via in-browser JavaScript.

    The script looks up ``#positionsDetails`` and yields ``null`` (``None``
    here) when it is absent. Otherwise the result has two keys:

    * ``headers`` - trimmed text of every ``thead tr th`` cell.
    * ``rows`` - one entry per row whose class contains ``position-row``, with
      ``type`` (always ``'position'``), ``cells`` (trimmed ``td`` texts),
      ``symbol`` (the row's ``data-symbol`` attribute or ``''``), ``account``
      (name captured from the most recent "account panel" header row, or
      ``null``) and ``lots`` (cell lists of the lot rows that follow it).

    Rows are classified in this order: account header, lot row (class contains
    ``lot``/``sub``/``child`` or ``data-row-type`` contains ``lot``), position
    row. Rows without ``td`` cells and rows of no recognised kind are skipped.
    """
    return await page.evaluate(
        """
        () => {
            const table = document.querySelector('#positionsDetails');
            if (!table) {
                return null;
            }
            const headers = Array.from(table.querySelectorAll('thead tr th')).map((th) =>
                (th.innerText || th.textContent || '').trim()
            );
            const rowElements = Array.from(table.querySelectorAll('tbody tr'));
            const rows = [];
            let current = null;
            let currentAccount = null;
            const isLotRow = (row) => {
                const klass = (row.className || '').toLowerCase();
                if (klass.includes('lot') || klass.includes('sub') || klass.includes('child')) {
                    return true;
                }
                const dataRole = (row.getAttribute('data-row-type') || '').toLowerCase();
                return dataRole.includes('lot');
            };
            const isPositionRow = (row) => {
                const klass = (row.className || '').toLowerCase();
                return klass.includes('position-row');
            };
            const isAccountHeader = (row) => {
                const klass = (row.className || '').toLowerCase();
                const text = (row.textContent || '').trim();
                return !klass.includes('position-row') && 
                       (klass.includes('highlight-row') || klass.includes('border-top-dark')) &&
                       text.includes('account panel');
            };
            for (const row of rowElements) {
                // Check if this is an account header row
                if (isAccountHeader(row)) {
                    const text = row.textContent.trim();
                    // Extract account name from account panel text
                    const match = text.match(/account panel[\\s\\n]+([^\\n]+)/);
                    if (match) {
                        currentAccount = match[1].trim();
                    }
                    continue;
                }
                const cells = Array.from(row.querySelectorAll('td')).map((cell) =>
                    (cell.innerText || cell.textContent || '').trim()
                );
                if (!cells.length) {
                    continue;
                }
                if (isLotRow(row)) {
                    if (current) {
                        current.lots.push(cells);
                    }
                } else if (isPositionRow(row)) {
                    // Extract symbol from data-symbol attribute
                    const symbol = row.getAttribute('data-symbol') || '';
                    current = { 
                        type: 'position', 
                        cells: cells, 
                        lots: [],
                        symbol: symbol,
                        account: currentAccount
                    };
                    rows.push(current);
                }
            }
            return { headers, rows };
        }
        """
    )
 def _map_row(headers: Sequence[str], cells: Sequence[str]) -> dict[str, str]:
    result: dict[str, str] = {}
    # Special handling: The table has columns in headers that don't correspond to cells
    # Headers: ['', 'Symbol', 'Description', 'Qty', 'Price', ...]
    # Cells:   ['VANGUARD...', '192.5', '$328.17', ...]
    # The first two headers (empty checkbox and Symbol) have no corresponding cells
    # So: Cell 0 → 'Description', Cell 1 → 'Qty', Cell 2 → 'Price', etc.
    # Find the symbol header index to know where the offset starts
    symbol_header_idx = None
    for idx, header in enumerate(headers):
        key = header.strip().lower()
        if 'symbol' in key and 'description' not in key:
            symbol_header_idx = idx
            break
    # Calculate offset - typically 2 (empty column + symbol column)
    offset = symbol_header_idx + 1 if symbol_header_idx is not None else 0
    for idx, header in enumerate(headers):
        # Normalize header: take first line, strip, lowercase
        # Headers often have format "Label\nsort\nfieldname"
        header_parts = header.strip().split('\n')
        key = header_parts[0].strip().lower() if header_parts else ""
        if not key:
            key = f"column_{idx}"
        # Map header to cell with offset
        if idx < offset:
            # These headers (empty, symbol) have no corresponding cells
            value = ""
        else:
            cell_idx = idx - offset
            value = cells[cell_idx].strip() if cell_idx < len(cells) else ""
        result[key] = value
    return result
 def _parse_lots(lot_rows: Sequence[Sequence[str]]) -> list[Lot]:
    lots: list[Lot] = []
    for cells in lot_rows:
        if not cells:
            continue
        acquired_date = cells[0].strip() if len(cells) > 0 else None
        quantity = _parse_float(cells[1] if len(cells) > 1 else None)
        cost_basis = _parse_decimal(cells[2] if len(cells) > 2 else None)
        lot_id = cells[3].strip() if len(cells) > 3 else None
        lots.append(
            Lot(
                acquired_date=acquired_date or None,
                quantity=quantity,
                cost_basis=cost_basis,
                lot_id=lot_id or None,
            )
        )
    return lots
 def _row_to_position(row_map: dict[str, str], lots_rows: Sequence[Sequence[str]], symbol: str = "") -> Position:
    """Build a ``Position`` from a header-keyed row, its lot rows and its symbol.

    The symbol is supplied by the caller; every other field is read from the
    first non-empty column among a list of known header spellings. The asset
    type is derived from the symbol and description.
    """
    def pick(*column_names: str) -> Optional[str]:
        # First non-empty cell among the candidate column names, else None.
        for column_name in column_names:
            cell = row_map.get(column_name)
            if cell:
                return cell
        return None

    description = pick('description', 'name', 'column_1') or ""
    return Position(
        symbol=symbol or "",
        description=description or None,
        asset_type=classify_asset(symbol, description),
        quantity=_parse_float(pick('quantity', 'qty')),
        market_price=_parse_decimal(pick('price', 'market price', 'last price')),
        market_value=_parse_decimal(pick('market value', 'mkt val')),
        cost_basis_total=_parse_decimal(pick('cost basis', 'total cost')),
        unrealized_gain=_parse_decimal(pick('gain/loss $', 'unrealized gain', 'gain/loss')),
        unrealized_gain_pct=_parse_float(pick('gain/loss %', 'unrealized gain %')),
        lots=_parse_lots(lots_rows),
    )
 async def get_positions(
    account: AccountSummary | str | None = None,
    *,
    include_non_equity: bool = False,
    debug: bool = False,
 ) -> Envelope[list[Position]]:
    """Scrape the Schwab positions page and return the matching positions.

    With *account* given, only rows under a matching account header are kept
    (rows without an account label are dropped). Unless *include_non_equity*
    is set, only positions classified as ``EQUITY`` or ``ETF`` are returned.
    Failures - including an empty result - are returned as ``fail(...)``
    envelopes; the page, context, browser and Playwright handle are always
    released.
    """
    session_cookies = await ensure_cookies()
    if not session_cookies:
        return fail("Unable to establish Schwab session.", ErrorType.AUTHENTICATION, retryable=False)

    playwright_url = get_playwright_url(load_config())
    playwright = browser = context = page = None
    try:
        playwright, browser = await connect(playwright_url)
        context = await new_context(browser, cookies=session_cookies)
        page = await new_page(context)

        reached = await goto_with_auth_check(page, context, POSITIONS_URL, debug=debug)
        if not reached:
            return fail("Failed to load Schwab positions page.", ErrorType.AUTHENTICATION, retryable=True)

        await page.wait_for_selector('#positionsDetails', timeout=45000)
        await page.wait_for_timeout(1000)
        # Scroll to the bottom of the page, then pause before reading the table.
        await page.evaluate('window.scrollTo(0, document.body.scrollHeight)')
        await page.wait_for_timeout(1500)

        scraped = await _evaluate_table(page)
        if not scraped:
            return fail("Unable to locate positions table.", ErrorType.PARSING, retryable=True)

        column_names = [name.strip().lower() for name in scraped.get('headers') or []]
        if not column_names:
            return fail("Positions table headers not found.", ErrorType.PARSING, retryable=True)

        equity_types = {"EQUITY", "ETF"}
        collected: list[Position] = []
        for entry in scraped.get('rows', []):
            if entry.get('type') != 'position':
                continue
            row_map = _map_row(column_names, entry.get('cells') or [])
            position = _row_to_position(
                row_map,
                entry.get('lots') or [],
                symbol=entry.get('symbol') or "",
            )
            if account is not None:
                owner_label = entry.get('account') or ""
                # A row without an account label cannot satisfy an account filter.
                if not owner_label:
                    continue
                if not _match_account(_normalize_account_label(owner_label), account):
                    continue
            if not (include_non_equity or position.asset_type in equity_types):
                continue
            collected.append(position)

        if not collected:
            return fail("No positions matched the requested criteria.", ErrorType.VALIDATION, retryable=False)
        return ok(collected)
    except Exception as exc:
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
    finally:
        await _safe_close_page(page)
        await _safe_close_context(context)
        await _safe_close_browser(browser)
        await _safe_stop_playwright(playwright)
 async def _safe_close_page(page) -> None:
    if page is None:
        return
    try:
        await page.close()
    except Exception:
        pass
 async def _safe_close_context(context) -> None:
    if context is None:
        return
    try:
        await context.close()
    except Exception:
        pass
 async def _safe_close_browser(browser) -> None:
    if browser is None:
        return
    try:
        await browser.close()
    except Exception:
        pass
 async def _safe_stop_playwright(playwright) -> None:
    if playwright is None:
        return
    try:
        await playwright.stop()
    except Exception:
        pass
--- a/schwab_scraper/features/equity/__init__.py
+++ b/schwab_scraper/features/equity/__init__.py
--- a/schwab_scraper/features/equity/morningstar.py
+++ b/schwab_scraper/features/equity/morningstar.py
@@ -0,0 +1,239 @@
 from typing import Optional, Tuple
 import logging
 async def find_report(page, debug: bool = False) -> Tuple[Optional[str], Optional[str]]:
    """Locate the Morningstar Equity Report link and date on the stock page.
    Strategies are tried in order until one yields a link:
    1. The original ``div[id='Morningstar Equity Report'] a.sr-report-link`` selector.
    2. A list of fallback CSS selectors.
    3. An in-page JavaScript scan for an anchor whose href contains both
       'morningstar' and 'pdf'.
    Args:
        page: Playwright page currently showing the stock page.
        debug: When True, emit debug logs and, if no link is found, save a
            full-page screenshot to ``debug_morningstar_not_found.png``.
    Returns:
        Tuple of (url, date) where:
        - url: The href attribute if it's a traditional link, or the special marker
               '__CLICK_TO_OPEN__' when the primary link has an empty href and
               must be clicked to open the report
        - date: The report date string if found
        ``(None, None)`` when no strategy finds a link.
    """
    logger = logging.getLogger(__name__)
    # Date element next to the primary link (spaces in the id are escaped).
    primary_date_selector = r"#Morningstar\ Equity\ Report > span:nth-child(3) > sdps-date-time > time > span:nth-child(2)"
    # Strategy 1: Original selector
    report_link_selector = "div[id='Morningstar Equity Report'] a.sr-report-link"
    if await page.is_visible(report_link_selector):
        if debug:
            logger.debug("Found Morningstar report using original selector")
        # ``.first`` keeps the strict locator actions below from raising when
        # the selector matches more than one element (same as the fallbacks).
        report_link_element = page.locator(report_link_selector).first
        await report_link_element.scroll_into_view_if_needed()
        url = await report_link_element.get_attribute("href")
        date_locator = page.locator(primary_date_selector)
        date_text = (await date_locator.first.inner_text()).strip() if await date_locator.count() > 0 else None
        # An empty href means a modern web component that builds a blob URL on click.
        if not url:
            if debug:
                logger.debug("Link found but href is empty - this is a modern web component that generates blob URLs on click")
            # Return a special marker to indicate we need to click the link to get the URL
            return '__CLICK_TO_OPEN__', date_text
        return url, date_text
    # Strategy 2: Look for any link containing "morningstar" in research section
    if debug:
        logger.debug("Original selector failed, trying fallback selectors...")
    fallback_selectors = [
        "a.sr-report-link[href*='morningstar']",
        "a[href*='morningstar'][href*='pdf']",
        "#morningstar-section a.sr-report-link",
        "div[id*='Morningstar'] a",
    ]
    date_selectors = [
        primary_date_selector,
        "sdps-date-time time span",
        "time span",
    ]
    for selector in fallback_selectors:
        try:
            if await page.is_visible(selector, timeout=2000):
                if debug:
                    logger.debug(f"Found Morningstar report using fallback selector: {selector}")
                report_link_element = page.locator(selector).first
                await report_link_element.scroll_into_view_if_needed()
                # The href is returned as-is here, so it may be None or empty.
                url = await report_link_element.get_attribute("href")
                # Try to find date with various selectors
                date_text = None
                for date_sel in date_selectors:
                    try:
                        date_locator = page.locator(date_sel)
                        if await date_locator.count() > 0:
                            date_text = (await date_locator.first.inner_text()).strip()
                            if date_text:
                                break
                    except Exception:
                        # Only ordinary errors are skipped; task cancellation
                        # and KeyboardInterrupt must propagate.
                        continue
                return url, date_text
        except Exception as e:
            if debug:
                logger.debug(f"Fallback selector {selector} failed: {e}")
            continue
    # Strategy 3: Use JavaScript to search for Morningstar links
    if debug:
        logger.debug("All CSS selectors failed, trying JavaScript search...")
    try:
        result = await page.evaluate("""
            () => {
                // Look for any link containing 'morningstar' and 'pdf'
                const links = Array.from(document.querySelectorAll('a[href]'));
                const morningstarLink = links.find(link => 
                    link.href.toLowerCase().includes('morningstar') && 
                    link.href.toLowerCase().includes('pdf')
                );
                if (morningstarLink) {
                    // Try to find associated date
                    let dateText = null;
                    const parent = morningstarLink.closest('[id*="Morningstar"]') || morningstarLink.parentElement;
                    if (parent) {
                        const timeElement = parent.querySelector('time');
                        if (timeElement) {
                            dateText = timeElement.textContent.trim();
                        }
                    }
                    return {
                        url: morningstarLink.href,
                        date: dateText
                    };
                }
                return null;
            }
        """)
        if result and result.get('url'):
            if debug:
                logger.debug(f"Found Morningstar report using JavaScript search: {result['url']}")
            return result['url'], result.get('date')
    except Exception as e:
        if debug:
            logger.debug(f"JavaScript search failed: {e}")
    # No report found
    if debug:
        logger.debug("No Morningstar report link found using any strategy")
        # Capture page state for debugging
        try:
            await page.screenshot(path="debug_morningstar_not_found.png", full_page=True)
            logger.debug("Saved debug screenshot to: debug_morningstar_not_found.png")
            # Log available elements for debugging
            page_info = await page.evaluate("""
                () => {
                    return {
                        hasMorningstarSection: !!document.querySelector('#morningstar-section'),
                        hasMorningstarDiv: !!document.querySelector('div[id*="Morningstar"]'),
                        morningstarLinks: Array.from(document.querySelectorAll('a[href]'))
                            .filter(a => a.href.toLowerCase().includes('morningstar'))
                            .length,
                        allReportLinks: Array.from(document.querySelectorAll('a.sr-report-link')).length
                    }
                }
            """)
            logger.debug(f"Page state: {page_info}")
        except Exception as e:
            logger.debug(f"Failed to capture debug info: {e}")
    return None, None
 async def download_report_as_bytes(page, url: str, debug: bool = False) -> Optional[bytes]:
    """Open the PDF in a new page and return bytes via data URL conversion.
    The popup is awaited with ``context.expect_page()`` so the listener is
    registered before the action that opens it. The popup page is always
    closed before returning, on success and on failure.
    Args:
        page: The current Playwright page
        url: Either a traditional URL or '__CLICK_TO_OPEN__' marker for blob URLs
        debug: Enable debug logging
    Returns:
        PDF bytes if successful, None otherwise
    Raises:
        Exception: in the traditional-URL case, errors while opening or
            loading the new page propagate to the caller (unchanged from the
            previous behaviour).
    """
    import base64
    logger = logging.getLogger(__name__)
    if not url:
        return None
    # Runs inside the popup: fetch the PDF and hand it back as Base64 text.
    fetch_as_base64_js = """
            async (url) => {
                const response = await fetch(url);
                const blob = await response.blob();
                return await new Promise((resolve) => {
                    const reader = new FileReader();
                    reader.onloadend = () => resolve(reader.result.split(',')[1]);
                    reader.readAsDataURL(blob);
                });
            }
            """
    new_page = None
    try:
        # Handle blob URL case (modern web component)
        if url == '__CLICK_TO_OPEN__':
            if debug:
                logger.debug("Handling blob URL - clicking link to open PDF")
            # Click the Morningstar report link to open the PDF
            report_link_selector = "div[id='Morningstar Equity Report'] a.sr-report-link"
            try:
                # Start listening for the new page BEFORE clicking.
                async with page.context.expect_page(timeout=15000) as new_page_info:
                    await page.click(report_link_selector)
                new_page = await new_page_info.value
                if debug:
                    logger.debug(f"New page opened with URL: {new_page.url}")
                # Wait for PDF to load
                await new_page.wait_for_load_state('load', timeout=10000)
                # The PDF is now loaded as a blob URL - extract it
                blob_url = new_page.url
            except Exception as e:
                if debug:
                    logger.debug(f"Error clicking link to open PDF: {e}")
                return None
        else:
            # Traditional URL case
            if debug:
                logger.debug(f"Opening PDF from traditional URL: {url}")
            async with page.context.expect_page() as new_page_info:
                await page.evaluate("url => window.open(url, '_blank')", url)
            new_page = await new_page_info.value
            await new_page.wait_for_load_state('load')
            blob_url = url
        # Fetch and convert to Base64 in browser context
        try:
            pdf_base64 = await new_page.evaluate(fetch_as_base64_js, blob_url)
            if not pdf_base64:
                return None
            return base64.b64decode(pdf_base64)
        except Exception as e:
            if debug:
                logger.debug(f"Error extracting PDF bytes: {e}")
            return None
    finally:
        # Never leak the popup, whichever path was taken above.
        if new_page is not None:
            try:
                await new_page.close()
            except Exception:
                pass
--- a/schwab_scraper/features/equity/parser.py
+++ b/schwab_scraper/features/equity/parser.py
@@ -0,0 +1,80 @@
 import re
 from io import BytesIO
 from typing import Dict
 import pdfplumber
 def clean_value(label: str, value: str) -> str:
    """Normalize the raw text extracted for ``label``.
    Rules by label:
    - "Morningstar Rating": count of 'Q' characters, rendered as "<n> stars".
    - "Economic Moat": the first of "Wide", "Narrow", "None" found in the text.
    - "Fair Value", "1-Star Price", "5-Star Price": the leading number with two
      decimals (digits and commas allowed), when the text starts with one.
    - "Assessment": the first whitespace-separated token.
    - "52-Week Range" / "52-Week-Range": em dashes replaced by '-'.
    Any label or value that matches no rule is returned unchanged, including an
    empty or whitespace-only "Assessment" value (which previously raised
    IndexError).
    """
    if label == "Morningstar Rating":
        return f"{value.count('Q')} stars"
    if label == "Economic Moat":
        for grade in ("Wide", "Narrow", "None"):
            if grade in value:
                return grade
        return value
    if label in ("Fair Value", "1-Star Price", "5-Star Price"):
        match = re.match(r"[\d,]+\.\d{2}", value)
        return match.group(0) if match else value
    if label == "Assessment":
        tokens = value.split()
        # Guard against an empty value instead of indexing blindly.
        return tokens[0] if tokens else value
    if label in ("52-Week-Range", "52-Week Range"):
        return value.replace('\u2014', '-')
    return value
 def parse(pdf_content: bytes) -> Dict[str, str]:
    """
    Extract key data points from a Morningstar PDF report.
    Only the third page (index 2) is read. Runs of one to three consecutive
    words are compared with the known labels; for a match, the value is the
    text on the same line (top within 2 units) that starts to the right of the
    label and less than 100 units from its right edge.
    Returns a dict keyed by the label names present in the report. An
    "Economic Moat" entry is stored only when its text contains "Wide",
    "Narrow" or "None"; every other label goes through ``clean_value``.
    """
    known_labels = [
        "Fair Value", "1-Star Price", "5-Star Price", "Assessment",
        "Dividend Yield", "Capital Allocation", "52-Week Range", "Investment Style",
        "Economic Moat", "Morningstar Rating"
    ]
    moat_grades = ("Wide", "Narrow", "None")
    extracted: Dict[str, str] = {}
    with pdfplumber.open(BytesIO(pdf_content)) as pdf:
        tokens = pdf.pages[2].extract_words(x_tolerance=1, y_tolerance=1, keep_blank_chars=False)  # Page 3
        token_count = len(tokens)
        for start, anchor in enumerate(tokens):
            # Grow the candidate label one word at a time (1 to 3 words).
            for stop in range(start + 1, min(start + 4, token_count)):
                candidate = " ".join(tok['text'] for tok in tokens[start:stop])
                if candidate not in known_labels:
                    continue
                # Collect the text to the right of the label on the same line.
                right_edge = tokens[stop - 1]['x1']
                trailing = [
                    tok['text'] for tok in tokens[stop:]
                    if abs(tok['top'] - anchor['top']) < 2 and tok['x0'] > right_edge and tok['x0'] - right_edge < 100
                ]
                if not trailing:
                    # Label text without a value beside it: try a longer run.
                    continue
                raw_value = " ".join(trailing)
                if candidate == "Economic Moat":
                    # Keep only a recognised grade; anything else is dropped.
                    for grade in moat_grades:
                        if grade in raw_value:
                            extracted[candidate] = grade
                            break
                else:
                    extracted[candidate] = clean_value(candidate, raw_value)
                break  # Move to the next word once a label is found
    return extracted
--- a/schwab_scraper/features/equity/phase1_api_scraper.py
+++ b/schwab_scraper/features/equity/phase1_api_scraper.py
@@ -0,0 +1,490 @@
 """Phase 1: API-Based Data Extraction (EXPERIMENTAL - NON-FUNCTIONAL)
 ⚠️ **STATUS: NON-FUNCTIONAL DUE TO CORS RESTRICTIONS** ⚠️
 This module was an attempt to extract equity data by calling Schwab's REST APIs directly.
 While the APIs exist and were discovered via HAR analysis, they are NOT accessible from 
 this scraper due to fundamental browser security limitations (CORS).
 ## Why This Approach Failed:
 1. **CORS (Cross-Origin Resource Sharing) Restrictions**: 
   - Research page: `client.schwab.com`, APIs: `ausgateway.schwab.com` (different origins)
   - Browser blocks cross-origin fetch() calls even from page.evaluate()
   - Results in "TypeError: Failed to fetch"
 2. **Authentication Complexity**:
   - Direct HTTP (aiohttp) with cookies: 401/403 errors
   - Playwright page.request.fetch(): 401 errors (separate context)
   - Likely requires dynamic tokens beyond cookies
 ## Recommendation:
 **Use `phase1_scraper.py` (DOM scraping) instead**. It works reliably with authenticated
 sessions and extracts all Phase 1 fields without CORS limitations.
 ## API Endpoints (discovered but inaccessible):
 - Quote: /api/is.ResearchExperience/v1/quote
 - Dividends: /api/is.ResearchExperience/v1/events/dividends
 - Earnings: /api/is.ResearchExperience/v1/events/earnings
 - Share Profile: /api/is.ResearchExperience/v1/shareprofile
 """
 from typing import Dict, Any, Optional, List
 import logging
 import uuid
 import aiohttp
 from playwright.async_api import Page
 from ...core import (
    QuoteData, EnhancedDividends, EarningsData, 
    CalculatedMetrics, EquityPhase1Data
 )
 logger = logging.getLogger(__name__)
 def _parse_float(value: Any) -> Optional[float]:
    """Safely parse a value to float."""
    if value is None:
        return None
    try:
        if isinstance(value, str):
            # Remove % sign if present
            value = value.replace('%', '').strip()
        return float(value)
    except (ValueError, TypeError):
        return None
 def _parse_market_cap(value: str) -> Optional[str]:
    """Parse market cap string like '$3.03T' or '$462.11B'."""
    if not value:
        return None
    # Keep the formatted string as-is for readability
    return value.strip()
 def _parse_volume(value: Any) -> Optional[int]:
    """Parse volume value."""
    if value is None:
        return None
    try:
        return int(float(value))
    except (ValueError, TypeError):
        return None
 def parse_quote_api_response(data: Dict[str, Any]) -> QuoteData:
    """Build a QuoteData from one quote API payload.
    Three sub-objects of ``data`` are read:
    - ``reference``: ``exchangeName``
    - ``quote``: ``lastPrice``, ``netChange``, ``netChangePercent``,
      ``postMarketChange``, ``postMarketPercentChange``
    - ``regularQuote``: ``closePrice``, ``openPrice``, ``bidPrice``, ``askPrice``,
      ``totalVolume``, ``lowPrice``, ``highPrice``, ``priceLow52W``,
      ``priceHigh52W``, ``bidSize``, ``askSize``, ``averageVolumeDaily``
    Example payload (abridged):
    {
      "reference": {"symbol": "JNJ", "exchangeName": "NYSE"},
      "quote": {"lastPrice": 193.155, "netChange": 1.275, "netChangePercent": 0.6644778},
      "regularQuote": {"lastPrice": 193.155, "lastSize": 100.0, ...}
    }
    Parsing is best-effort: any exception is logged at debug level and the
    QuoteData is returned with whatever fields were set before the failure.
    """
    parsed = QuoteData()
    try:
        ref_section = data.get('reference', {})
        live_section = data.get('quote', {})
        regular_section = data.get('regularQuote', {})
        # Basic info
        parsed.exchange = ref_section.get('exchangeName')
        # (attribute, source section, payload key, converter), applied in order.
        field_plan = (
            ('price', live_section, 'lastPrice', _parse_float),
            ('change', live_section, 'netChange', _parse_float),
            ('change_percent', live_section, 'netChangePercent', _parse_float),
            # After hours (post market)
            ('after_hours_change', live_section, 'postMarketChange', _parse_float),
            ('after_hours_change_percent', live_section, 'postMarketPercentChange', _parse_float),
            # Extended quote data
            ('previous_close', regular_section, 'closePrice', _parse_float),
            ('open', regular_section, 'openPrice', _parse_float),
            ('bid', regular_section, 'bidPrice', _parse_float),
            ('ask', regular_section, 'askPrice', _parse_float),
            ('volume', regular_section, 'totalVolume', _parse_volume),
            ('day_range_low', regular_section, 'lowPrice', _parse_float),
            ('day_range_high', regular_section, 'highPrice', _parse_float),
            ('week_52_low', regular_section, 'priceLow52W', _parse_float),
            ('week_52_high', regular_section, 'priceHigh52W', _parse_float),
        )
        for attr_name, section, key, convert in field_plan:
            setattr(parsed, attr_name, convert(section.get(key)))
        # Bid/Ask size is only recorded when at least one side is non-zero.
        bid_size = regular_section.get('bidSize', 0)
        ask_size = regular_section.get('askSize', 0)
        if bid_size or ask_size:
            parsed.bid_ask_size = f"{bid_size}/{ask_size}"
        # Volume vs average: the payload value is stored unchanged.
        avg_volume_label = regular_section.get('averageVolumeDaily')
        if avg_volume_label:
            parsed.volume_vs_avg = avg_volume_label
    except Exception as e:
        logger.debug(f"Error parsing quote API response: {e}")
    return parsed
 def parse_dividends_api_response(data: Dict[str, Any]) -> EnhancedDividends:
    """Build an EnhancedDividends from a dividends API payload.
    Only ``data['dividends']`` is read. Its first entry fills the next/upcoming
    fields (payment, pay date, ex date, frequency, annual rate, yield) and its
    second entry, when present, fills the previous-dividend fields.
    Example payload (abridged):
    {
      "symbol": "JNJ",
      "dividends": [
        {
          "dividendPayment": 1.3,
          "dividendPayDate": "December 09, 2025",
          "dividendExDate": "November 25, 2025",
          "dividendFrequency": "Quarterly",
          "annualDividendRate": 5.2,
          "dividendYield": "2.71%"
        },
        ...
      ]
    }
    Parsing is best-effort: any exception is logged at debug level and the
    partially filled object is returned. An empty or missing list returns an
    untouched EnhancedDividends.
    """
    parsed = EnhancedDividends()
    try:
        entries = data.get('dividends', [])
        if entries:
            # assumes the list is ordered newest first — TODO confirm against the API
            newest = entries[0]
            parsed.next_payment = _parse_float(newest.get('dividendPayment'))
            parsed.next_pay_date = newest.get('dividendPayDate')
            parsed.next_ex_date = newest.get('dividendExDate')
            parsed.frequency = newest.get('dividendFrequency')
            parsed.annual_rate = _parse_float(newest.get('annualDividendRate'))
            parsed.annual_yield = _parse_float(newest.get('dividendYield'))
            # Previous dividend (if there's more than one in history)
            if len(entries) > 1:
                prior = entries[1]
                parsed.previous_payment = _parse_float(prior.get('dividendPayment'))
                parsed.previous_pay_date = prior.get('dividendPayDate')
                parsed.previous_ex_date = prior.get('dividendExDate')
    except Exception as e:
        logger.debug(f"Error parsing dividends API response: {e}")
    return parsed
 def parse_earnings_api_response(data: Dict[str, Any]) -> EarningsData:
    """Build an EarningsData from an earnings API payload.
    Reads ``upcoming`` (next announcement and analyst estimates),
    ``historical`` (first entry: actual EPS and beat/miss figures) and
    ``fundamentals`` (P/E, forward P/E, PEG, revenue). When ``upcoming`` is
    empty, the analyst estimates come from the first historical entry.
    Example payload (abridged):
    {
      "symbol": "GOOGL",
      "fundamentals": {},
      "upcoming": {
        "earningsDate": "10/29/2025",
        "numberOfAnalysts": 43,
        "epsNonGaapEstimate": 2.18
      },
      "historical": [
        {
          "epsGaapActual": 2.31,
          "epsNonGaapActual": 2.31,
          "earningsDate": "07/23/2025",
          "numberOfAnalysts": 43,
          "epsNonGaapEstimate": 2.18,
          "epsNonGaapEstimateHigh": 2.42,
          "epsNonGaapEstimateLow": 2.0
        }
      ]
    }
    Parsing is best-effort: any exception is logged at debug level and the
    partially filled object is returned.
    """
    parsed = EarningsData()
    def _apply_estimates(source: Dict[str, Any]) -> None:
        # Analyst coverage and consensus figures share the same keys in both sections.
        parsed.analysts_covering = source.get('numberOfAnalysts')
        parsed.consensus_estimate = _parse_float(source.get('epsNonGaapEstimate'))
        parsed.estimate_high = _parse_float(source.get('epsNonGaapEstimateHigh'))
        parsed.estimate_low = _parse_float(source.get('epsNonGaapEstimateLow'))
    try:
        upcoming = data.get('upcoming', {})
        history = data.get('historical', [])
        fundamentals = data.get('fundamentals', {})
        # Upcoming earnings
        if upcoming:
            parsed.next_announcement_date = upcoming.get('earningsDate')
            parsed.announcement_timing = upcoming.get('announcementTiming')
            _apply_estimates(upcoming)
        # Historical earnings (first entry)
        if history:
            latest = history[0]
            # NOTE(review): eps_ttm is taken from a single historical entry; whether
            # that figure is really trailing-twelve-month is not visible here — confirm.
            parsed.eps_ttm = _parse_float(latest.get('epsNonGaapActual') or latest.get('epsGaapActual'))
            # Without upcoming data, fall back to the latest historical estimates.
            if not upcoming:
                _apply_estimates(latest)
            # Beat/miss information
            beat_amount = latest.get('epsNonGaapBeat')
            if beat_amount is not None:
                parsed.recent_beats = [{
                    'beat_amount': _parse_float(beat_amount),
                    'beat_percent': _parse_float(latest.get('epsNonGaapBeatPercent')),
                    'date': latest.get('earningsDate')
                }]
        # Fundamentals (PE ratios, revenue)
        if fundamentals:
            parsed.pe_ttm = _parse_float(fundamentals.get('peRatio'))
            parsed.forward_pe = _parse_float(fundamentals.get('forwardPE'))
            parsed.peg_ratio = _parse_float(fundamentals.get('pegRatio'))
            parsed.revenue_ttm = _parse_float(fundamentals.get('revenue'))
    except Exception as e:
        logger.debug(f"Error parsing earnings API response: {e}")
    return parsed
 def parse_shareprofile_api_response(data: Dict[str, Any], quote: QuoteData) -> QuoteData:
    """Copy the market cap from a share profile payload onto ``quote``.
    Only ``data['companySummary']['marketCapValue']`` is read; ``quote`` is
    updated in place and also returned.
    Example payload (abridged):
    {
      "companySummary": {
        "marketCapLabel": "Large Cap",
        "marketCapValue": "$462.11B",
        "companyEnterpriseValue": "$462.11B"
      },
      "shareInfo": [{
        "sharesOutstanding": "2.41B",
        "sharesHeld": "71.29%"
      }]
    }
    Any exception is logged at debug level and ``quote`` is returned as it
    stood at that point.
    """
    try:
        # Sector information is not read here; it may live in another endpoint.
        quote.market_cap = _parse_market_cap(
            data.get('companySummary', {}).get('marketCapValue')
        )
    except Exception as e:
        logger.debug(f"Error parsing share profile API response: {e}")
    return quote
 def calculate_payout_ratio(annual_dividend: Optional[float], eps_ttm: Optional[float]) -> Optional[float]:
    """Return the dividend payout ratio as a percentage rounded to two decimals.
    Formula: (Annual Dividend Rate / EPS TTM) × 100
    Returns None when either input is missing or zero, or when EPS is not positive.
    """
    if not annual_dividend or not eps_ttm or not (eps_ttm > 0):
        return None
    return round((annual_dividend / eps_ttm) * 100, 2)
 async def call_schwab_api(page: Page, url: str, debug: bool = False) -> Optional[Dict[str, Any]]:
    """Call a Schwab API endpoint with fetch() from inside the page's JavaScript context.
    The request is a GET with ``credentials: 'include'`` and a fixed set of
    Schwab client headers plus two freshly generated correlation UUIDs.
    NOTE: this module's own docstring reports that these cross-origin calls
    fail with "TypeError: Failed to fetch" because of CORS, so expect None in
    practice.
    Args:
        page: Playwright page with authenticated session
        url: API endpoint URL
        debug: Enable debug logging
    Returns:
        Parsed JSON response, or None when the fetch fails, the response status
        is not OK, or any exception is raised.
    """
    try:
        if debug:
            logger.debug(f"Calling API: {url}")
        fetch_script = """
            async ({url, correlatorId, clientCorrelId}) => {
                try {
                    const response = await fetch(url, {
                        method: 'GET',
                        credentials: 'include',  // Include cookies
                        headers: {
                            'accept': 'application/json',
                            'accept-language': 'en-US,en;q=0.9',
                            'cache-control': 'no-cache',
                            'content-type': 'application/json',
                            'correlatorid': correlatorId,
                            'pragma': 'no-cache',
                            'schwab-client-appid': 'AD00007800',
                            'schwab-client-channel': 'IO',
                            'schwab-client-correlid': clientCorrelId,
                            'schwab-resource-version': '2',
                        }
                    });
                    if (!response.ok) {
                        const errorText = await response.text();
                        return {
                            success: false,
                            status: response.status,
                            error: errorText
                        };
                    }
                    const data = await response.json();
                    return {
                        success: true,
                        status: response.status,
                        data: data
                    };
                } catch (error) {
                    return {
                        success: false,
                        error: error.toString()
                    };
                }
            }
        """
        # A fresh pair of correlation IDs is generated for every call.
        script_args = {
            'url': url,
            'correlatorId': str(uuid.uuid4()),
            'clientCorrelId': str(uuid.uuid4()),
        }
        outcome = await page.evaluate(fetch_script, script_args)
        if outcome.get('success'):
            payload = outcome.get('data')
            if debug and payload:
                logger.debug(f"API response keys: {list(payload.keys()) if isinstance(payload, dict) else 'list'}")
            return payload
        if debug:
            status = outcome.get('status', 'unknown')
            error = outcome.get('error', 'unknown error')
            logger.debug(f"API returned status {status}: {str(error)[:200]}")
        return None
    except Exception as e:
        if debug:
            logger.debug(f"Error calling API {url}: {e}")
        return None
 async def extract_phase1_data_api(page: Page, ticker: str, debug: bool = False) -> EquityPhase1Data:
    """Extract Phase 1 data through Schwab's REST APIs.
    Four endpoints (quote, dividends, earnings, share profile) are called one
    after another through ``call_schwab_api``; each response is parsed into its
    data object and a payout ratio is derived when both inputs are available.
    Any endpoint that returns nothing leaves its data object at defaults.
    NOTE: this module's docstring marks the API approach as non-functional
    (CORS), so the result may carry only default values.
    Args:
        page: Playwright page with authenticated session
        ticker: Stock ticker symbol
        debug: Enable debug logging
    Returns:
        EquityPhase1Data with all extracted fields
    """
    if debug:
        logger.debug(f"Starting API-based Phase 1 extraction for {ticker}")
    api_root = "https://ausgateway.schwab.com/api/is.ResearchExperience/v1"
    # Each call runs fetch() inside the page via call_schwab_api, in this order.
    quote_payload = await call_schwab_api(page, f"{api_root}/quote?symbols={ticker}&isComplex=true", debug)
    dividends_payload = await call_schwab_api(page, f"{api_root}/events/dividends?symbol={ticker}", debug)
    earnings_payload = await call_schwab_api(page, f"{api_root}/events/earnings?symbols={ticker}", debug)
    profile_payload = await call_schwab_api(page, f"{api_root}/shareprofile?symbols={ticker}&includeSubsidiaries=true", debug)
    # The quote endpoint may answer with a list (first item used) or a single object.
    if isinstance(quote_payload, list) and quote_payload:
        quote = parse_quote_api_response(quote_payload[0])
    elif isinstance(quote_payload, dict) and quote_payload:
        quote = parse_quote_api_response(quote_payload)
    else:
        quote = QuoteData()
    # Enhance quote with share profile data
    if profile_payload:
        quote = parse_shareprofile_api_response(profile_payload, quote)
    if dividends_payload:
        dividends = parse_dividends_api_response(dividends_payload)
    else:
        dividends = EnhancedDividends()
    if earnings_payload:
        earnings = parse_earnings_api_response(earnings_payload)
    else:
        earnings = EarningsData()
    # Calculate derived metrics
    derived = CalculatedMetrics()
    if dividends.annual_rate and earnings.eps_ttm:
        derived.payout_ratio = calculate_payout_ratio(
            dividends.annual_rate,
            earnings.eps_ttm
        )
    phase1_data = EquityPhase1Data(
        ticker=ticker,
        quote=quote,
        dividends=dividends,
        earnings=earnings,
        calculated_metrics=derived
    )
    if debug:
        logger.debug(f"API-based Phase 1 extraction complete for {ticker}")
        # Count populated fields (dataclasses with slots don't have __dict__)
        from dataclasses import fields as dataclass_fields
        quote_count = sum(1 for f in dataclass_fields(quote) if getattr(quote, f.name) is not None)
        div_count = sum(1 for f in dataclass_fields(dividends) if getattr(dividends, f.name) is not None)
        earn_count = sum(1 for f in dataclass_fields(earnings) if getattr(earnings, f.name) not in (None, []))
        # NOTE(review): the totals below are hard-coded; confirm they match the dataclasses.
        logger.debug(f"  Quote fields populated: {quote_count}/21")
        logger.debug(f"  Dividend fields populated: {div_count}/9")
        logger.debug(f"  Earnings fields populated: {earn_count}/13")
    return phase1_data
--- a/schwab_scraper/features/equity/phase1_scraper.py
+++ b/schwab_scraper/features/equity/phase1_scraper.py
@@ -0,0 +1,786 @@
 """Phase 1: Essential Dividend Metrics Implementation (DEPRECATED)
 ⚠️ DEPRECATED: This DOM-scraping based approach has been replaced by phase1_api_scraper.py
 which uses Schwab's REST APIs directly. The API approach is more reliable, complete,
 and maintainable than DOM scraping.
 This module is kept for reference only. New code should use phase1_api_scraper.py.
 Old approach extracts from DOM:
 - Quote/Price Data (symbol bar)
 - Enhanced Dividend Information (forward-looking dates)
 - Core Earnings Metrics (EPS, forecasts)
 - Basic Valuation Ratios (P/E, Forward P/E, PEG)
 - Calculated Metrics (payout ratio)
 """
 from typing import Dict, Any, Optional
 import re
 import logging
 from ...core import QuoteData, EnhancedDividends, EarningsData, CalculatedMetrics, EquityPhase1Data
 logger = logging.getLogger(__name__)
 def _parse_float(value: Any) -> Optional[float]:
    """Safely parse a value to float, handling $ and % symbols."""
    if value is None:
        return None
    try:
        # Remove common formatting characters
        clean = str(value).strip().replace('$', '').replace(',', '').replace('%', '')
        if clean and clean != '--' and clean.lower() != 'n/a':
            return float(clean)
    except (ValueError, AttributeError):
        pass
    return None
 def _parse_int(value: Any) -> Optional[int]:
    """Safely parse a value to int."""
    if value is None:
        return None
    try:
        clean = str(value).strip().replace(',', '')
        if clean and clean != '--' and clean.lower() != 'n/a':
            return int(float(clean))
    except (ValueError, AttributeError):
        pass
    return None
 def _parse_volume(volume_str: str) -> Optional[int]:
    """Parse volume string like '8M', '22.4M', '1.2B' to integer."""
    if not volume_str:
        return None
    try:
        volume_str = volume_str.strip().upper()
        multiplier = 1
        if volume_str.endswith('K'):
            multiplier = 1_000
            volume_str = volume_str[:-1]
        elif volume_str.endswith('M'):
            multiplier = 1_000_000
            volume_str = volume_str[:-1]
        elif volume_str.endswith('B'):
            multiplier = 1_000_000_000
            volume_str = volume_str[:-1]
        value = float(volume_str)
        return int(value * multiplier)
    except (ValueError, AttributeError):
        return None
 def _parse_revenue(revenue_str: str) -> Optional[float]:
    """Parse revenue string like '$92.15B', '$1.5M' to dollar value."""
    if not revenue_str:
        return None
    try:
        revenue_str = revenue_str.strip().upper().replace('$', '').replace(',', '')
        multiplier = 1
        if revenue_str.endswith('K'):
            multiplier = 1_000
            revenue_str = revenue_str[:-1]
        elif revenue_str.endswith('M'):
            multiplier = 1_000_000
            revenue_str = revenue_str[:-1]
        elif revenue_str.endswith('B'):
            multiplier = 1_000_000_000
            revenue_str = revenue_str[:-1]
        elif revenue_str.endswith('T'):
            multiplier = 1_000_000_000_000
            revenue_str = revenue_str[:-1]
        value = float(revenue_str)
        return value * multiplier
    except (ValueError, AttributeError):
        return None
 async def extract_quote_data(page, ticker: str = "", debug: bool = False) -> QuoteData:
    """Extract quote/price data from the symbol bar of a stock page.

    The page text is read inside the browser and matched against a series of
    regular expressions; the resulting strings are converted with the
    module's ``_parse_*`` helpers. Extraction is best-effort: any exception
    is swallowed (logged only when ``debug`` is set) and the partially
    populated object is returned.

    Args:
        page: Playwright page object
        ticker: Stock ticker symbol (for pattern matching)
        debug: Enable debug logging

    Returns:
        QuoteData object with extracted fields; fields that could not be
        found keep the value ``QuoteData()`` gave them.
    """
    quote = QuoteData()
    try:
        if debug:
            logger.debug("Starting quote data extraction...")
        # Wait for symbol bar content (look for key labels)
        try:
            await page.wait_for_selector('#app-symbol-bar-component, text=Previous close', state='attached', timeout=15000)
        except Exception:
            if debug:
                logger.debug("Timeout waiting for symbol bar selector, attempting to parse whatever is there")
        if debug:
            # The raw text is only needed for this log line, so the extra
            # browser round-trip is skipped when debug logging is off.
            symbol_bar_text = await page.evaluate('''
            () => {
                const symbolBar = document.querySelector('#app-symbol-bar-component');
                if (symbolBar && symbolBar.textContent && symbolBar.textContent.includes('Previous close')) return symbolBar.textContent;
                // If specific component not found, try to find the container with market data
                // Look for container with "Previous close"
                const labels = Array.from(document.querySelectorAll('span, div, p'));
                const prevCloseLabel = labels.find(el => el.textContent && el.textContent.includes('Previous close'));
                if (prevCloseLabel) {
                    // Return the parent's text content (go up a few levels to capture all data)
                    let parent = prevCloseLabel.parentElement;
                    let count = 0;
                    while (parent && count < 8) {
                        if (parent.textContent.length > 300) return parent.textContent;
                        parent = parent.parentElement;
                        count++;
                    }
                }
                return document.body.textContent || '';
            }
            ''')
            logger.debug(f"Symbol bar text (first 500 chars): {symbol_bar_text[:500]}")
        # Extract structured data.
        # NOTE: this is a *raw* Python string, so the JavaScript sees every
        # backslash exactly as written. Regex literals use a single backslash
        # (\$, \s); JS string literals passed to `new RegExp` use two (\\$, \\s).
        quote_data = await page.evaluate(r'''
            (ticker) => {
                const data = {};
                // Helper to get text content from page
                const getText = () => {
                   const symbolBar = document.querySelector('#app-symbol-bar-component');
                   // Verify it looks like the right component by checking for "Previous close"
                   if (symbolBar && symbolBar.textContent && symbolBar.textContent.includes('Previous close')) {
                       return symbolBar.textContent;
                   }
                   // Fallback logic
                   const labels = Array.from(document.querySelectorAll('span, div, p'));
                   const prevCloseLabel = labels.find(el => el.textContent && el.textContent.includes('Previous close'));
                   if (prevCloseLabel) {
                        let parent = prevCloseLabel.parentElement;
                        let count = 0;
                        while (parent && count < 8) {
                            if (parent.textContent.length > 300) return parent.textContent;
                            parent = parent.parentElement;
                            count++;
                        }
                   }
                   // Last resort: body text
                   return document.body.textContent || '';
                };
                const fullText = getText();
                // Try to find price in quote container first for accuracy
                const priceElement = document.querySelector('.symbol-quote-container, [data-testid="quote-price"]');
                if (priceElement) {
                    const priceText = priceElement.textContent || '';
                    const priceMatch = priceText.match(/\$([0-9,]+\.[0-9]+)/);
                    if (priceMatch) data.price = priceMatch[1].replace(',', '');
                } else {
                    // Fallback regex for price if element not found
                    // Look for price near top or just regex
                    const priceMatch = fullText.match(/\$([0-9,]+\.[0-9]{2})(\s|[+-]|$)/);
                    if (priceMatch) data.price = priceMatch[1].replace(',', '');
                }
                // After hours (using \s* for robustness)
                const afterHoursMatch = fullText.match(/After hours:?\s*\$([0-9,.]+)/i);
                if (afterHoursMatch) data.after_hours_price = afterHoursMatch[1].replace(',', '');
                const afterHoursChangeMatch = fullText.match(/After hours:.*?([+-]\$[0-9,.]+)\s*\(([+-][0-9.]+)%\)/i);
                if (afterHoursChangeMatch) {
                    data.after_hours_change = afterHoursChangeMatch[1].replace('$', '').replace(',', '');
                    data.after_hours_change_percent = afterHoursChangeMatch[2];
                }
                // Bid/Ask (using \s* for robustness)
                const bidMatch = fullText.match(/Bid\s*\$([0-9,.]+)/i);
                if (bidMatch) data.bid = bidMatch[1].replace(',', '');
                const askMatch = fullText.match(/Ask\s*\$([0-9,.]+)/i);
                if (askMatch) data.ask = askMatch[1].replace(',', '');
                const bidAskSizeMatch = fullText.match(/Bid\/Ask Size\s*([0-9]+\/[0-9]+)/i);
                if (bidAskSizeMatch) data.bid_ask_size = bidAskSizeMatch[1];
                // Previous close and open (using \s* instead of \s+)
                const prevCloseMatch = fullText.match(/Previous close\s*\$([0-9,.]+)/i);
                if (prevCloseMatch) data.previous_close = prevCloseMatch[1].replace(',', '');
                const openMatch = fullText.match(/Today's open\s*\$([0-9,.]+)/i);
                if (openMatch) data.open = openMatch[1].replace(',', '');
                // Volume (using \s*)
                const volumeMatch = fullText.match(/Today's volume\s*([0-9.]+[KMB]?)/i);
                if (volumeMatch) data.volume = volumeMatch[1];
                const volumeVsAvgMatch = fullText.match(/Today's volume\s*[0-9.]+[KMB]?\s*(Above Avg\.|Below Avg\.|Average)/i);
                if (volumeVsAvgMatch) data.volume_vs_avg = volumeVsAvgMatch[1];
                // Day range
                // Pattern: "Today's range low $200.81 Today's range high $203.45" or similar
                // We'll look for "low $X" and "high $Y" appearing after "Today's range"
                const dayRangeMatch = fullText.match(/Today's range.*?low\s*\$([0-9,.]+).*?high\s*\$([0-9,.]+)/i);
                if (dayRangeMatch) {
                    data.day_range_low = dayRangeMatch[1].replace(',', '');
                    data.day_range_high = dayRangeMatch[2].replace(',', '');
                }
                // 52-week range
                const weekRangeMatch = fullText.match(/52-week range.*?low\s*\$([0-9,.]+).*?high\s*\$([0-9,.]+)/i);
                if (weekRangeMatch) {
                    data.week_52_low = weekRangeMatch[1].replace(',', '');
                    data.week_52_high = weekRangeMatch[2].replace(',', '');
                }
                // Market cap (may be in Share Profile section)
                const marketCapMatch = fullText.match(/Market Cap\s*\$([0-9.]+[KMBT])/i);
                if (marketCapMatch) data.market_cap = marketCapMatch[1];
                // Change and change percent
                // Try specific formatted pattern first: TICKER $PRICE CHANGE CHANGE%
                // e.g. "JNJ $201.95 -1.03 -0.51%"
                const standardPattern = fullText.match(/\$([0-9,.]+)\s*([+-]?[0-9,.]+)\s*([+-]?[0-9.]+)%/);
                if (standardPattern) {
                     if (!data.price) data.price = standardPattern[1].replace(',', '');
                     data.change = standardPattern[2];
                     data.change_percent = standardPattern[3];
                }
                let percentMatch = null;
                if (ticker && !data.change_percent) {
                    // Match: TICKER$digits.digits{2}percent%
                    // Escape regex metacharacters so the symbol is matched literally.
                    const escapedTicker = ticker.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
                    const tickerPattern = new RegExp(escapedTicker + '\\.?[\\s]*\\$([0-9,]+\\.[0-9]{2})[\\s]*([0-9.]+)%', 'i');
                    percentMatch = fullText.match(tickerPattern);
                    if (percentMatch) {
                        data.change_percent = percentMatch[2];
                    }
                }
                if (!data.change_percent) {
                    // Fallback: match any price+percent pattern with space
                    const fallbackMatch = fullText.match(/\$[0-9,.]+\s*([+-]?[0-9.]+)%/);
                    if (fallbackMatch) {
                        data.change_percent = fallbackMatch[1];
                    }
                }
                // Pattern 2: "+$1.23 (+0.45%)" or "-$1.23 (-0.45%)"
                let changeMatch = fullText.match(/([+-]\$[0-9,.]+)\s*\(([+-][0-9.]+)%\)/);
                // Pattern 3: "$193.08 +1.23 +0.64%" (price followed by change)
                if (!changeMatch) {
                    changeMatch = fullText.match(/\$[0-9,.]+\s*([+-][0-9,.]+)\s*([+-][0-9.]+)%/);
                }
                // Pattern 4: "Change: +1.23 (+0.64%)"
                if (!changeMatch) {
                    changeMatch = fullText.match(/Change:?\s*([+-][0-9,.]+)\s*\(([+-][0-9.]+)%\)/i);
                }
                if (changeMatch) {
                    data.change = changeMatch[1].replace('$', '').replace(',', '');
                    if (!data.change_percent) {
                        data.change_percent = changeMatch[2].replace(/[+]/g, '');
                    }
                }
                // Exchange - look for NYSE, NASDAQ, etc.
                const exchangeMatch = fullText.match(/\b(NYSE|NASDAQ|AMEX|OTC|BATS)\b/i);
                if (exchangeMatch) data.exchange = exchangeMatch[1].toUpperCase();
                return data;
            }
        ''', ticker)
        # Parse and assign values (the _parse_* helpers return None for
        # missing or unparseable text, so absent keys are harmless).
        quote.price = _parse_float(quote_data.get('price'))
        quote.change = _parse_float(quote_data.get('change'))
        quote.change_percent = _parse_float(quote_data.get('change_percent'))
        quote.after_hours_price = _parse_float(quote_data.get('after_hours_price'))
        quote.after_hours_change = _parse_float(quote_data.get('after_hours_change'))
        quote.after_hours_change_percent = _parse_float(quote_data.get('after_hours_change_percent'))
        quote.bid = _parse_float(quote_data.get('bid'))
        quote.ask = _parse_float(quote_data.get('ask'))
        quote.bid_ask_size = quote_data.get('bid_ask_size')
        quote.previous_close = _parse_float(quote_data.get('previous_close'))
        quote.open = _parse_float(quote_data.get('open'))
        quote.volume = _parse_volume(quote_data.get('volume', ''))
        quote.volume_vs_avg = quote_data.get('volume_vs_avg')
        quote.day_range_low = _parse_float(quote_data.get('day_range_low'))
        quote.day_range_high = _parse_float(quote_data.get('day_range_high'))
        quote.week_52_low = _parse_float(quote_data.get('week_52_low'))
        quote.week_52_high = _parse_float(quote_data.get('week_52_high'))
        # Market cap is kept as the abbreviated text captured above (e.g. "1.2T").
        quote.market_cap = quote_data.get('market_cap')
        # Try to extract sector and exchange from page header
        header_data = await page.evaluate(r'''
            () => {
                const data = {};
                // Look for sector near company name
                const sectorElement = document.querySelector('[data-testid="sector"], .sector');
                if (sectorElement) {
                    data.sector = sectorElement.textContent.replace('Sector', '').trim();
                } else {
                    // Manual search for text containing "Sector"
                    const spans = Array.from(document.querySelectorAll('span'));
                    const sectorSpan = spans.find(el => el.textContent && el.textContent.includes('Sector'));
                    if (sectorSpan) {
                         data.sector = sectorSpan.textContent.replace('Sector', '').replace(':', '').trim();
                    }
                }
                // Look for exchange near ticker
                const exchangeElement = document.querySelector('[data-testid="exchange"], .exchange');
                if (exchangeElement) {
                    data.exchange = exchangeElement.textContent.trim();
                }
                // Fallback: parse from page text
                const pageText = document.body.textContent || '';
                if (!data.sector) {
                    const sectorMatch = pageText.match(/Sector[:\s]+([A-Za-z\s&]+)/);
                    if (sectorMatch) data.sector = sectorMatch[1].trim();
                }
                if (!data.exchange) {
                    const exchangeMatch = pageText.match(/(NYSE|NASDAQ|AMEX|OTC)/i);
                    if (exchangeMatch) data.exchange = exchangeMatch[1].toUpperCase();
                }
                return data;
            }
        ''')
        quote.sector = header_data.get('sector')
        # Prefer the header value; fall back to the exchange matched in the
        # symbol-bar text, which was previously computed but never used.
        quote.exchange = header_data.get('exchange') or quote_data.get('exchange')
        if debug:
            logger.debug(f"Extracted quote data: price={quote.price}, volume={quote.volume}, "
                        f"52w_range={quote.week_52_low}-{quote.week_52_high}")
    except Exception as e:
        # Deliberate best-effort: return whatever was collected so far.
        if debug:
            logger.debug(f"Error extracting quote data: {e}")
    return quote
 async def extract_enhanced_dividends(page, debug: bool = False) -> EnhancedDividends:
    """Extract enhanced dividend data including next payment dates.

    Waits for the ``#dividends`` panel, scrolls it into view, clicks its
    header, then reads the panel's text in the browser and matches it against
    a set of regular expressions. Extraction is best-effort: any exception
    (including the initial selector wait timing out) is swallowed, logged
    only when ``debug`` is set, and the partially populated object is returned.

    Args:
        page: Playwright page object
        debug: Enable debug logging

    Returns:
        EnhancedDividends object with extracted fields
    """
    dividends = EnhancedDividends()
    try:
        if debug:
            logger.debug("Starting enhanced dividend extraction...")
        # Wait for dividends panel to load
        await page.wait_for_selector('#dividends', timeout=15000)
        # Scroll to dividends panel
        await page.evaluate('''
            () => {
                const dividendsPanel = document.querySelector('#dividends');
                if (dividendsPanel) {
                    dividendsPanel.scrollIntoView({ behavior: 'smooth', block: 'center' });
                }
            }
        ''')
        # Fixed 1 s pause after requesting the smooth scroll
        await page.wait_for_timeout(1000)
        # CRITICAL: Click on the panel header to trigger content loading
        # Schwab's panels don't auto-load - they need to be clicked
        if debug:
            logger.debug("Clicking dividends panel header to trigger content load...")
        try:
            # query_selector returns the first element matching any of the listed selectors
            dividends_header = await page.query_selector('#dividends h2, #dividends .sdps-panel__title, #dividends-togglechevron-button')
            if dividends_header:
                await dividends_header.click()
                await page.wait_for_timeout(2000)
                if debug:
                    logger.debug("Clicked dividends panel header successfully")
        except Exception as e:
            # A failed click is not fatal; extraction continues with whatever is rendered
            if debug:
                logger.debug(f"Could not click dividends header: {e}")
        # Wait for content to load after click
        await page.wait_for_timeout(1000)
        # Extract dividend data
        # NOTE: this is a regular (non-raw) Python string, so each "\\" below
        # reaches the browser as a single backslash (e.g. "\\s" becomes \s).
        dividend_data = await page.evaluate('''
            () => {
                const data = {};
                const dividendsPanel = document.querySelector('#dividends');
                if (!dividendsPanel) return data;
                const fullText = dividendsPanel.textContent || '';
                // DEBUG: Return sample of text for debugging
                data._debug_text_sample = fullText.substring(0, 800);
                // Next dividend payment
                const nextPaymentMatch = fullText.match(/Next Dividend Payment\\s*\\$([0-9.]+)/i);
                if (nextPaymentMatch) data.next_payment = nextPaymentMatch[1];
                // Next pay date
                const nextPayDateMatch = fullText.match(/Next Pay Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
                if (nextPayDateMatch) data.next_pay_date = nextPayDateMatch[1];
                // Next ex-date
                const nextExDateMatch = fullText.match(/Next Ex-Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
                if (nextExDateMatch) data.next_ex_date = nextExDateMatch[1];
                // Previous dividend payment
                const prevPaymentMatch = fullText.match(/Previous Dividend Payment\\s*\\$([0-9.]+)/i);
                if (prevPaymentMatch) data.previous_payment = prevPaymentMatch[1];
                // Previous pay date
                const prevPayDateMatch = fullText.match(/Previous Pay Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
                if (prevPayDateMatch) data.previous_pay_date = prevPayDateMatch[1];
                // Previous ex-date
                const prevExDateMatch = fullText.match(/Previous Ex-Date\\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/i);
                if (prevExDateMatch) data.previous_ex_date = prevExDateMatch[1];
                // Frequency
                const frequencyMatch = fullText.match(/Frequency\\s*(Quarterly|Monthly|Annual|Semi-Annual)/i);
                if (frequencyMatch) data.frequency = frequencyMatch[1];
                // Annual Dividend Rate (IAD)
                const annualRateMatch = fullText.match(/Annual Dividend Rate.*?\\$([0-9.]+)/i);
                if (annualRateMatch) data.annual_rate = annualRateMatch[1];
                // Annual Dividend Yield - appears after "Annual Dividend Yield" text
                // Text pattern: "Annual Dividend Yield...2.71%"
                const yieldMatch = fullText.match(/Annual Dividend Yield[\\s\\S]{0,300}?([0-9]+\\.[0-9]+)%/i);
                if (yieldMatch) data.annual_yield = yieldMatch[1];
                return data;
            }
        ''')
        if debug and dividend_data.get('_debug_text_sample'):
            logger.debug(f"Dividend panel text sample: {dividend_data['_debug_text_sample']}")
        # Parse and assign values; amounts go through _parse_float, while dates
        # and frequency are stored as the matched text
        dividends.next_payment = _parse_float(dividend_data.get('next_payment'))
        dividends.next_pay_date = dividend_data.get('next_pay_date')
        dividends.next_ex_date = dividend_data.get('next_ex_date')
        dividends.previous_payment = _parse_float(dividend_data.get('previous_payment'))
        dividends.previous_pay_date = dividend_data.get('previous_pay_date')
        dividends.previous_ex_date = dividend_data.get('previous_ex_date')
        dividends.frequency = dividend_data.get('frequency')
        dividends.annual_rate = _parse_float(dividend_data.get('annual_rate'))
        dividends.annual_yield = _parse_float(dividend_data.get('annual_yield'))
        if debug:
            logger.debug(f"Extracted dividend data: next_payment={dividends.next_payment}, "
                        f"next_pay_date={dividends.next_pay_date}, annual_rate={dividends.annual_rate}")
    except Exception as e:
        # Best-effort: failures are only reported in debug mode
        if debug:
            logger.debug(f"Error extracting dividend data: {e}")
    return dividends
 async def extract_earnings_data(page, debug: bool = False) -> EarningsData:
    """Extract earnings metrics and forecasts.

    Waits for the ``#expected-earnings`` panel, scrolls it into view, clicks
    its header and any "Show More" control, then reads the panel text
    (descending into shadow roots) in the browser and matches it against a
    set of regular expressions. Extraction is best-effort: any exception is
    swallowed (logged only when ``debug`` is set) and the partially
    populated object is returned.

    Args:
        page: Playwright page object
        debug: Enable debug logging

    Returns:
        EarningsData object with extracted fields
    """
    earnings = EarningsData()
    try:
        if debug:
            logger.debug("Starting earnings data extraction...")
        # Wait for earnings panel to load
        await page.wait_for_selector('#expected-earnings', timeout=15000)
        # Scroll to earnings panel
        await page.evaluate('''
            () => {
                const earningsPanel = document.querySelector('#expected-earnings');
                if (earningsPanel) {
                    earningsPanel.scrollIntoView({ behavior: 'smooth', block: 'center' });
                }
            }
        ''')
        await page.wait_for_timeout(1000)
        # CRITICAL: Click on the panel header to trigger content loading
        # Schwab's panels don't auto-load - they need to be clicked
        if debug:
            logger.debug("Clicking earnings panel header to trigger content load...")
        try:
            earnings_header = await page.query_selector('#expected-earnings h2, #expected-earnings .sdps-panel__title, #expected-earnings-heading, #expected-earnings-togglechevron-button')
            if earnings_header:
                await earnings_header.click()
                await page.wait_for_timeout(2000)
                if debug:
                    logger.debug("Clicked earnings panel header successfully")
        except Exception as e:
            # A failed click is not fatal; extraction continues with whatever is rendered
            if debug:
                logger.debug(f"Could not click earnings header: {e}")
        # Wait for content to load after click
        await page.wait_for_timeout(1000)
        # Check for and click "Show More" if present
        try:
            # Use JS to find and click - most robust way
            clicked = await page.evaluate('''
                () => {
                    const panel = document.querySelector('#expected-earnings');
                    if (!panel) return false;
                    // Find any element with "Show More" text
                    const elements = Array.from(panel.querySelectorAll('a, button, span, div'));
                    const showMore = elements.find(el => el.textContent.trim().toLowerCase() === "show more");
                    if (showMore) {
                        showMore.click();
                        return true;
                    }
                    return false;
                }
            ''')
            if clicked:
                if debug:
                    logger.debug("found and clicked 'Show More' via JS")
                await page.wait_for_timeout(2000)
            elif debug:
                logger.debug("'Show More' not found or not clickable")
        except Exception as e:
            if debug:
                logger.debug(f"Error checking for Show More: {e}")
        # Extract earnings data.
        # NOTE: this is a *raw* Python string, so the JavaScript sees every
        # backslash exactly as written. A literal dollar sign in a regex
        # literal is therefore "\$"; "\\$" would mean "backslash, then
        # end of input" and could never match.
        earnings_data = await page.evaluate(r'''
            (debug) => {
                const data = {};
                // Helper to get text content including Shadow DOMs
                const getDeepText = (root) => {
                    if (!root) return '';
                    if (root.nodeType === Node.TEXT_NODE) return root.textContent;
                    if (root.nodeType === Node.ELEMENT_NODE && root.shadowRoot) {
                        return getDeepText(root.shadowRoot);
                    }
                    let text = '';
                    const children = root.childNodes;
                    for (let i = 0; i < children.length; i++) {
                        text += getDeepText(children[i]);
                    }
                    return text;
                };
                const earningsPanel = document.querySelector('#expected-earnings');
                let fullText = '';
                if (earningsPanel) {
                     fullText = getDeepText(earningsPanel);
                }
                // Fallback to body deep text if panel seems empty
                if (fullText.length < 500 || !fullText.includes("Announcement")) {
                    fullText = getDeepText(document.body);
                }
                // Next earnings announcement - robust regex checking for various patterns
                let nextAnnouncementMatch = fullText.match(/Next Earnings Announcement.*?([0-9]{2}\/[0-9]{2}\/[0-9]{4})/i);
                if (!nextAnnouncementMatch) {
                     // Try alternate pattern: Announcement: 12/12/2025
                     nextAnnouncementMatch = fullText.match(/Announcement:?\s*([0-9]{2}\/[0-9]{2}\/[0-9]{4})/i);
                }
                if (nextAnnouncementMatch) data.next_announcement_date = nextAnnouncementMatch[1];
                // Announcement timing
                const timingMatch = fullText.match(/(Before Market Open|After Market Close)/i);
                if (timingMatch) data.announcement_timing = timingMatch[1];
                // Number of analysts
                const analystsMatch = fullText.match(/With ([0-9]+) analysts covering/i);
                if (analystsMatch) data.analysts_covering = analystsMatch[1];
                // Consensus estimate
                const consensusMatch = fullText.match(/consensus.*?estimate is \$([0-9.]+)/i);
                if (consensusMatch) data.consensus_estimate = consensusMatch[1];
                // High/Low estimates
                const highLowMatch = fullText.match(/high and low estimates are \$([0-9.]+) and \$([0-9.]+)/i);
                if (highLowMatch) {
                    data.estimate_high = highLowMatch[1];
                    data.estimate_low = highLowMatch[2];
                }
                // EPS TTM (multiple patterns)
                let epsMatch = fullText.match(/EPS\s*\(TTM\)\s*(?:Value)?\s*\$?([0-9.-]+)/i);
                if (!epsMatch) epsMatch = fullText.match(/Earnings per Share\s*\(?TTM\)?\s*(?:Value)?\s*\$?([0-9.-]+)/i);
                if (!epsMatch) epsMatch = fullText.match(/EPS\s+(?:Value)?\s*([0-9.-]+)/i);
                if (epsMatch) data.eps_ttm = epsMatch[1];
                // Revenue TTM
                let revenueMatch = fullText.match(/Revenue\s*\(TTM\)\s*(?:Value)?\s*\$([0-9.]+[KMBT]?)/i);
                if (!revenueMatch) revenueMatch = fullText.match(/Revenue\s+(?:Value)?\s*\$([0-9.]+[KMBT])/i);
                if (revenueMatch) data.revenue_ttm = revenueMatch[1];
                // P/E TTM (multiple patterns)
                let peMatch = fullText.match(/Price[\/\s]*Earnings\s*\(TTM\)\s*(?:Value)?\s*([0-9.]+)/i);
                if (!peMatch) peMatch = fullText.match(/P[\/\s]*E\s*\(?TTM\)?\s*(?:Value)?\s*([0-9.]+)/i);
                if (!peMatch) peMatch = fullText.match(/PE Ratio\s*\(TTM\)\s*(?:Value)?\s*([0-9.]+)/i);
                if (peMatch) data.pe_ttm = peMatch[1];
                // Forward P/E
                let forwardPeMatch = fullText.match(/Forward\s+P[\/\s]*E\s*(?:Value)?\s*([0-9.]+)/i);
                if (!forwardPeMatch) forwardPeMatch = fullText.match(/P[\/\s]*E\s*\(Forward\)\s*(?:Value)?\s*([0-9.]+)/i);
                if (forwardPeMatch) data.forward_pe = forwardPeMatch[1];
                // PEG Ratio
                let pegMatch = fullText.match(/Price\s+to\s+Earnings[\/\s]*Growth\s*\(PEG\)\s*(?:Value)?\s*([0-9.]+)/i);
                if (!pegMatch) pegMatch = fullText.match(/PEG\s*Ratio?\s*(?:Value)?\s*([0-9.]+)/i);
                if (pegMatch) data.peg_ratio = pegMatch[1];
                // Recent beats/misses (simplified - just extract beat amounts)
                const beatMatches = fullText.matchAll(/Beat.*?\$([0-9.]+)/gi);
                data.recent_beats = [];
                for (const match of beatMatches) {
                    data.recent_beats.push(match[1]);
                }
                return data;
            }
        ''', debug)
        # Parse and assign values; dates and timing are stored as matched text,
        # numbers go through the _parse_* helpers (None when missing).
        earnings.next_announcement_date = earnings_data.get('next_announcement_date')
        earnings.announcement_timing = earnings_data.get('announcement_timing')
        earnings.analysts_covering = _parse_int(earnings_data.get('analysts_covering'))
        earnings.consensus_estimate = _parse_float(earnings_data.get('consensus_estimate'))
        earnings.estimate_high = _parse_float(earnings_data.get('estimate_high'))
        earnings.estimate_low = _parse_float(earnings_data.get('estimate_low'))
        earnings.eps_ttm = _parse_float(earnings_data.get('eps_ttm'))
        earnings.revenue_ttm = _parse_revenue(earnings_data.get('revenue_ttm', ''))
        earnings.pe_ttm = _parse_float(earnings_data.get('pe_ttm'))
        earnings.forward_pe = _parse_float(earnings_data.get('forward_pe'))
        earnings.peg_ratio = _parse_float(earnings_data.get('peg_ratio'))
        # Store recent beats as list of dicts; left untouched when none were found
        if earnings_data.get('recent_beats'):
            earnings.recent_beats = [
                {'beat_amount': _parse_float(beat)}
                for beat in earnings_data.get('recent_beats', [])
            ]
        if debug:
            logger.debug(f"Extracted earnings data: eps_ttm={earnings.eps_ttm}, "
                        f"pe_ttm={earnings.pe_ttm}, forward_pe={earnings.forward_pe}")
    except Exception as e:
        # Deliberate best-effort: return whatever was collected so far.
        if debug:
            logger.debug(f"Error extracting earnings data: {e}")
    return earnings
 def calculate_payout_ratio(annual_dividend: Optional[float], eps_ttm: Optional[float]) -> Optional[float]:
    """Return the dividend payout ratio as a percentage.

    Computed as ``annual_dividend / eps_ttm * 100`` and rounded to two
    decimal places.

    Args:
        annual_dividend: Annual dividend rate per share
        eps_ttm: Earnings per share (trailing twelve months)

    Returns:
        The ratio in percent, or ``None`` when either input is missing or
        zero, or when ``eps_ttm`` is not positive.
    """
    inputs_present = bool(annual_dividend) and bool(eps_ttm)
    if not inputs_present or not eps_ttm > 0:
        return None
    return round((annual_dividend / eps_ttm) * 100, 2)
 async def extract_phase1_data(page, debug: bool = False) -> EquityPhase1Data:
    """Run every Phase 1 extractor against a loaded page and bundle the results.

    The ticker is taken from the ``stocks/<SYMBOL>`` part of the page URL
    (empty string when the URL does not match). Quote, dividend and earnings
    data are then extracted in that order, and the payout ratio is derived
    when both the annual dividend rate and the trailing EPS are available.

    Args:
        page: Playwright page object
        debug: Enable debug output

    Returns:
        EquityPhase1Data object with all extracted data
    """
    if debug:
        logger.debug("Starting Phase 1 data extraction...")
    # Fixed 3 s pause so the page can settle before it is inspected
    await page.wait_for_timeout(3000)
    # Read the symbol out of the current URL inside the browser
    ticker = await page.evaluate('''
        () => {
            const url = window.location.href;
            const match = url.match(/stocks\\/([A-Z]+)/i);
            return match ? match[1].toUpperCase() : '';
        }
    ''')
    # Section extractors run sequentially against the same page
    quote_section = await extract_quote_data(page, ticker=ticker, debug=debug)
    dividend_section = await extract_enhanced_dividends(page, debug=debug)
    earnings_section = await extract_earnings_data(page, debug=debug)
    # Derived metrics need both inputs to be present and non-zero
    derived = CalculatedMetrics()
    if dividend_section.annual_rate and earnings_section.eps_ttm:
        derived.payout_ratio = calculate_payout_ratio(
            dividend_section.annual_rate, earnings_section.eps_ttm
        )
    result = EquityPhase1Data(
        ticker=ticker,
        quote=quote_section,
        dividends=dividend_section,
        earnings=earnings_section,
        calculated_metrics=derived,
    )
    if debug:
        logger.debug(f"Phase 1 extraction complete for {ticker}")
    return result
--- a/schwab_scraper/features/equity/scraper.py
+++ b/schwab_scraper/features/equity/scraper.py
@@ -0,0 +1,977 @@
 from typing import Dict, Any, Optional
 from ...utils.logging import save_debug_artifact
 def should_replace_dividend_value(existing_value: Optional[str], new_value: Optional[str]) -> bool:
    """
    Decide whether to replace an existing dividend field value with a new one.
    Rules, applied in order:
    - Never replace with empty, whitespace-only, or None values
    - Replace if there is no existing value (None, empty, or whitespace-only)
    - Replace if the existing value contains the "Show More" placeholder text
    - Otherwise, keep the existing (good) data
    Args:
        existing_value: Value currently stored for the field, if any
        new_value: Candidate replacement value
    Returns:
        True if new_value should overwrite existing_value, False otherwise
    """
    # A blank candidate can never improve on what is already stored
    if not new_value or not str(new_value).strip():
        return False
    # Normalise the stored value the same way the candidate is checked, so a
    # whitespace-only placeholder counts as "no existing value" instead of
    # blocking a real replacement
    existing_text = str(existing_value).strip() if existing_value else ''
    if not existing_text:
        return True
    # Containment also covers the exact-match case
    return 'Show More' in existing_text
 async def extract_dividend_data(page, debug: bool = False) -> Dict[str, Any]:
    """
    Extract dividend information from Schwab stock page.
    Returns dictionary with dividend data fields.
    """
    dividend_data: Dict[str, Any] = {}
    try:
        if debug:
            print("DEBUG: Starting dividend data extraction...")
            # Take initial screenshot to see page state
            png = await page.screenshot(full_page=True)
            path = save_debug_artifact("debug_dividend_start.png", png)
            print(f"DEBUG: Initial screenshot saved as {path}")
        # Wait for the dividends section to load dynamically
        if debug:
            print("DEBUG: Waiting for dividends section to load...")
        try:
            # First wait for the dividends panel to appear
            await page.wait_for_selector('#dividends', timeout=15000)
            if debug:
                print("DEBUG: #dividends panel found")
            # Wait for dividend content to load dynamically
            dividend_loaded = False
            max_attempts = 5  # Reduced from 10 for faster tests
            attempt = 0
            while not dividend_loaded and attempt < max_attempts:
                attempt += 1
                if debug:
                    print(f"DEBUG: Attempt {attempt}/{max_attempts} - Waiting for dynamic dividend content...")
                # Check if the dividends section has been populated with actual content
                dividend_status = await page.evaluate('''
                    () => {
                        const result = { loaded: false, debug: {} };
                        // Look for the dividends panel content that should be populated
                        const dividendsPanel = document.querySelector('#dividends');
                        if (dividendsPanel) {
                            const panelBody = dividendsPanel.querySelector('.sdps-panel__body');
                            if (panelBody) {
                                const textContent = panelBody.textContent || '';
                                result.debug.panelBodyLength = textContent.length;
                                result.debug.panelBodySample = textContent.substring(0, 200);
                                // Check if the panel has been populated with actual dividend text
                                // (not just empty comments)
                                const hasRealContent = textContent.length > 50 && (
                                    textContent.includes('Previous Dividend') ||
                                    textContent.includes('Pay Date') ||
                                    textContent.includes('Ex-Date') ||
                                    textContent.includes('Frequency') ||
                                    textContent.includes('Annual Dividend') ||
                                    textContent.includes('$') ||
                                    textContent.includes('%')
                                );
                                if (hasRealContent) {
                                    result.loaded = true;
                                    return result;
                                }
                            }
                        }
                        // Alternative: check for stock-dividends component
                        const stockDividends = document.querySelector('stock-dividends');
                        if (stockDividends) {
                            const text = stockDividends.textContent || '';
                            result.debug.stockDividendsLength = text.length;
                            result.debug.stockDividendsSample = text.substring(0, 100);
                            if (text.length > 20 && text.includes('$')) {
                                result.loaded = true;
                                return result;
                            }
                        }
                        // Alternative: check for any elements with dividend-related content
                        const allElements = document.querySelectorAll('#dividends *');
                        result.debug.totalElements = allElements.length;
                        for (let elem of allElements) {
                            const text = elem.textContent || '';
                            if (text.includes('Previous Dividend Payment') || 
                                (text.includes('$') && text.includes('.'))) {
                                result.loaded = true;
                                result.debug.foundInElement = elem.tagName + '.' + elem.className;
                                return result;
                            }
                        }
                        return result;
                    }
                ''')
                if debug:
                    print(f"DEBUG: Dividend status: {dividend_status}")
                dividend_loaded = dividend_status.get('loaded', False)
                if dividend_loaded:
                    if debug:
                        print("DEBUG: Dynamic dividend content loaded!")
                        png = await page.screenshot(full_page=True)
                        path = save_debug_artifact("debug_dividend_content_loaded.png", png)
                        print(f"DEBUG: Screenshot after content loaded: {path}")
                    break
                # Wait between attempts to allow for async loading
                await page.wait_for_timeout(1000)  # Reduced from 2000ms for faster tests
            if not dividend_loaded:
                if debug:
                    print("DEBUG: Basic dividend content did not auto-load - this suggests the page is not behaving as expected")
                    print("DEBUG: Expected behavior: Basic dividend info should be visible without clicking 'Show More'")
                    # Try to force a page refresh or trigger loading
                    print("DEBUG: Attempting to trigger dividend content loading...")
                    try:
                        # Try scrolling to the dividend section to trigger lazy loading
                        await page.evaluate('''
                            () => {
                                const dividendsPanel = document.querySelector('#dividends');
                                if (dividendsPanel) {
                                    dividendsPanel.scrollIntoView({ behavior: 'smooth', block: 'center' });
                                }
                            }
                        ''')
                        await page.wait_for_timeout(3000)
                        # Try clicking on the dividends panel header to ensure it's active
                        try:
                            dividends_header = await page.query_selector('#dividends h2, #dividends .sdps-panel__title')
                            if dividends_header:
                                await dividends_header.click()
                                await page.wait_for_timeout(2000)
                                print("DEBUG: Clicked on dividends panel header")
                        except:
                            pass
                        # Check one more time if content loaded
                        final_status = await page.evaluate('''
                            () => {
                                const dividendsPanel = document.querySelector('#dividends');
                                if (dividendsPanel) {
                                    const panelBody = dividendsPanel.querySelector('.sdps-panel__body');
                                    if (panelBody) {
                                        const textContent = panelBody.textContent || '';
                                        return {
                                            length: textContent.length,
                                            sample: textContent.substring(0, 500),
                                            hasBasicData: textContent.includes('$') && (
                                                textContent.includes('Previous') || 
                                                textContent.includes('Pay Date') ||
                                                textContent.includes('Ex-Date')
                                            )
                                        };
                                    }
                                }
                                return { length: 0, sample: '', hasBasicData: false };
                            }
                        ''')
                        if debug:
                            print(f"DEBUG: Final dividend panel status: {final_status}")
                        if final_status.get('hasBasicData'):
                            print("DEBUG: Basic dividend data now detected after manual triggering!")
                            dividend_loaded = True
                            # Extract the data immediately while it's loaded
                            immediate_extraction = await page.evaluate(r'''
                                () => {
                                    const results = {};
                                    const dividendsPanel = document.querySelector('#dividends');
                                    if (dividendsPanel) {
                                        const panelBody = dividendsPanel.querySelector('.sdps-panel__body');
                                        if (panelBody) {
                                            const fullText = panelBody.textContent || '';
                                            // Extract data using pattern matching from the full text
                                            const patterns = {
                                                'Previous Dividend Payment': /Previous Dividend Payment\s*\$([0-9]+\.[0-9]+)/,
                                                'Previous Pay Date': /Previous Pay Date\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/,
                                                'Previous Ex-Date': /Previous Ex-Date\s*([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/,
                                                'Frequency': /Frequency\s*([A-Za-z]+)/,
                                                'Annual Dividend Rate': /(?:Annual Dividend Rate|IAD).*?\$([0-9]+\.[0-9]+)/,
                                                'Annual Dividend Yield': /([0-9]+\.[0-9]+%)(?=\s|Annual|$)/
                                            };
                                            for (const [field, pattern] of Object.entries(patterns)) {
                                                const match = fullText.match(pattern);
                                                if (match) {
                                                    if (field === 'Previous Dividend Payment' || field === 'Annual Dividend Rate') {
                                                        results[field] = '$' + match[1];
                                                    } else {
                                                        results[field] = match[1];
                                                    }
                                                }
                                            }
                                        }
                                    }
                                    return results;
                                }
                            ''')
                            if debug:
                                print(f"DEBUG: Immediate extraction results: {immediate_extraction}")
                            if immediate_extraction:
                                dividend_data.update(immediate_extraction)
                                # Clean up the Frequency field if it has extra text
                                if 'Frequency' in dividend_data and 'Quarterly' in dividend_data['Frequency']:
                                    dividend_data['Frequency'] = 'Quarterly'
                    except Exception as e:
                        if debug:
                            print(f"DEBUG: Error during manual triggering: {e}")
                    png = await page.screenshot(full_page=True)
                    path = save_debug_artifact("debug_dividend_timeout.png", png)
                    print(f"DEBUG: Screenshot after timeout: {path}")
        except Exception as e:
            if debug:
                print(f"DEBUG: Error waiting for dividend content: {e}")
        # Check for dividend grid directly without clicking
        if debug:
            print("DEBUG: Checking for #dividend-grid...")
        dividend_grid_found = False
        try:
            await page.wait_for_selector('#dividend-grid', timeout=10000)
            dividend_grid_found = True
            if debug:
                print("DEBUG: #dividend-grid found!")
                png = await page.screenshot(full_page=True)
                path = save_debug_artifact("debug_dividend_grid_found.png", png)
                print(f"DEBUG: Screenshot with dividend grid: {path}")
        except:
            if debug:
                print("DEBUG: #dividend-grid not found initially")
                png = await page.screenshot(full_page=True)
                path = save_debug_artifact("debug_dividend_no_grid.png", png)
                print(f"DEBUG: Screenshot without grid: {path}")
        # Try to scroll to the dividend section to ensure it's in view
        if debug:
            print("DEBUG: Scrolling to stock-dividends component...")
        try:
            await page.evaluate('''
                () => {
                    const stockDividends = document.querySelector('stock-dividends');
                    if (stockDividends) {
                        stockDividends.scrollIntoView({ behavior: 'smooth', block: 'center' });
                    }
                }
            ''')
            await page.wait_for_timeout(3000)
            if debug:
                png = await page.screenshot(full_page=True)
                path = save_debug_artifact("debug_dividend_after_scroll.png", png)
                print(f"DEBUG: Screenshot after scroll: {path}")
            # Check again for dividend grid after scrolling
            try:
                await page.wait_for_selector('#dividend-grid', timeout=5000)
                dividend_grid_found = True
                if debug:
                    print("DEBUG: #dividend-grid found after scroll!")
                    png = await page.screenshot(full_page=True)
                    path = save_debug_artifact("debug_dividend_grid_after_scroll.png", png)
                    print(f"DEBUG: Screenshot with grid after scroll: {path}")
            except:
                if debug:
                    print("DEBUG: #dividend-grid still not found after scroll")
        except Exception as e:
            if debug:
                print(f"DEBUG: Error during scroll attempt: {e}")
        # Common dividend section selectors used by financial websites
        dividend_selectors = [
            '#dividend-grid',  # Primary target based on user feedback
            'stock-dividends',  # Secondary target - the web component
            '#dividend-section',
            '#dividends-section', 
            '.dividend-summary',
            '.dividends-summary',
            'div[data-testid*="dividend"]',
            'div[aria-label*="dividend"]',
            '[class*="dividend"]',
            'section:has-text("Dividend")',
            'div:has-text("Previous Dividend Payment")'
        ]
        # Try to find dividend section
        dividend_section = None
        for selector in dividend_selectors:
            try:
                if await page.is_visible(selector):
                    dividend_section = selector
                    if debug:
                        print(f"DEBUG: Found dividend section with selector: {selector}")
                    break
            except:
                continue
        if not dividend_section:
            if debug:
                print("DEBUG: No dividend section found, trying broader search...")
                # In debug mode, capture the page content to help identify selectors
                page_content = await page.content()
                path_html = save_debug_artifact("debug_dividend_page.html", page_content)
                print(f"DEBUG: Page HTML saved to {path_html} for analysis")
                # Also save a screenshot to see the visual layout
                png = await page.screenshot(full_page=True)
                path_png = save_debug_artifact("debug_dividend_page.png", png)
                print(f"DEBUG: Page screenshot saved to {path_png}")
            # Fallback: look for dividend-related text anywhere on page
            dividend_text_exists = await page.evaluate('''
                () => {
                    const text = document.body.innerText.toLowerCase();
                    return text.includes('dividend') || text.includes('ex-date') || text.includes('pay date') || text.includes('previous dividend') || text.includes('iad');
                }
            ''')
            if debug:
                print(f"DEBUG: Dividend-related text found on page: {dividend_text_exists}")
                # Try scrolling down to reveal more content
                await page.evaluate('window.scrollTo(0, document.body.scrollHeight)')
                await page.wait_for_timeout(2000)
                # Extract all text content that might contain dividend info
                dividend_related_text = await page.evaluate('''
                    () => {
                        const text = document.body.innerText;
                        const lines = text.split('\n');
                        const dividendLines = lines.filter(line => {
                            const lower = line.toLowerCase();
                            return lower.includes('dividend') || lower.includes('ex-date') || 
                                   lower.includes('pay date') || lower.includes('previous') ||
                                   lower.includes('iad') || lower.includes('frequency') ||
                                   lower.includes('quarterly') || lower.includes('$0.26') ||
                                   lower.includes('0.4865%') || lower.includes('$1.04') ||
                                   lower.includes('annual dividend') || lower.includes('yield');
                        });
                        return dividendLines;
                    }
                ''')
                print(f"DEBUG: Found dividend-related text lines: {dividend_related_text}")
                # Try a more comprehensive search for dividend data
                all_dividend_info = await page.evaluate('''
                    () => {
                        // Look for elements containing common dividend field names
                        const fieldNames = [
                            'Previous Dividend Payment', 'Next Dividend Payment',
                            'Previous Pay Date', 'Next Pay Date',
                            'Previous Ex-Date', 'Next Ex-Date', 'Ex-Date',
                            'Frequency', 'Annual Dividend Rate', 'IAD',
                            'Annual Dividend Yield', 'Dividend Yield'
                        ];
                        const results = {};
                        fieldNames.forEach(fieldName => {
                            // Search for elements containing this field name
                            const elements = Array.from(document.querySelectorAll('*')).filter(el => 
                                el.textContent && el.textContent.includes(fieldName) && 
                                el.children.length === 0  // Text nodes only
                            );
                            elements.forEach(el => {
                                // Look for value in nearby elements
                                const parent = el.parentElement;
                                if (parent) {
                                    const siblings = Array.from(parent.children);
                                    const currentIndex = siblings.indexOf(el);
                                    // Check next siblings for values
                                    for (let i = currentIndex + 1; i < siblings.length; i++) {
                                        const sibling = siblings[i];
                                        const text = sibling.textContent.trim();
                                        if (text && text !== fieldName && text.length > 0 && text.length < 50) {
                                            results[fieldName] = text;
                                            break;
                                        }
                                    }
                                    // Check same element for values after the field name
                                    const fullText = el.textContent;
                                    const fieldIndex = fullText.indexOf(fieldName);
                                    if (fieldIndex >= 0) {
                                        const afterField = fullText.substring(fieldIndex + fieldName.length).trim();
                                        if (afterField && afterField.length > 0 && afterField.length < 50) {
                                            results[fieldName] = afterField;
                                        }
                                    }
                                }
                            });
                        });
                        return results;
                    }
                ''')
                print(f"DEBUG: Comprehensive dividend search results: {all_dividend_info}")
                # If we found data in the comprehensive search, use it only if we don't already have good data
                if all_dividend_info:
                    for field, value in all_dividend_info.items():
                        if value and value.strip():
                            existing_value = dividend_data.get(field, '')
                            if should_replace_dividend_value(existing_value, value):
                                dividend_data[field] = value.strip()
                                if debug:
                                    print(f"DEBUG: Added dividend field from comprehensive search: {field} = {value}")
                            elif debug:
                                print(f"DEBUG: Keeping existing good data for {field}: {existing_value} (ignoring comprehensive search value: {value})")
            if not dividend_text_exists:
                if debug:
                    print("DEBUG: No dividend-related content found on page")
                return dividend_data
            # Use body as fallback section for broad search
            dividend_section = 'body'
            if debug:
                print("DEBUG: Using body as dividend section for broad search")
        # If we found the dividend grid, use specific selectors based on user feedback
        if dividend_section == '#dividend-grid':
            if debug:
                print("DEBUG: Using specific dividend grid selectors...")
            try:
                # First check if dividend grid is actually present and populated
                grid_status = await page.evaluate('''
                    () => {
                        const dividendGrid = document.querySelector('#dividend-grid');
                        if (!dividendGrid) return { found: false, message: 'No #dividend-grid element found' };
                        const textContent = dividendGrid.textContent || '';
                        const hasContent = textContent.trim().length > 50;
                        const childCount = dividendGrid.children.length;
                        return {
                            found: true,
                            hasContent,
                            textLength: textContent.length,
                            childCount,
                            preview: textContent.substring(0, 200),
                            message: `Grid found with ${childCount} children, ${textContent.length} chars`
                        };
                    }
                ''')
                if debug:
                    print(f"DEBUG: Dividend grid status: {grid_status}")
                # Extract dividend data using improved selectors
                specific_dividend_data = await page.evaluate(r'''
                    () => {
                        const results = {};
                        // Check if dividend grid exists and has content
                        const dividendGrid = document.querySelector('#dividend-grid');
                        if (dividendGrid) {
                            const allGridText = dividendGrid.textContent || '';
                            const lines = allGridText.split('\n').map(line => line.trim()).filter(line => line.length > 0);
                            // Try structured approach first - look for rows/cells
                            const dividendRows = dividendGrid.querySelectorAll('div[class*="row"], tr, .dividend-row, div:has(div)');
                            dividendRows.forEach((row, rowIndex) => {
                                const rowText = row.textContent || '';
                                // Look for dividend payment info
                                if (rowText.includes('Dividend Payment') || (rowText.includes('Previous') && rowText.includes('$'))) {
                                    const amountMatch = rowText.match(/\$[0-9]+\.[0-9]+/);
                                    if (amountMatch && !results['Previous Dividend Payment']) {
                                        results['Previous Dividend Payment'] = amountMatch[0];
                                    }
                                    // Look for dates in the same row
                                    const dateMatches = rowText.match(/([A-Za-z]+ [0-9]{1,2}, [0-9]{4})/g);
                                    if (dateMatches) {
                                        if (dateMatches.length >= 1 && !results['Previous Pay Date']) results['Previous Pay Date'] = dateMatches[0];
                                        if (dateMatches.length >= 2 && !results['Previous Ex-Date']) results['Previous Ex-Date'] = dateMatches[1];
                                    }
                                }
                            });
                            // Fallback: Parse all lines systematically
                            for (let i = 0; i < lines.length; i++) {
                                const line = lines[i];
                                const nextLine = i + 1 < lines.length ? lines[i + 1] : '';
                                // Match dividend payment
                                if ((line.includes('Previous Dividend Payment') || line.includes('Dividend Payment')) && !results['Previous Dividend Payment']) {
                                    const amountPattern = /\$[0-9]+\.[0-9]+/;
                                    let amount = line.match(amountPattern) || nextLine.match(amountPattern);
                                    if (amount) results['Previous Dividend Payment'] = amount[0];
                                }
                                // Match pay date
                                if (line.includes('Pay Date') && !results['Previous Pay Date']) {
                                    const datePattern = /[A-Za-z]{3,9} [0-9]{1,2}, [0-9]{4}/;
                                    let date = line.match(datePattern) || nextLine.match(datePattern);
                                    if (date) results['Previous Pay Date'] = date[0];
                                }
                                // Match ex-date
                                if (line.includes('Ex-Date') && !results['Previous Ex-Date']) {
                                    const datePattern = /[A-Za-z]{3,9} [0-9]{1,2}, [0-9]{4}/;
                                    let date = line.match(datePattern) || nextLine.match(datePattern);
                                    if (date) results['Previous Ex-Date'] = date[0];
                                }
                                // Match frequency
                                if (line.includes('Frequency') && !results['Frequency']) {
                                    const freqLine = line + ' ' + nextLine;
                                    if (freqLine.toLowerCase().includes('quarterly')) results['Frequency'] = 'Quarterly';
                                    else if (freqLine.toLowerCase().includes('monthly')) results['Frequency'] = 'Monthly';
                                    else if (freqLine.toLowerCase().includes('annual')) results['Frequency'] = 'Annual';
                                    else if (freqLine.toLowerCase().includes('semi')) results['Frequency'] = 'Semi-Annual';
                                }
                                // Match annual dividend rate
                                if ((line.includes('Annual Dividend Rate') || line.includes('IAD')) && !results['Annual Dividend Rate']) {
                                    const amountPattern = /\$[0-9]+\.[0-9]+/;
                                    let amount = line.match(amountPattern) || nextLine.match(amountPattern);
                                    if (amount) results['Annual Dividend Rate'] = amount[0];
                                }
                                // Match annual dividend yield
                                if (line.includes('Annual Dividend Yield') && !results['Annual Dividend Yield']) {
                                    const percentPattern = /[0-9]+\.[0-9]+%/;
                                    let percent = line.match(percentPattern) || nextLine.match(percentPattern);
                                    if (percent) results['Annual Dividend Yield'] = percent[0];
                                }
                            }
                        }
                        return results;
                    }
                ''')
                if debug:
                    print(f"DEBUG: Specific dividend grid extraction results: {specific_dividend_data}")
                # Add the extracted data to dividend_data only if we don't already have good data
                if specific_dividend_data:
                    for field, value in specific_dividend_data.items():
                        existing_value = dividend_data.get(field, '')
                        if should_replace_dividend_value(existing_value, value):
                            dividend_data[field] = value
                            if debug:
                                print(f"DEBUG: Updated {field} from specific extraction: {value}")
                        elif debug:
                            print(f"DEBUG: Keeping existing good data for {field}: {existing_value} (ignoring specific extraction value: {value})")
            except Exception as e:
                if debug:
                    print(f"DEBUG: Error in specific dividend grid extraction: {e}")
        # Extract dividend data using the correct structure from gemini analysis
        if debug:
            print("DEBUG: Extracting dividend data from dividend-grid structure...")
        # First try to extract data from the dynamically loaded dividend content
        try:
            dividend_dynamic_data = await page.evaluate(r'''
                () => {
                    const results = {};
                    // Strategy 1: Look for any dividend grid structure that was loaded
                    const dividendGrid = document.querySelector('#dividend-grid');
                    if (dividendGrid) {
                        const rows = dividendGrid.querySelectorAll('div.sdps-row, .row');
                        for (let row of rows) {
                            const cells = row.querySelectorAll('div[class*="col-"]');
                            if (cells.length >= 2) {
                                const label = cells[0].textContent.trim();
                                const value = cells[1].textContent.trim();
                                // Map the labels to our expected field names
                                if (label.includes('Previous Dividend Payment') || label.includes('Dividend Payment')) {
                                    results['Previous Dividend Payment'] = value;
                                } else if (label.includes('Previous Pay Date') || label.includes('Pay Date')) {
                                    results['Previous Pay Date'] = value;
                                } else if (label.includes('Previous Ex-Date') || label.includes('Ex-Date')) {
                                    results['Previous Ex-Date'] = value;
                                } else if (label.includes('Frequency')) {
                                    results['Frequency'] = value;
                                } else if (label.includes('Annual Dividend Rate') || label.includes('IAD')) {
                                    results['Annual Dividend Rate'] = value;
                                } else if (label.includes('Annual Dividend Yield')) {
                                    results['Annual Dividend Yield'] = value;
                                }
                            }
                        }
                        if (Object.keys(results).length > 0) {
                            return results;
                        }
                    }
                    // Strategy 2: Look for stock-dividends component content
                    const stockDividends = document.querySelector('stock-dividends');
                    if (stockDividends) {
                        const allText = stockDividends.textContent || '';
                        const lines = allText.split('\n').map(line => line.trim()).filter(line => line);
                        for (let i = 0; i < lines.length; i++) {
                            const line = lines[i];
                            const nextLine = i + 1 < lines.length ? lines[i + 1] : '';
                            if (line.includes('Previous Dividend Payment') || line.includes('Dividend Payment')) {
                                const amountMatch = (line + ' ' + nextLine).match(/\$[0-9]+\.[0-9]+/);
                                if (amountMatch) results['Previous Dividend Payment'] = amountMatch[0];
                            } else if (line.includes('Pay Date')) {
                                const dateMatch = (line + ' ' + nextLine).match(/[A-Za-z]+ [0-9]{1,2}, [0-9]{4}/);
                                if (dateMatch) results['Previous Pay Date'] = dateMatch[0];
                            } else if (line.includes('Ex-Date')) {
                                const dateMatch = (line + ' ' + nextLine).match(/[A-Za-z]+ [0-9]{1,2}, [0-9]{4}/);
                                if (dateMatch) results['Previous Ex-Date'] = dateMatch[0];
                            } else if (line.includes('Frequency')) {
                                if (line.toLowerCase().includes('quarterly') || nextLine.toLowerCase().includes('quarterly')) {
                                    results['Frequency'] = 'Quarterly';
                                } else if (line.toLowerCase().includes('monthly') || nextLine.toLowerCase().includes('monthly')) {
                                    results['Frequency'] = 'Monthly';
                                } else if (line.toLowerCase().includes('annual') || nextLine.toLowerCase().includes('annual')) {
                                    results['Frequency'] = 'Annual';
                                }
                            } else if (line.includes('Annual Dividend Rate') || line.includes('IAD')) {
                                const amountMatch = (line + ' ' + nextLine).match(/\$[0-9]+\.[0-9]+/);
                                if (amountMatch) results['Annual Dividend Rate'] = amountMatch[0];
                            } else if (line.includes('Annual Dividend Yield')) {
                                const percentMatch = (line + ' ' + nextLine).match(/[0-9]+\.[0-9]+%/);
                                if (percentMatch) results['Annual Dividend Yield'] = percentMatch[0];
                            }
                        }
                        if (Object.keys(results).length > 0) {
                            return results;
                        }
                    }
                    // Strategy 3: Look within entire dividends panel for any structured content
                    const dividendsPanel = document.querySelector('#dividends');
                    if (dividendsPanel) {
                        const allElements = dividendsPanel.querySelectorAll('*');
                        for (let elem of allElements) {
                            const text = elem.textContent || '';
                            // Look for dollar amounts near dividend-related text
                            if (text.includes('Previous Dividend Payment') || text.includes('Dividend Payment')) {
                                const parent = elem.parentElement;
                                if (parent) {
                                    const siblings = Array.from(parent.children);
                                    const currentIndex = siblings.indexOf(elem);
                                    // Check next siblings for values
                                    for (let j = currentIndex + 1; j < siblings.length; j++) {
                                        const sibling = siblings[j];
                                        const siblingText = sibling.textContent.trim();
                                        const amountMatch = siblingText.match(/\$[0-9]+\.[0-9]+/);
                                        if (amountMatch) {
                                            results['Previous Dividend Payment'] = amountMatch[0];
                                            break;
                                        }
                                    }
                                }
                            }
                            // Similar logic for other fields...
                            // (truncated for brevity but would include Pay Date, Ex-Date, etc.)
                        }
                    }
                    return results;
                }
            ''')
            if debug:
                print(f"DEBUG: Dynamic dividend extraction results: {dividend_dynamic_data}")
            if dividend_dynamic_data:
                for field, value in dividend_dynamic_data.items():
                    existing_value = dividend_data.get(field, '')
                    if should_replace_dividend_value(existing_value, value):
                        dividend_data[field] = value
                        if debug:
                            print(f"DEBUG: Updated {field} from dynamic extraction: {value}")
                    elif debug:
                        print(f"DEBUG: Keeping existing good data for {field}: {existing_value} (ignoring dynamic extraction value: {value})")
        except Exception as e:
            if debug:
                print(f"DEBUG: Error in dynamic dividend extraction: {e}")
        # Define dividend fields and their possible selectors as fallback
        dividend_fields = {
            'Previous Dividend Payment': [
                '#dividend-grid div:has-text("Previous Dividend Payment") ~ div',
                '#dividend-grid div:has-text("Dividend Payment") ~ div',
                '#dividends span:has-text("Previous Dividend Payment") + span',
                '#dividends div:has-text("Previous Dividend Payment") + div',
                '#dividends *:has-text("Previous Dividend Payment") ~ *',
                'stock-dividends span:has-text("Previous Dividend Payment") + span',
                'stock-dividends div:has-text("Previous Dividend Payment") + div',
                'span:has-text("Previous Dividend Payment") + span',
                'div:has-text("Previous Dividend Payment") + div',
                '*:has-text("Previous Dividend Payment") ~ *',
                'span:has-text("Next Dividend Payment") + span',
                'div:has-text("Next Dividend Payment") + div',
                '*:has-text("Next Dividend Payment") ~ *',
                '[data-field="dividend-payment"]',
                '.dividend-payment'
            ],
            'Previous Pay Date': [
                '#dividend-grid div:has-text("Previous Pay Date") ~ div',
                '#dividend-grid div:has-text("Pay Date") ~ div',
                '#dividends span:has-text("Previous Pay Date") + span',
                '#dividends div:has-text("Previous Pay Date") + div',
                '#dividends *:has-text("Previous Pay Date") ~ *',
                'stock-dividends span:has-text("Previous Pay Date") + span',
                'stock-dividends div:has-text("Previous Pay Date") + div',
                'span:has-text("Previous Pay Date") + span',
                'div:has-text("Previous Pay Date") + div', 
                '*:has-text("Previous Pay Date") ~ *',
                'span:has-text("Next Pay Date") + span',
                'div:has-text("Next Pay Date") + div', 
                '*:has-text("Next Pay Date") ~ *',
                '*:has-text("Pay Date") ~ *',
                '[data-field="pay-date"]',
                '.pay-date'
            ],
            'Previous Ex-Date': [
                '#dividend-grid div:has-text("Previous Ex-Date") ~ div',
                '#dividend-grid div:has-text("Ex-Date") ~ div',
                '#dividends span:has-text("Previous Ex-Date") + span',
                '#dividends div:has-text("Previous Ex-Date") + div',
                '#dividends *:has-text("Previous Ex-Date") ~ *',
                'stock-dividends span:has-text("Previous Ex-Date") + span',
                'stock-dividends div:has-text("Previous Ex-Date") + div',
                'span:has-text("Previous Ex-Date") + span',
                'div:has-text("Previous Ex-Date") + div',
                '*:has-text("Previous Ex-Date") ~ *',
                'span:has-text("Next Ex-Date") + span',
                'div:has-text("Next Ex-Date") + div',
                '*:has-text("Next Ex-Date") ~ *',
                '*:has-text("Ex-Date") ~ *',
                '[data-field="ex-date"]',
                '.ex-date'
            ],
            'Frequency': [
                '#dividend-grid div:has-text("Frequency") ~ div',
                '#dividends span:has-text("Frequency") + span',
                '#dividends div:has-text("Frequency") + div',
                '#dividends *:has-text("Frequency") ~ *',
                'stock-dividends span:has-text("Frequency") + span',
                'stock-dividends div:has-text("Frequency") + div',
                'span:has-text("Frequency") + span',
                'div:has-text("Frequency") + div',
                '*:has-text("Frequency") ~ *',
                '[data-field="frequency"]',
                '.dividend-frequency',
                '.frequency'
            ],
            'Annual Dividend Rate': [
                '#dividend-grid div:has-text("Annual Dividend Rate") ~ div',
                '#dividend-grid div:has-text("IAD") ~ div',
                '#dividends span:has-text("Annual Dividend Rate") + span',
                '#dividends div:has-text("Annual Dividend Rate") + div',
                '#dividends *:has-text("Annual Dividend Rate") ~ *',
                '#dividends span:has-text("IAD") + span',
                '#dividends *:has-text("IAD") ~ *',
                'stock-dividends span:has-text("Annual Dividend Rate") + span',
                'stock-dividends div:has-text("Annual Dividend Rate") + div',
                'stock-dividends span:has-text("IAD") + span',
                'span:has-text("Annual Dividend Rate") + span',
                'div:has-text("Annual Dividend Rate") + div',
                '*:has-text("Annual Dividend Rate") ~ *',
                'span:has-text("IAD") + span',
                '*:has-text("IAD") ~ *',
                '[data-field="annual-rate"]',
                '.annual-dividend-rate'
            ],
            'Annual Dividend Yield': [
                '#dividend-grid div:has-text("Annual Dividend Yield") ~ div',
                '#dividends span:has-text("Annual Dividend Yield") + span',
                '#dividends div:has-text("Annual Dividend Yield") + div',
                '#dividends *:has-text("Annual Dividend Yield") ~ *',
                'stock-dividends span:has-text("Annual Dividend Yield") + span',
                'stock-dividends div:has-text("Annual Dividend Yield") + div',
                'span:has-text("Annual Dividend Yield") + span',
                'div:has-text("Annual Dividend Yield") + div',
                '*:has-text("Annual Dividend Yield") ~ *',
                '[data-field="dividend-yield"]',
                '.dividend-yield'
            ]
        }
        # Extract each dividend field using multiple selector strategies
        for field_name, selectors in dividend_fields.items():
            field_found = False
            # Try each selector for this field
            for selector in selectors:
                if field_found:
                    break
                try:
                    # Scope search within dividend section if found, otherwise search whole page
                    full_selector = f'{dividend_section} {selector}' if dividend_section != 'body' else selector
                    if await page.is_visible(full_selector, timeout=1000):
                        value = await page.inner_text(full_selector)
                        clean_value = value.strip()
                        if clean_value and clean_value != field_name:  # Ensure we got actual value, not the label
                            existing_value = dividend_data.get(field_name, '')
                            if should_replace_dividend_value(existing_value, clean_value):
                                dividend_data[field_name] = clean_value
                                field_found = True
                                if debug:
                                    print(f"DEBUG: Found {field_name}: {clean_value} (selector: {full_selector})")
                            elif debug:
                                print(f"DEBUG: Keeping existing good data for {field_name}: {existing_value} (ignoring selector-based value: {clean_value})")
                            break
                except:
                    continue
            # If standard selectors failed, try JavaScript-based text search as fallback
            if not field_found:
                try:
                    # Try multiple variations of the field name
                    search_terms = [field_name]
                    if "Previous" in field_name:
                        search_terms.append(field_name.replace("Previous", "Next"))
                    if "Annual Dividend Rate" in field_name:
                        search_terms.append("IAD")
                    if "Annual Dividend Yield" in field_name:
                        search_terms.append("Dividend Yield")
                    for search_term in search_terms:
                        if field_found:
                            break
                        value = await page.evaluate(rf'''
                            () => {{
                                const searchText = "{search_term}";
                                // First check within the dividends section specifically
                                const dividendsPanel = document.querySelector('#dividends');
                                const stockDividends = document.querySelector('stock-dividends');
                                const searchContainers = [dividendsPanel, stockDividends, document];
                                for (let container of searchContainers) {{
                                    if (!container) continue;
                                    const elements = Array.from(container.querySelectorAll('*'));
                                    for (let elem of elements) {{
                                        if (elem.textContent && elem.textContent.includes(searchText)) {{
                                            // Look for next sibling or nearby element with value
                                            let candidate = elem.nextElementSibling;
                                            if (candidate && candidate.textContent && 
                                                !candidate.textContent.includes(searchText) &&
                                                candidate.textContent.trim().length > 0) {{
                                                return candidate.textContent.trim();
                                            }}
                                            // Try parent's next sibling
                                            candidate = elem.parentElement?.nextElementSibling;
                                            if (candidate && candidate.textContent && 
                                                !candidate.textContent.includes(searchText) &&
                                                candidate.textContent.trim().length > 0) {{
                                                return candidate.textContent.trim();
                                            }}
                                            // Try looking in the same element's parent for nearby text
                                            const parent = elem.parentElement;
                                            if (parent) {{
                                                const parentText = parent.textContent;
                                                const lines = parentText.split('\n');
                                                for (let i = 0; i < lines.length; i++) {{
                                                    if (lines[i].includes(searchText) && i + 1 < lines.length) {{
                                                        const nextLine = lines[i + 1].trim();
                                                        if (nextLine && !nextLine.includes(searchText)) {{
                                                            return nextLine;
                                                        }}
                                                    }}
                                                }}
                                            }}
                                        }}
                                    }}
                                    // If found in this container, stop searching
                                    if (container !== document) {{
                                        break;
                                    }}
                                }}
                                return null;
                            }}
                        ''')
                        if value and value.strip():
                            existing_value = dividend_data.get(field_name, '')
                            if should_replace_dividend_value(existing_value, value):
                                dividend_data[field_name] = value.strip()
                                field_found = True
                                if debug:
                                    print(f"DEBUG: Found {field_name} via JS search with term '{search_term}': {value}")
                            elif debug:
                                print(f"DEBUG: Keeping existing good data for {field_name}: {existing_value} (ignoring JS search value: {value})")
                            break
                except Exception as e:
                    if debug:
                        print(f"DEBUG: Could not find {field_name}: {e}")
                    continue
        if debug:
            print(f"DEBUG: Extracted dividend data: {dividend_data}")
        return dividend_data
    except Exception as e:
        if debug:
            print(f"DEBUG: Error extracting dividend data: {e}")
        return dividend_data
 async def extract(page, debug: bool = False) -> Dict[str, Any]:
    """Compatibility alias for `extract_dividend_data`.
    Forwards ``page`` and ``debug`` to `extract_dividend_data` and returns
    its result unchanged.
    """
    dividend_data = await extract_dividend_data(page, debug=debug)
    return dividend_data
--- a/schwab_scraper/features/equity/service.py
+++ b/schwab_scraper/features/equity/service.py
@@ -0,0 +1,452 @@
 import time
 from typing import Any, Dict, Optional
 import logging
 from ...core.config import load_config, get_playwright_url
 from ...browser.auth import ensure_cookies
 from ...browser.client import connect, new_context, new_page
 from ...browser.navigation import goto_with_auth_check
 from ...core import Envelope, ErrorType, MorningstarData, EquityPhase1Data, fail, ok
 from .morningstar import find_report, download_report_as_bytes
 from ...storage.cache import ensure_cache_dir, cache_filename, read_cached_pdf, write_cached_pdf
 from .parser import parse as parse_pdf
 from .scraper import extract_dividend_data
 from .phase1_scraper import extract_phase1_data  # DOM scraping - the working approach
 import re
 def extract_company_name_from_title(page_title: str, ticker: str) -> Optional[str]:
    """Derive a company name from a research page title.
    The name is taken from the text preceding "(TICKER)" when the title has
    that form. Otherwise the text before the first " |" or " -" separator is
    used as a best-effort fallback.
    Args:
        page_title: Page title to parse; a falsy value yields None.
        ticker: Ticker symbol; compared case-insensitively.
    Returns:
        The extracted company name, or None when no plausible name is found
        or any error is raised while parsing (e.g. ``ticker`` is not a str).
    """
    if not page_title:
        return None
    try:
        ticker_upper = ticker.upper()
        # Remove site branding and the multi-word page descriptors. The bare
        # word "Research" is deliberately left in place at this stage: it can
        # be part of a real company name (e.g. "Lam Research Corp").
        title = (
            page_title.replace(" | Charles Schwab", "")
            .replace(" - Charles Schwab", "")
            .replace("Stock Quote & Summary", "")
            .replace("Stock Research", "")
        )
        # Primary strategy: everything before "(TICKER)" is the company name.
        pattern = rf"^(.+?)\s*\({re.escape(ticker_upper)}\)"
        match = re.match(pattern, title, re.IGNORECASE)
        if match:
            company_name = match.group(1).strip().replace(" -", "").strip()
            # A lone "Research" is page boilerplate, not a company name.
            if (
                len(company_name) > 1
                and not company_name.isdigit()
                and company_name != "Research"
            ):
                return company_name
        # Fallback strategy: no usable "(TICKER)" form, so strip the remaining
        # "Research" boilerplate and take the first separator-delimited part.
        stripped = title.replace("Research", "")
        for separator in (" |", " -"):
            if separator in stripped:
                potential_name = stripped.split(separator)[0].strip()
                if potential_name.upper() != ticker_upper and len(potential_name) > 1:
                    return potential_name
        return None
    except Exception:
        # Title parsing is best-effort; callers treat None as "unknown".
        return None
 async def get_equity_phase1_data(ticker: str, debug: bool = False) -> Envelope[EquityPhase1Data]:
    """Fetch the Phase 1 enhanced equity data set for a single ticker.
    Steps: obtain session cookies, connect to the browser, open the Schwab
    research page for the ticker, confirm that stock content rendered, then
    delegate DOM scraping to `extract_phase1_data`. The page, context,
    browser and Playwright handle are always released before returning.
    The Phase 1 data set covers:
    - Quote/Price Data (symbol bar)
    - Enhanced Dividend Information (forward-looking dates)
    - Core Earnings Metrics (EPS, forecasts)
    - Basic Valuation Ratios (P/E, Forward P/E, PEG)
    - Calculated Metrics (payout ratio)
    Args:
        ticker: Stock ticker symbol (upper-cased before use)
        debug: Enable debug logging and use the shorter navigation timeout
    Returns:
        Envelope wrapping EquityPhase1Data on success, or an authentication /
        validation failure envelope
    """
    ticker = ticker.upper()
    log = logging.getLogger(__name__)
    if debug:
        log.setLevel(logging.DEBUG)
        log.debug(f"Starting get_equity_phase1_data for {ticker}")
    # Messages and selectors reused by the validation steps below
    research_url = f"https://client.schwab.com/app/research/#/stocks/{ticker}"
    not_a_stock_msg = f"Invalid ticker: {ticker}. This appears not to be a valid stock ticker."
    content_selector = 'span.sdps-title-3.sc-sdps-solo-layout:not(.sdps-font-bold), #morningstar-section'
    # A session is required before any browser work is attempted
    session_cookies = await ensure_cookies()
    if not session_cookies:
        return fail(
            "Unable to establish a session. Provide credentials in config.json or a valid cookies.json.",
            ErrorType.AUTHENTICATION,
            retryable=False,
        )
    playwright_url = get_playwright_url(load_config())
    # Pre-bind so the cleanup in `finally` can tell what was actually created
    context = None
    page = None
    p, browser = await connect(playwright_url)
    try:
        context = await new_context(browser, cookies=session_cookies)
        page = await new_page(context)
        # Open the research page through the auth-aware navigation helper
        nav_timeout = 45000 if not debug else 30000
        reached = await goto_with_auth_check(
            page,
            context,
            research_url,
            debug=debug,
            timeout=nav_timeout,
        )
        if not reached:
            return fail(
                "Authentication failed while navigating to research page",
                ErrorType.AUTHENTICATION,
                retryable=True,
            )
        if debug:
            log.debug(f"Current page URL: {page.url}")
        # A ticker is treated as invalid when no stock-specific content shows up
        try:
            await page.wait_for_selector(content_selector, timeout=10000, state='visible')
        except Exception as wait_exc:
            if debug:
                log.debug(f"Timeout waiting for stock content: {wait_exc}")
            return fail(not_a_stock_msg, ErrorType.VALIDATION, retryable=False)
        # Second check: the rendered elements must carry real content
        try:
            page_is_stock = await page.evaluate('''
                () => {
                    const nameSpan = document.querySelector('span.sdps-title-3.sc-sdps-solo-layout:not(.sdps-font-bold)');
                    if (nameSpan && nameSpan.textContent && nameSpan.textContent.trim().length > 2) {
                        return true;
                    }
                    const morningstarSection = document.querySelector('#morningstar-section');
                    if (morningstarSection) {
                        return true;
                    }
                    return false;
                }
            ''')
            if not page_is_stock:
                return fail(not_a_stock_msg, ErrorType.VALIDATION, retryable=False)
        except Exception as check_exc:
            log.debug(f"Error checking for valid content: {check_exc}")
            return fail(
                f"Invalid ticker: {ticker}. Unable to validate ticker.",
                ErrorType.VALIDATION,
                retryable=False,
            )
        # DOM scraping is used here (the API approach failed due to CORS restrictions)
        return ok(await extract_phase1_data(page, debug=debug))
    finally:
        # Release resources innermost-first; a failure in one step must not
        # prevent the remaining steps from running.
        teardown = (
            (page, "close"),
            (context, "close"),
            (browser, "close"),
            (p, "stop"),
        )
        for resource, method_name in teardown:
            try:
                if resource is not None:
                    await getattr(resource, method_name)()
            except Exception:
                pass
 async def get_morningstar_data(ticker: str, debug: bool = False) -> Envelope[MorningstarData]:
    ticker = ticker.upper()
    ensure_cache_dir()
    logger = logging.getLogger(__name__)
    if debug:
        logger.setLevel(logging.DEBUG)
        logger.debug(f"Starting get_morningstar_data for {ticker}")
    # Session management
    cookies = await ensure_cookies()
    if not cookies:
        return fail(
            "Unable to establish a session. Provide credentials in config.json or a valid cookies.json.",
            ErrorType.AUTHENTICATION,
            retryable=False,
        )
    config = load_config()
    playwright_url = get_playwright_url(config)
    # Browser orchestration
    context = None
    page = None
    p, browser = await connect(playwright_url)
    try:
        context = await new_context(browser, cookies=cookies)
        page = await new_page(context)
        # Use shared auth-aware navigation helper for consistency
        # Use shorter timeout for tests to speed up execution
        timeout = 30000 if debug else 45000
        success = await goto_with_auth_check(
            page,
            context,
            f"https://client.schwab.com/app/research/#/stocks/{ticker}",
            debug=debug,
            timeout=timeout,
        )
        if not success:
            return fail(
                "Authentication failed while navigating to research page",
                ErrorType.AUTHENTICATION,
                retryable=True,
            )
        # Validate ticker by checking for stock page content
        # Schwab doesn't redirect on invalid tickers, but the page content is empty/invalid
        if debug:
            logger.debug(f"Current page URL: {page.url}")
        # Wait for page content to load - Schwab's research page loads asynchronously
        # Give it time to populate the DOM before validation
        try:
            # Wait for either company name or Morningstar section to appear
            # This indicates the page has loaded stock-specific content
            await page.wait_for_selector(
                'span.sdps-title-3.sc-sdps-solo-layout:not(.sdps-font-bold), #morningstar-section',
                timeout=10000,
                state='visible'
            )
        except Exception as wait_err:
            # If neither selector appears after 10 seconds, likely an invalid ticker
            if debug:
                logger.debug(f"Timeout waiting for stock content: {wait_err}")
            return fail(
                f"Invalid ticker: {ticker}. This appears not to be a valid stock ticker.",
                ErrorType.VALIDATION,
                retryable=False,
            )
        # Additional validation: check if we have valid stock page content
        try:
            has_valid_content = await page.evaluate('''
                () => {
                    // Look for company name span (valid stock pages have this)
                    const nameSpan = document.querySelector('span.sdps-title-3.sc-sdps-solo-layout:not(.sdps-font-bold)');
                    if (nameSpan && nameSpan.textContent && nameSpan.textContent.trim().length > 2) {
                        return true;
                    }
                    // Look for Morningstar section (valid stock pages have this)
                    const morningstarSection = document.querySelector('#morningstar-section');
                    if (morningstarSection) {
                        return true;
                    }
                    // Look for company profile description (valid stock pages have this)
                    const profileText = document.querySelector('p.sdps-text-body.sc-sdps-solo-layout');
                    if (profileText && profileText.textContent && profileText.textContent.trim().length > 50) {
                        return true;
                    }
                    // Look for any stock-related content
                    const stockContent = document.querySelector('#stock-details, #quote, [data-testid="stock-quote"]');
                    if (stockContent) {
                        return true;
                    }
                    return false;
                }
            ''')
            if debug:
                logger.debug(f"Valid stock content detected: {has_valid_content}")
            if not has_valid_content:
                if debug:
                    logger.debug(f"Invalid ticker detected - no stock content found")
                return fail(
                    f"Invalid ticker: {ticker}. This appears not to be a valid stock ticker.",
                    ErrorType.VALIDATION,
                    retryable=False,
                )
        except Exception as e:
            logger.debug(f"Error checking for valid content: {e}")
            # If we can't check, assume invalid and return error
            return fail(
                f"Invalid ticker: {ticker}. Unable to validate ticker.",
                ErrorType.VALIDATION,
                retryable=False,
            )
        # Company name - extract from page elements
        company_name = None
        try:
            # Strategy 1: Extract from company name span element
            company_name = await page.evaluate('''
                () => {
                    // Look for company name in title span
                    const nameSpan = document.querySelector('span.sdps-title-3.sc-sdps-solo-layout:not(.sdps-font-bold)');
                    if (nameSpan && nameSpan.textContent && nameSpan.textContent.trim().length > 2) {
                        return nameSpan.textContent.trim();
                    }
                    // Fallback: Extract from company profile description
                    const profileText = document.querySelector('p.sdps-text-body.sc-sdps-solo-layout');
                    if (profileText && profileText.textContent) {
                        const text = profileText.textContent.trim();
                        // Extract company name before " designs" or " is" or " provides"
                        const match = text.match(/^([A-Za-z0-9\\s&\\.,'-]+?)(?:\\s+(?:designs|is|provides|manufactures|operates|offers|engages))/i);
                        if (match) {
                            return match[1].trim();
                        }
                    }
                    return null;
                }
            ''')
            if debug and company_name:
                logger.debug(f"Extracted company name: {company_name}")
        except Exception as e:
            logger.debug(f"Company name extraction error: {e}")
        # Morningstar section wait
        try:
            await page.wait_for_selector('#morningstar-section', timeout=30000)
        except Exception:
            logger.debug("#morningstar-section not found within timeout")
        # Dividends
        try:
            dividend_data = await extract_dividend_data(page, debug=debug)
        except Exception as exc:
            logger.debug(f"Dividend extraction error: {exc}")
            dividend_data = {}
        # Find report and download/cache
        report_url, report_date = await find_report(page, debug=debug)
        data: Dict[str, Any] = {}
        if report_date:
            data["Morningstar Equity Report Date"] = report_date.strip()
        if report_url:
            # Only store actual URL, not the __CLICK_TO_OPEN__ marker
            if report_url != '__CLICK_TO_OPEN__':
                data["Morningstar Equity Report URL"] = report_url
            pdf_bytes = await download_report_as_bytes(page, report_url, debug=debug)
        else:
            pdf_bytes = None
        parsed_data: Dict[str, Any] = {}
        if pdf_bytes:
            if report_date:
                from datetime import datetime
                try:
                    dt = datetime.strptime(report_date, "%b %d, %Y")
                    formatted_date = dt.strftime("%m-%d-%Y")
                except Exception:
                    formatted_date = report_date.replace(" ", "-")
            else:
                formatted_date = time.strftime("%m-%d-%Y")
            write_cached_pdf(ticker, formatted_date, pdf_bytes)
            try:
                parsed_data = parse_pdf(pdf_bytes)
                parsed_data["source"] = "live"
            except Exception as exc:
                logger.debug(f"PDF parsing failed: {exc}")
                parsed_data = {"error": "Failed to parse Morningstar report"}
        else:
            cached = read_cached_pdf(ticker)
            if cached:
                try:
                    parsed_data = parse_pdf(cached)
                    parsed_data["source"] = "cache"
                except Exception as exc:
                    logger.debug(f"Cached PDF parsing failed: {exc}")
                    parsed_data = {"error": "Failed to parse cached Morningstar report"}
            else:
                parsed_data = {"error": f"Failed to download and no cache available for {ticker}"}
        morningstar = MorningstarData(
            ticker=ticker,
            company_name=company_name,
            previous_dividend_payment=dividend_data.get("Previous Dividend Payment"),
            previous_pay_date=dividend_data.get("Previous Pay Date"),
            previous_ex_date=dividend_data.get("Previous Ex-Dividend Date"),
            frequency=dividend_data.get("Frequency"),
            annual_dividend_rate=dividend_data.get("Annual Dividend Rate"),
            annual_dividend_yield=dividend_data.get("Annual Dividend Yield"),
            fair_value=parsed_data.get("Fair Value"),
            economic_moat=parsed_data.get("Economic Moat"),
            capital_allocation=parsed_data.get("Capital Allocation"),
            rating=_safe_int(parsed_data.get("Morningstar Rating")),
            one_star_price=parsed_data.get("1-Star Price"),
            five_star_price=parsed_data.get("5-Star Price"),
            assessment=parsed_data.get("Assessment"),
            range_52_week=parsed_data.get("52-Week Range"),
            dividend_yield=parsed_data.get("Dividend Yield"),
            investment_style=parsed_data.get("Investment Style"),
            report_url=data.get("Morningstar Equity Report URL"),
            report_date=data.get("Morningstar Equity Report Date"),
            source=parsed_data.get("source"),
        )
        if parsed_data.get("error"):
            return fail(parsed_data["error"], ErrorType.PARSING, retryable=True)
        return ok(morningstar)
    finally:
        try:
            if page is not None:
                await page.close()
        except Exception:
            pass
        try:
            if context is not None:
                await context.close()
        except Exception:
            pass
        for handle in (browser,):
            try:
                if handle is not None:
                    await handle.close()
            except Exception:
                pass
        try:
            if p is not None:
                await p.stop()
        except Exception:
            pass
 def _safe_int(value: Any) -> Optional[int]:
    if value is None:
        return None
    try:
        return int(str(value).strip())
    except (TypeError, ValueError):
        return None
--- a/schwab_scraper/features/transactions/init.py
+++ b/schwab_scraper/features/transactions/init.py
--- a/schwab_scraper/features/transactions/parser.py
+++ b/schwab_scraper/features/transactions/parser.py
@@ -0,0 +1,47 @@
 from __future__ import annotations
 import csv
 import io
 from dataclasses import asdict
 from typing import List, Dict, Any
 from ...core.models import TransactionRecord, TransactionData, AccountInfo
 def parse_csv_content(csv_bytes: bytes) -> List[TransactionRecord]:
    """
    Parse Schwab transaction CSV bytes into a list of TransactionRecord.
    Expected headers:
    Date,Action,Symbol,Description,Quantity,Price,Fees & Comm,Amount
    Args:
        csv_bytes: Raw CSV export encoded as UTF-8, with or without a leading
            byte-order mark.
    Returns:
        One TransactionRecord per data row, in file order. ``date``, ``action``
        and ``description`` are whitespace-stripped strings ("" when the column
        is missing or empty); the remaining fields hold the raw cell text, or
        None when the column is missing or empty.
    Raises:
        UnicodeDecodeError: If ``csv_bytes`` is not valid UTF-8.
    """
    # "utf-8-sig" drops a leading BOM when present. With plain "utf-8" the BOM
    # stays glued to the first header ("\ufeffDate"), so the "Date" lookup
    # misses and every record silently gets an empty date.
    text_stream = io.StringIO(csv_bytes.decode("utf-8-sig"))
    reader = csv.DictReader(text_stream)
    return [
        TransactionRecord(
            date=(row.get("Date") or "").strip(),
            action=(row.get("Action") or "").strip(),
            symbol=row.get("Symbol") or None,
            description=(row.get("Description") or "").strip(),
            quantity=row.get("Quantity") or None,
            price=row.get("Price") or None,
            fees_comm=row.get("Fees & Comm") or None,
            amount=row.get("Amount") or None,
        )
        for row in reader
    ]
 def to_dicts(transaction_data: TransactionData) -> Dict[str, Any]:
    """Flatten a TransactionData into plain dicts/lists suitable for JSON output.
    The account info and every transaction are converted with
    ``dataclasses.asdict``; the remaining scalar fields are copied as they are.
    """
    payload: Dict[str, Any] = {
        "account_info": asdict(transaction_data.account_info),
        "transactions": list(map(asdict, transaction_data.transactions)),
    }
    # The remaining fields are copied verbatim, in the original key order.
    for field_name in ("date_range", "export_date", "total_transactions", "source"):
        payload[field_name] = getattr(transaction_data, field_name)
    return payload
--- a/schwab_scraper/features/transactions/scraper.py
+++ b/schwab_scraper/features/transactions/scraper.py
--- a/schwab_scraper/features/transactions/service.py
+++ b/schwab_scraper/features/transactions/service.py
@@ -0,0 +1,833 @@
 from __future__ import annotations
 import json
 from typing import Optional, Dict, Any, List
 from datetime import datetime, timezone
 import re
 from ...browser.auth import ensure_cookies
 from ...core.config import load_config, get_playwright_url
 from ...browser.client import connect, new_context, new_page
 from ...browser.navigation import goto_with_auth_check
 from .scraper import (
    perform_export_download,
    perform_export_download_enhanced,
    discover_accounts_from_page,
    discover_accounts_with_numbers,
 )
 from .parser import parse_csv_content
 from ...storage.cache import (
    write_cached_transaction_csv,
    read_cached_transaction_csv,
    TRANSACTION_CACHE_DIR,
 )
 from ...core.models import AccountInfo, TransactionData
 from ...core import Envelope, ErrorType, fail, ok
 import os
 async def _get_transaction_history_enhanced_impl(
    account: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    time_period: Optional[str] = None,
    debug: bool = False,
 ) -> Envelope[TransactionData]:
    """
    Enhanced export with account switching and filename preservation.
    Runs ``perform_export_download_enhanced`` in a fresh browser context, parses
    the CSV file it saved, re-caches the parsed rows and wraps them in an
    Envelope. When the export reports failure and ``account`` was given, the
    cached CSV for that account is served instead (``source="cache"``).
    Args:
        account: Account identifier (ending digits like '674', type like 'PLA Assets', or full label like 'PLA_Assets_XXX674').
                 Forwarded to the enhanced export helper and used for the cache fallback lookup.
        start_date, end_date: Reserved for future "Custom" range support; currently ignored.
        time_period: One of pre-defined periods (e.g., "Current Month", "Last 6 Months"). If None, uses page default.
        debug: Print progress details here and forward the flag to the scraper helpers.
    Returns:
        Envelope wrapping a TransactionData on success. Failure envelopes use
        AUTHENTICATION when no cookies could be obtained, PARSING when the
        exported file is missing or yields no rows, and UNKNOWN otherwise.
    """
    print("Starting enhanced transaction export...")
    if debug:
        print(f"  Account: {account}")
        print(f"  Time period: {time_period}")
    # Load configuration and cookies
    config = load_config()
    playwright_url = get_playwright_url(config)
    cookies = await ensure_cookies()
    if not cookies:
        return fail(
            "Could not establish session. Check credentials or manually refresh cookies.json.",
            ErrorType.AUTHENTICATION,
            retryable=False,
        )
    # Connect to browser
    p, browser = await connect(playwright_url)
    context = None
    page = None
    try:
        context = await new_context(browser, cookies=cookies)
        page = await new_page(context)
        # Use the enhanced export function
        export_result = await perform_export_download_enhanced(
            page=page,
            time_period=time_period,
            account=account,
            debug=debug,
            context=context,
            preserve_filename=True
        )
        if not export_result.get("success"):
            # Try fallback to cached data
            if account:
                if debug:
                    print("Enhanced export failed, trying cached fallback...")
                # Determine account label for cache lookup
                account_label = account
                if account.isdigit():
                    # Numeric query: try to resolve it to a full label by
                    # comparing its last three digits with each discovered
                    # account's 'ending'. Discovery is best-effort.
                    try:
                        accounts = await discover_accounts_with_numbers(page, debug=debug)
                        for acc in accounts:
                            if acc['ending'] == account[-3:]:
                                account_label = acc['label']
                                break
                    except Exception:
                        pass
                cached_bytes = read_cached_transaction_csv(account_label)
                if cached_bytes:
                    if debug:
                        print(f"Using cached data for {account_label}")
                    # Parse the cached CSV bytes
                    records = parse_csv_content(cached_bytes)
                    # Build account info from the label
                    account_type = account_label.split('_')[0] if '_' in account_label else "Unknown"
                    account_ending = account_label[-3:] if account_label[-3:].isdigit() else "000"
                    data = TransactionData(
                        account_info=AccountInfo(
                            account_type=account_type,
                            account_ending=account_ending,
                            full_description=account_label,
                            is_selected=True,
                        ),
                        transactions=records,
                        date_range=time_period or "Unknown",
                        export_date="Unknown",
                        total_transactions=len(records),
                        source="cache",
                    )
                    return ok(data)
            return fail(
                export_result.get("error", "Enhanced export failed."),
                ErrorType.UNKNOWN,
                retryable=True,
            )
        # Parse the exported CSV
        saved_path = export_result.get("saved_path")
        if not saved_path or not os.path.exists(saved_path):
            return fail("Export file not found after download", ErrorType.PARSING, retryable=True)
        with open(saved_path, 'r', encoding='utf-8') as f:
            csv_content = f.read()
        parsed_data = parse_csv_content(csv_content.encode('utf-8'))
        if not parsed_data:
            return fail("Failed to parse CSV: No transactions found", ErrorType.PARSING, retryable=True)
        # Build response
        account_info = export_result.get("account_info", {})
        transactions = parsed_data
        # Cache the results
        if account_info.get("account_ending"):
            account_label = f"{account_info.get('account_type', 'Unknown')}_XXX{account_info.get('account_ending')}"
            # Caching is best-effort: a failure here must not fail the export.
            try:
                # Generate timestamp for filename
                timestamp = datetime.now(timezone.utc).strftime('%Y%m%d-%H%M%S')
                # Convert transactions back to CSV format for caching
                import csv
                import io
                # Create CSV content from transactions
                output = io.StringIO()
                writer = csv.writer(output)
                # Write header
                writer.writerow(["Date", "Action", "Symbol", "Description", "Quantity", "Price", "Fees & Comm", "Amount"])
                # Write transaction data
                for transaction in transactions:
                    writer.writerow([
                        transaction.date,
                        transaction.action,
                        transaction.symbol or "",
                        transaction.description,
                        transaction.quantity or "",
                        transaction.price or "",
                        transaction.fees_comm or "",
                        transaction.amount or ""
                    ])
                csv_bytes = output.getvalue().encode('utf-8')
                write_cached_transaction_csv(account_label, timestamp, csv_bytes)
                if debug:
                    print(f"Cached transaction data for {account_label}")
            except Exception as e:
                if debug:
                    print(f"Failed to cache data: {e}")
        data = TransactionData(
            account_info=AccountInfo(
                account_type=account_info.get("account_type", "Unknown"),
                account_ending=account_info.get("account_ending", "000"),
                full_description=account_info.get("full_description", ""),
                is_selected=account_info.get("is_selected", True),
            ),
            transactions=transactions,
            date_range=time_period or "Unknown",
            export_date=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC'),
            total_transactions=len(transactions),
            source="live",
        )
        if debug:
            print(f"✅ Enhanced export successful: {len(transactions)} transactions")
        return ok(data)
    except Exception as e:
        if debug:
            print(f"Enhanced export exception: {e}")
            import traceback
            traceback.print_exc()
        return fail(f"Enhanced export failed: {str(e)}", ErrorType.UNKNOWN, retryable=True)
    finally:
        # Best-effort teardown: every handle is released independently, so one
        # failing close() neither skips the remaining handles nor replaces the
        # Envelope being returned with a teardown exception.
        for handle in (page, context, browser):
            try:
                if handle is not None:
                    await handle.close()
            except Exception:
                pass
        # Also stop the Playwright driver returned by connect(), as the other
        # service functions in this package do; otherwise it is leaked on
        # every call.
        try:
            if p is not None:
                await p.stop()
        except Exception:
            pass
 async def _ensure_cookies() -> Optional[List[Dict[str, Any]]]:
    """Await the shared ``ensure_cookies`` helper and hand back its result unchanged."""
    session_cookies = await ensure_cookies()
    return session_cookies
 def _get_latest_cache_csv_filename(account_label: str) -> Optional[str]:
    """Name of the newest ``.csv`` file cached for ``account_label``, or None.
    Looks in ``TRANSACTION_CACHE_DIR/<account_label>``. "Newest" means the
    highest modification time; if the mtime ordering fails for any reason the
    lexically greatest filename is returned instead. None is returned when
    the directory is missing or holds no CSV files.
    """
    import os
    account_dir = os.path.join(TRANSACTION_CACHE_DIR, account_label)
    if os.path.isdir(account_dir):
        candidates = [entry for entry in os.listdir(account_dir) if entry.lower().endswith('.csv')]
        if candidates:
            try:
                ordered = sorted(
                    candidates,
                    key=lambda entry: os.path.getmtime(os.path.join(account_dir, entry)),
                )
            except Exception:
                # mtime lookup failed (e.g. a file vanished); use name order.
                ordered = sorted(candidates)
            return ordered[-1]
    return None
 def _is_cache_fresh_for_label(account_label: str, max_age_hours: int = 24) -> bool:
    """Tell whether the newest cached CSV for ``account_label`` is recent enough.
    Returns True when the most recently modified ``.csv`` file under
    ``TRANSACTION_CACHE_DIR/<account_label>`` is at most ``max_age_hours`` old,
    and False when the directory is missing, has no CSV files, or the newest
    file is older than that.
    """
    import os
    import time
    account_dir = os.path.join(TRANSACTION_CACHE_DIR, account_label)
    if not os.path.isdir(account_dir):
        return False
    # Freshness is judged by modification time of the cached files.
    modified_times = [
        os.path.getmtime(os.path.join(account_dir, entry))
        for entry in os.listdir(account_dir)
        if entry.lower().endswith('.csv')
    ]
    if not modified_times:
        return False
    newest_age_seconds = time.time() - max(modified_times)
    return newest_age_seconds <= max_age_hours * 3600
 def _match_account_label_from_cache(account_query: Optional[str]) -> Optional[str]:
    """Pick the cached account label that corresponds to ``account_query``.
    Labels are the sub-directory names of ``TRANSACTION_CACHE_DIR``. A label
    qualifies only if ``_is_cache_fresh_for_label`` accepts it (default window:
    24 hours).
    * Empty/None query: the fresh label whose directory was modified most
      recently.
    * Otherwise: the first label, in directory-listing order, that equals the
      query, ends with it (all-digit query such as '604'), or contains it
      case-insensitively (e.g. 'PLA_Assets').
    Returns None when the cache directory is missing or nothing qualifies.
    """
    import os
    if not os.path.isdir(TRANSACTION_CACHE_DIR):
        return None
    cached_labels = [
        entry
        for entry in os.listdir(TRANSACTION_CACHE_DIR)
        if os.path.isdir(os.path.join(TRANSACTION_CACHE_DIR, entry))
    ]
    if not cached_labels:
        return None
    if not account_query:
        fresh_candidates = [label for label in cached_labels if _is_cache_fresh_for_label(label)]
        if not fresh_candidates:
            return None
        # max() keeps the first of equally-recent directories, exactly like
        # taking element 0 of a stable descending sort.
        return max(
            fresh_candidates,
            key=lambda label: os.path.getmtime(os.path.join(TRANSACTION_CACHE_DIR, label)),
        )
    lowered_query = account_query.lower()
    def is_match(label: str) -> bool:
        return (
            account_query == label
            or (account_query.isdigit() and label.endswith(account_query))
            or lowered_query in label.lower()
        )
    # The match is tested before freshness so the filesystem is only touched
    # for labels that actually correspond to the query.
    return next(
        (label for label in cached_labels if is_match(label) and _is_cache_fresh_for_label(label)),
        None,
    )
 def _build_transaction_data_from_label(
    account_label: str,
    records: List[Any],
    date_range: str,
    source: str,
 ) -> TransactionData:
    """Wrap parsed records in a TransactionData whose account info is derived from ``account_label``."""
    export_date = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
    account_info = AccountInfo(
        # Text before the first '_' is used as the type and the last three
        # characters as the ending.
        account_type=account_label.split('_')[0],
        account_ending=account_label[-3:],
        full_description=account_label,
        is_selected=True,
    )
    return TransactionData(
        account_info=account_info,
        transactions=records,
        date_range=date_range,
        export_date=export_date,
        total_transactions=len(records),
        source=source,
    )
 def _load_fresh_cached_transaction_data(
    account: Optional[str],
    time_period: Optional[str],
 ) -> Optional[TransactionData]:
    """Return TransactionData built from a fresh cached CSV matching ``account``, or None if there is none."""
    account_label = _match_account_label_from_cache(account)
    if not account_label:
        return None
    cached_bytes = read_cached_transaction_csv(account_label)
    if not cached_bytes:
        return None
    records = parse_csv_content(cached_bytes)
    return _build_transaction_data_from_label(account_label, records, time_period or "Cache", "cache")
 async def _export_transactions_live(
    page: Any,
    context: Any,
    account: Optional[str],
    time_period: Optional[str],
    debug: bool,
 ) -> TransactionData:
    """Run one export download on ``page``, cache the CSV, and return it parsed as TransactionData."""
    download = await perform_export_download(
        page,
        time_period=time_period,
        account=account,
        debug=debug,
        context=context,
    )
    csv_bytes = download["content"]
    account_label = download["label"]
    ts = download["ts"]
    # Cache before parsing so the raw export is kept even if parsing fails.
    write_cached_transaction_csv(account_label, ts, csv_bytes)
    records = parse_csv_content(csv_bytes)
    return _build_transaction_data_from_label(account_label, records, time_period or "Page Default", "live")
 async def _get_transaction_history_impl(
    account: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    time_period: Optional[str] = None,
    debug: bool = False,
 ) -> Envelope[TransactionData]:
    """
    Export and parse transaction history for the selected account.
    Flow: validate the optional dates, obtain cookies, run the export in a
    browser context, and on failure reconnect and retry once. If the retry also
    fails, or no cookies are available, a fresh cached CSV is served when one
    matches ``account``.
    Args:
        account: Account identifier (ending digits like '604', name like 'Joint', or full label like 'PLA_Assets_XXX674').
                 ⚠️  IMPORTANT: Due to Schwab's website design, automatic account switching causes browser crashes.
                 If the wrong account is selected, you'll get clear instructions to manually select the correct account first.
        start_date, end_date: Reserved for future "Custom" range support. They are only
                 validated here (YYYY-MM-DD or MM/DD/YYYY, start on or before end).
        time_period: One of pre-defined periods (e.g., "Current Month", "Last 6 Months"). If None, uses page default.
        debug: Print diagnostics about failed attempts and forward the flag to the export helper.
    Returns:
        Envelope wrapping a TransactionData with ``source`` "live" or "cache" on
        success; otherwise a failure Envelope (VALIDATION for bad dates,
        AUTHENTICATION when no session and no fresh cache, UNKNOWN for the rest).
    """
    # Basic input validation for optional custom date params
    def _parse_date(date_str: str) -> Optional[datetime]:
        # Accept YYYY-MM-DD or MM/DD/YYYY
        if re.fullmatch(r"\d{4}-\d{2}-\d{2}", date_str):
            try:
                return datetime.strptime(date_str, "%Y-%m-%d")
            except ValueError:
                return None
        if re.fullmatch(r"\d{2}/\d{2}/\d{4}", date_str):
            try:
                return datetime.strptime(date_str, "%m/%d/%Y")
            except ValueError:
                return None
        return None
    if start_date:
        start_dt = _parse_date(start_date)
        if not start_dt:
            return fail(f"Invalid start_date format: '{start_date}'. Use YYYY-MM-DD or MM/DD/YYYY.", ErrorType.VALIDATION, retryable=False)
    else:
        start_dt = None
    if end_date:
        end_dt = _parse_date(end_date)
        if not end_dt:
            return fail(f"Invalid end_date format: '{end_date}'. Use YYYY-MM-DD or MM/DD/YYYY.", ErrorType.VALIDATION, retryable=False)
    else:
        end_dt = None
    if start_dt and end_dt and start_dt > end_dt:
        return fail(
            "start_date must be on or before end_date",
            ErrorType.VALIDATION,
            retryable=False,
        )
    cookies = await _ensure_cookies()
    if not cookies:
        # No session: the only thing left to offer is a fresh cached export.
        cached_data = _load_fresh_cached_transaction_data(account, time_period)
        if cached_data is not None:
            return ok(cached_data)
        return fail(
            "Unable to establish a session. Provide credentials in config.json or a valid cookies.json.",
            ErrorType.AUTHENTICATION,
            retryable=False,
        )
    config = load_config()
    playwright_url = get_playwright_url(config)
    p, browser = await connect(playwright_url)
    context = None
    page = None
    try:
        context = await new_context(browser, cookies=cookies)
        page = await new_page(context)
        try:
            data = await _export_transactions_live(page, context, account, time_period, debug)
            return ok(data)
        except Exception as e:
            # First failure: attempt one reconnect and retry, then fallback to cache
            if debug:
                try:
                    print(f"DEBUG: perform_export_download failed: {type(e).__name__}: {e}")
                except Exception:
                    pass
            # Attempt one reconnect if browser/context appears closed
            try:
                # Cleanup previous if possible
                try:
                    if context is not None:
                        await context.close()
                except Exception:
                    pass
                try:
                    await browser.close()
                except Exception:
                    pass
                try:
                    await p.stop()
                except Exception:
                    pass
                # Reconnect; the names are rebound so the outer ``finally``
                # tears down the new handles rather than the dead ones.
                p, browser = await connect(playwright_url)
                context = await new_context(browser, cookies=cookies)
                page = await new_page(context)
                # Retry export
                if debug:
                    print("DEBUG: Retrying perform_export_download after reconnect...")
                data = await _export_transactions_live(page, context, account, time_period, debug)
                return ok(data)
            except Exception as e2:
                if debug:
                    try:
                        print(f"DEBUG: Retry after reconnect failed: {type(e2).__name__}: {e2}")
                    except Exception:
                        pass
                # Fall back to cache if available and fresh
                cached_data = _load_fresh_cached_transaction_data(account, time_period)
                if cached_data is not None:
                    return ok(cached_data)
                return fail("Export failed and no fresh cache available", ErrorType.UNKNOWN, retryable=True)
    except Exception as e:
        return fail(str(e), ErrorType.UNKNOWN, retryable=True)
    finally:
        # Best-effort teardown; errors while closing must not mask the result.
        try:
            if context is not None:
                await context.close()
        except Exception:
            pass
        try:
            await browser.close()
        except Exception:
            pass
        try:
            await p.stop()
        except Exception:
            pass
 def _get_cache_accounts(debug: bool = False) -> List[Dict[str, Any]]:
    """Derive the account list from the on-disk transaction CSV cache.

    Every sub-directory of ``TRANSACTION_CACHE_DIR`` that holds at least one
    ``.csv`` file is treated as a candidate account. Candidates are visited
    newest-first by directory modification time. Each label is resolved from
    the directory name when it matches ``<Type>_XXX<digits>``, otherwise from
    the lexicographically last CSV filename, otherwise from the raw directory
    name. Candidates without a numeric ending of at least three digits are
    dropped.

    Args:
        debug: When true, print ``DEBUG:`` progress messages to stdout.

    Returns:
        A list of dicts with ``label``, ``type``, ``ending`` (last three
        digits) and ``cache_info`` (``last_updated``, ``csv_count``).
    """
    import os
    from datetime import datetime
    from ...storage.cache import TRANSACTION_CACHE_DIR

    def _split_label(label):
        # "<Type>_XXX<digits>" -> (type with underscores as spaces, digits)
        pieces = label.split('_XXX')
        ending = pieces[1] if len(pieces) > 1 else label[-3:]
        return pieces[0].replace('_', ' '), ending

    if not os.path.isdir(TRANSACTION_CACHE_DIR):
        if debug:
            print(f"DEBUG: Cache directory does not exist: {TRANSACTION_CACHE_DIR}")
        return []

    # Pass 1: gather every cache sub-directory together with its metadata.
    candidates = []
    for name in os.listdir(TRANSACTION_CACHE_DIR):
        entry_path = os.path.join(TRANSACTION_CACHE_DIR, name)
        if not os.path.isdir(entry_path):
            continue
        try:
            modified = os.stat(entry_path).st_mtime
            csv_names = [fn for fn in os.listdir(entry_path) if fn.endswith('.csv')]
        except Exception as e:
            if debug:
                print(f"DEBUG: Error processing cache dir {name}: {e}")
            continue
        candidates.append({
            'name': name,
            'path': entry_path,
            'mtime': modified,
            'csv_count': len(csv_names),
            'csv_files': csv_names,
        })

    # Most recently modified first, so active accounts are listed first.
    ordered = sorted(candidates, key=lambda item: item['mtime'], reverse=True)
    if debug:
        print(f"DEBUG: Found {len(ordered)} cache directories")

    # Pass 2: turn each candidate into an account entry.
    accounts = []
    for cache_info in ordered:
        name = cache_info['name']
        csv_names = cache_info['csv_files']
        if not csv_names:
            if debug:
                print(f"DEBUG: Skipping {name} - no CSV files")
            continue
        try:
            normalized_label = name
            account_type = None
            account_ending = None
            if re.match(r"^[A-Za-z_]+_XXX\d{3,4}$", name):
                # Strategy 1: the directory name already has the expected shape.
                account_type, account_ending = _split_label(name)
            else:
                # Strategy 2: recover the label from the last CSV filename.
                try:
                    from .scraper import parse_suggested_filename
                    normalized_label = parse_suggested_filename(max(csv_names))["label"]
                    if '_XXX' in normalized_label:
                        account_type, account_ending = _split_label(normalized_label)
                except Exception as e:
                    if debug:
                        print(f"DEBUG: Failed to parse filename for {name}: {e}")
                    # Strategy 3: fall back to the raw directory name.
                    normalized_label = name
                    account_type = name
                    tail = name[-3:]
                    account_ending = tail if tail.isdigit() else "000"

            ending_ok = bool(account_ending) and account_ending.isdigit() and len(account_ending) >= 3
            if not ending_ok:
                if debug:
                    print(f"DEBUG: Invalid account ending for {name}: {account_ending}")
                continue

            short_ending = account_ending[-3:]  # entries always carry three digits
            accounts.append({
                "label": normalized_label,
                "type": account_type or normalized_label.split('_')[0],
                "ending": short_ending,
                "cache_info": {
                    "last_updated": datetime.fromtimestamp(cache_info['mtime']).isoformat(),
                    "csv_count": cache_info['csv_count'],
                },
            })
            if debug:
                print(f"DEBUG: Added cache account: {normalized_label} ({account_type} ending {short_ending}) - {cache_info['csv_count']} files")
        except Exception as e:
            if debug:
                print(f"DEBUG: Error processing cache account {name}: {e}")
            continue

    if debug:
        print(f"DEBUG: Successfully processed {len(accounts)} accounts from cache")
        if not accounts:
            contents = os.listdir(TRANSACTION_CACHE_DIR) if os.path.isdir(TRANSACTION_CACHE_DIR) else 'N/A'
            print(f"DEBUG: Cache directory contents: {contents}")
    return accounts
 async def _list_available_accounts_impl(debug: bool = False) -> List[Dict[str, Any]]:
    """Return the available accounts, preferring live discovery over the cache.

    When session cookies are available, a browser session is opened, the
    Schwab history page is loaded (up to two authentication attempts) and
    accounts are discovered from the live page. Live results are merged with
    the cached accounts keyed by ``ending``; live entries win on conflict.
    Any failure on the live path -- including a failure to load the config or
    to connect to the browser -- falls through to the cache-only result.

    Args:
        debug: When true, print ``DEBUG:`` progress messages to stdout.

    Returns:
        A list of account dicts; empty when neither source yields accounts.
    """
    if debug:
        print("DEBUG: Starting account listing with enhanced discovery...")
    cookies = await _ensure_cookies()
    if cookies:
        if debug:
            print("DEBUG: Session cookies available, attempting live account discovery...")
        # Initialised up front so the finally block can tell what was opened.
        p = None
        browser = None
        context = None
        try:
            # Connecting happens inside the try so a connection failure
            # degrades to the cache fallback instead of escaping the function.
            config = load_config()
            playwright_url = get_playwright_url(config)
            p, browser = await connect(playwright_url)
            context = await new_context(browser, cookies=cookies)
            page = await new_page(context)
            # Auth-aware navigation, retried once after a short pause.
            max_auth_attempts = 2
            auth_success = False
            for auth_attempt in range(max_auth_attempts):
                if debug:
                    print(f"DEBUG: Authentication attempt {auth_attempt + 1}/{max_auth_attempts}...")
                auth_success = await goto_with_auth_check(page, context, "https://client.schwab.com/app/accounts/history/#/", debug=debug)
                if auth_success:
                    break
                elif auth_attempt < max_auth_attempts - 1:
                    if debug:
                        print("DEBUG: Authentication failed, retrying...")
                    await page.wait_for_timeout(3000)
            if not auth_success:
                if debug:
                    print("DEBUG: All authentication attempts failed")
                # Caught by the handler below, which routes to the cache fallback.
                raise Exception("Authentication failed after multiple attempts")
            if debug:
                print("DEBUG: Successfully authenticated, discovering accounts from live dropdown...")
            accounts = []
            try:
                accounts = await discover_accounts_from_page(page, debug=debug)
                if debug:
                    print(f"DEBUG: Live account discovery returned {len(accounts)} accounts")
            except Exception as e:
                if debug:
                    print(f"DEBUG: Live account discovery failed: {e}")
                accounts = []
            if accounts:
                if debug:
                    print(f"DEBUG: Successfully discovered {len(accounts)} accounts from live page:")
                    for acc in accounts:
                        print(f"DEBUG: - {acc['label']} ({acc['type']} ending {acc['ending']})")
                # Enrich the live result with cache-only accounts.
                cache_accounts = _get_cache_accounts(debug=debug)
                if cache_accounts:
                    if debug:
                        print(f"DEBUG: Found {len(cache_accounts)} accounts in cache, merging...")
                    # Keyed by ending: cache entries first, then live entries overwrite them.
                    combined = {acc['ending']: acc for acc in cache_accounts}
                    live_endings = set()
                    for live_acc in accounts:
                        combined[live_acc['ending']] = live_acc
                        live_endings.add(live_acc['ending'])
                    result = list(combined.values())
                    if debug:
                        print(f"DEBUG: Final merged result: {len(result)} accounts")
                        for acc in result:
                            source = "live" if acc['ending'] in live_endings else "cache"
                            print(f"DEBUG: - {acc['label']} ({acc['type']} ending {acc['ending']}) [{source}]")
                    return result
                else:
                    if debug:
                        print("DEBUG: No cache data available, returning live accounts only")
                    return accounts
            else:
                if debug:
                    print("DEBUG: No accounts discovered from live page, falling back to cache only")
        except Exception as e:
            if debug:
                print(f"DEBUG: Live account discovery failed with error: {e}")
            # Continue to cache fallback
        finally:
            # Best-effort teardown; one failing close must not block the others.
            for resource, closer in ((context, "close"), (browser, "close"), (p, "stop")):
                if resource is None:
                    continue
                try:
                    await getattr(resource, closer)()
                except Exception:
                    pass
    else:
        if debug:
            print("DEBUG: No session cookies available, skipping live discovery")
    if debug:
        print("DEBUG: Using cache-only fallback for account listing...")
    cache_accounts = _get_cache_accounts(debug=debug)
    if cache_accounts:
        if debug:
            print(f"DEBUG: Successfully retrieved {len(cache_accounts)} accounts from cache")
        return cache_accounts
    if debug:
        print("DEBUG: No accounts found in cache either")
    return []
 async def list_available_accounts(debug: bool = False) -> Envelope[List[Dict[str, Any]]]:
    """Envelope-returning wrapper around ``_list_available_accounts_impl``.

    Any exception from the implementation is converted into a retryable
    ``ErrorType.UNKNOWN`` failure envelope instead of propagating.
    """
    try:
        return ok(await _list_available_accounts_impl(debug=debug))
    except Exception as exc:
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
 async def get_transaction_history(
    account: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    time_period: Optional[str] = None,
    debug: bool = False,
 ) -> Envelope[TransactionData]:
    """Public entry point; forwards every argument to ``_get_transaction_history_impl``."""
    forwarded = {
        "account": account,
        "start_date": start_date,
        "end_date": end_date,
        "time_period": time_period,
        "debug": debug,
    }
    return await _get_transaction_history_impl(**forwarded)
 async def get_transaction_history_enhanced(
    account: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    time_period: Optional[str] = None,
    debug: bool = False,
 ) -> Envelope[TransactionData]:
    """Public entry point; forwards every argument to ``_get_transaction_history_enhanced_impl``."""
    forwarded = {
        "account": account,
        "start_date": start_date,
        "end_date": end_date,
        "time_period": time_period,
        "debug": debug,
    }
    return await _get_transaction_history_enhanced_impl(**forwarded)
--- a/schwab_scraper/server/init.py
+++ b/schwab_scraper/server/init.py
--- a/schwab_scraper/server/api.py
+++ b/schwab_scraper/server/api.py
@@ -0,0 +1,74 @@
 from fastapi import FastAPI, HTTPException
 import asyncio
 from schwab_scraper import unified_api
 from schwab_scraper.core import Envelope
 # ASGI application object; the route handlers below register themselves on it.
 app = FastAPI(title="Schwab Scraper API", version="0.1.0", description="REST API for Schwab Scraper via unified_api")
 # Single-permit semaphore: every handler acquires it around its unified_api
 # call, so at most one such call runs at a time in this process.
 browser_lock = asyncio.Semaphore(1)
 async def check_success(envelope: Envelope):
    """Unwrap a result envelope for an HTTP response.

    Returns the envelope's ``data`` when ``success`` is truthy; otherwise
    raises ``HTTPException`` with status 400 carrying the envelope's ``error``
    (or "Unknown error" when the key is absent).
    """
    if envelope.get("success"):
        return envelope.get("data")
    raise HTTPException(status_code=400, detail=envelope.get("error", "Unknown error"))
@app.get("/api/accounts", tags=["Accounts"])
 async def list_accounts():
    """List all available Schwab accounts."""
    async with browser_lock:
        env = await unified_api.list_accounts()
    return await check_success(env)
@app.get("/api/accounts/overview", tags=["Accounts"])
 async def get_overview(account: str | None = None):
    """Get a high level overview of an account or all accounts."""
    async with browser_lock:
        env = await unified_api.get_account_overview(account)
    return await check_success(env)
@app.get("/api/accounts/positions", tags=["Accounts"])
 async def get_positions(account: str | None = None, include_non_equity: bool = False):
    """Retrieve positions/holdings for an account."""
    async with browser_lock:
        env = await unified_api.get_positions(account, include_non_equity=include_non_equity)
    return await check_success(env)
@app.get("/api/transactions", tags=["Transactions"])
 async def get_transactions(
    account: str | None = None,
    limit: int = 50,
    days_back: int = 90
 ):
    """Fetch transaction history."""
    async with browser_lock:
        env = await unified_api.get_transaction_history_enhanced(
            account=account, limit=limit, days_back=days_back
        )
    return await check_success(env)
@app.get("/api/equity/morningstar/{ticker}", tags=["Research"])
 async def get_morningstar(ticker: str):
    """Get Morningstar rating details for an equity."""
    async with browser_lock:
        env = await unified_api.get_morningstar_data(ticker)
    return await check_success(env)
@app.get("/api/equity/phase1/{ticker}", tags=["Research"])
 async def get_equity_phase1(ticker: str):
    """Fetch base Phase1 equity statistics (pricing, basic facts)."""
    async with browser_lock:
        env = await unified_api.get_equity_phase1_data(ticker)
    return await check_success(env)
@app.get("/api/session/status", tags=["System"])
 async def get_session_status():
    """Check if the cookies and session are currently valid."""
    async with browser_lock:
        env = await unified_api.get_session_status()
    return await check_success(env)
 def start():
    """Serve this module's ``app`` with uvicorn on 0.0.0.0:8000 with reload enabled."""
    from uvicorn import run as serve
    serve(
        "schwab_scraper.server.api:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
    )
 if __name__ == "__main__":
    start()
--- a/schwab_scraper/server/mcp_server.py
+++ b/schwab_scraper/server/mcp_server.py
@@ -0,0 +1,79 @@
 from mcp.server.fastmcp import FastMCP
 from starlette.applications import Starlette
 from starlette.routing import Route, Mount
 from starlette.responses import JSONResponse
 import uvicorn
 import asyncio
 import os
 from schwab_scraper import unified_api
 # FastMCP comes from the official `mcp` package (mcp.server.fastmcp), not the
 # standalone `fastmcp` distribution.
 # NOTE(review): confirm that this FastMCP constructor accepts a `description`
 # keyword in the pinned SDK version.
 mcp = FastMCP("SchwabScraper", description="Schwab Scraper MCP Server for financial data")
 # Single-permit semaphore: every tool below acquires it around its
 # unified_api call, so at most one such call runs at a time in this process.
 browser_lock = asyncio.Semaphore(1)
 def unwrap(env):
    """Return the envelope's ``data``; raise ``Exception`` when ``success`` is falsy."""
    if env.get("success"):
        return env.get("data")
    raise Exception(f"Failed: {env.get('error')}")
@mcp.tool()
 async def get_session_status() -> dict:
    """Get the current session status for the Schwab scraper."""
    async with browser_lock:
        return unwrap(await unified_api.get_session_status())
@mcp.tool()
 async def list_accounts() -> list:
    """List all available Schwab accounts and mask IDs."""
    async with browser_lock:
        accounts = unwrap(await unified_api.list_accounts())
        return [acc.model_dump() for acc in accounts] if accounts else []
@mcp.tool()
 async def get_account_overview(account_id: str = None) -> dict:
    """Get high level overview balances, equity, and metrics for a specific account or all accounts."""
    async with browser_lock:
        overview = unwrap(await unified_api.get_account_overview(account_id))
        return overview.model_dump() if overview else {}
@mcp.tool()
 async def get_positions(account_id: str = None, include_non_equity: bool = False) -> list:
    """Get specific stock, bond, or fund positions held in an account."""
    async with browser_lock:
        pos = unwrap(await unified_api.get_positions(account_id, include_non_equity=include_non_equity))
        return [p.model_dump() for p in pos] if pos else []
@mcp.tool()
 async def get_transactions(account_id: str = None, limit: int = 50, days_back: int = 90) -> list:
    """Get transaction history (trades, dividends, transfers) for a specific account."""
    async with browser_lock:
        tx = unwrap(await unified_api.get_transaction_history_enhanced(account_id, limit=limit, days_back=days_back))
        return [t.model_dump() for t in tx] if tx else []
@mcp.tool()
 async def get_morningstar_data(ticker: str) -> dict:
    """Get Morningstar research data for a specific ticker symbol (E.g. AAPL) directly from Schwab."""
    async with browser_lock:
        data = unwrap(await unified_api.get_morningstar_data(ticker))
        return data.model_dump() if data else {}
 # --- Blueprint Requirements: Health Check & ASGI App ---
 async def health(request):
    """Health-check handler: always answers with a JSON ``{"status": "ok"}`` body."""
    payload = {"status": "ok"}
    return JSONResponse(payload)
 def create_app():
    """Placeholder for a Starlette-wrapped ASGI app; currently does nothing and returns None."""
    # Author's note: the blueprint asks for a Starlette wrapper around the MCP
    # server, but the `mcp.server.fastmcp` SDK (mcp >= 1.2) was judged not to
    # offer the mounting helper the older `fastmcp` package had, so the server
    # is started through FastMCP's own SSE transport in the __main__ block.
    # NOTE(review): the `health` handler above is therefore not routed
    # anywhere -- confirm whether the SDK now exposes an ASGI app to mount.
    pass
 if __name__ == "__main__":
    port = int(os.environ.get("PORT", 8000))
    # FastMCP.run() in the official `mcp` SDK takes the transport only; the
    # bind address is read from the server settings, so host/port are set
    # there instead of being passed as run() keyword arguments.
    # NOTE(review): newer SDK releases may decide host-header protection from
    # the host given at construction time -- confirm external clients can
    # still connect, or pass host/port to the FastMCP constructor instead.
    mcp.settings.host = "0.0.0.0"
    mcp.settings.port = port
    # FastMCP starts its own uvicorn server for the SSE endpoints.
    mcp.run(transport="sse")
--- a/schwab_scraper/storage/init.py
+++ b/schwab_scraper/storage/init.py
--- a/schwab_scraper/storage/cache.py
+++ b/schwab_scraper/storage/cache.py
@@ -0,0 +1,74 @@
 import os
 from typing import Optional
 # Both cache roots are relative paths, so they resolve against the process's
 # current working directory.
 # Root directory for cached Morningstar PDFs.
 CACHE_DIR = "data/morningstar_pdfs"
 # Root directory for cached transaction CSVs (one sub-directory per account label).
 TRANSACTION_CACHE_DIR = "data/transaction_csvs"
 def ensure_cache_dir() -> str:
    """Create the PDF cache directory if needed and return its path."""
    target = CACHE_DIR
    os.makedirs(target, exist_ok=True)
    return target
 def ensure_transaction_cache_dir() -> str:
    """Create the transaction CSV cache directory if needed and return its path."""
    target = TRANSACTION_CACHE_DIR
    os.makedirs(target, exist_ok=True)
    return target
 def cache_filename(ticker: str, formatted_date: str) -> str:
    """Return the cache path ``<CACHE_DIR>/<TICKER>_<date>.pdf`` for a ticker and date.

    The cache directory is created as a side effect. Forward and backward
    slashes in the date are replaced by underscores so the date cannot
    introduce sub-directories.
    """
    ensure_cache_dir()
    separators = str.maketrans({'/': '_', '\\': '_'})
    basename = f"{ticker.upper()}_{formatted_date.translate(separators)}.pdf"
    return os.path.join(CACHE_DIR, basename)
 def transaction_cache_filename(account_label: str, timestamp_str: str) -> str:
    """Return the CSV cache path for an account label and timestamp.

    The result has the form
    ``<TRANSACTION_CACHE_DIR>/<label>/<label>_Transactions_<timestamp>.csv``,
    where forward slashes in the label are replaced by underscores. The
    per-account directory is created as a side effect.
    """
    ensure_transaction_cache_dir()
    label = account_label.replace("/", "_")
    target_dir = os.path.join(TRANSACTION_CACHE_DIR, label)
    os.makedirs(target_dir, exist_ok=True)
    basename = f"{label}_Transactions_{timestamp_str}.csv"
    return os.path.join(target_dir, basename)
 def read_cached_pdf(ticker: str) -> Optional[bytes]:
    """Return the bytes of the most recently modified cached PDF for ``ticker``.

    Cached files are named ``<TICKER>_<date>.pdf`` (see ``cache_filename``),
    so the match includes the underscore separator: ticker "A" must not pick
    up "AAPL_....pdf". When several PDFs exist for the ticker, the one with
    the newest modification time is returned instead of whichever name
    ``os.listdir`` happens to yield first.

    Args:
        ticker: Ticker symbol; compared in upper case.

    Returns:
        The PDF bytes, or ``None`` when nothing is cached for the ticker.
    """
    ensure_cache_dir()
    prefix = f"{ticker.upper()}_"
    candidates = [
        os.path.join(CACHE_DIR, name)
        for name in os.listdir(CACHE_DIR)
        if name.startswith(prefix) and name.endswith(".pdf")
    ]
    if not candidates:
        return None
    newest = max(candidates, key=os.path.getmtime)
    with open(newest, "rb") as f:
        return f.read()
 def read_cached_transaction_csv(account_label: str) -> Optional[bytes]:
    """Return the bytes of the newest cached CSV for an account label, or ``None``.

    "Newest" is the lexicographically greatest filename, which relies on the
    timestamp embedded in the name.
    """
    ensure_transaction_cache_dir()
    account_dir = os.path.join(TRANSACTION_CACHE_DIR, account_label.replace("/", "_"))
    if not os.path.isdir(account_dir):
        return None
    csv_names = [entry for entry in os.listdir(account_dir) if entry.endswith('.csv')]
    if not csv_names:
        return None
    newest = max(csv_names)
    with open(os.path.join(account_dir, newest), 'rb') as handle:
        return handle.read()
 def write_cached_pdf(ticker: str, formatted_date: str, pdf_bytes: bytes) -> str:
    """Write ``pdf_bytes`` to the cache path for the ticker and date; return that path."""
    ensure_cache_dir()
    destination = cache_filename(ticker, formatted_date)
    with open(destination, "wb") as handle:
        handle.write(pdf_bytes)
    return destination
 def write_cached_transaction_csv(account_label: str, timestamp_str: str, csv_bytes: bytes) -> str:
    """Write ``csv_bytes`` to the cache path for the label and timestamp; return that path."""
    destination = transaction_cache_filename(account_label, timestamp_str)
    with open(destination, 'wb') as handle:
        handle.write(csv_bytes)
    return destination
--- a/schwab_scraper/unified_api.py
+++ b/schwab_scraper/unified_api.py
@@ -0,0 +1,188 @@
 """Unified Schwab data surface with envelope-based async endpoints."""
 from __future__ import annotations
 from typing import Optional
 from .core import AccountOverview, AccountSummary, Envelope, MorningstarData, PortfolioSnapshot, Position, EquityPhase1Data
 from .core.models import TransactionData
 from .core import ErrorType, fail
 from .features.accounts_positions.accounts_scraper import list_accounts as _list_accounts
 from .features.accounts_positions.overview_scraper import get_account_overview as _get_account_overview
 from .features.accounts_positions.positions_scraper import get_positions as _get_positions
 from .features.accounts_positions.portfolio_scraper import get_portfolio_snapshot as _get_portfolio_snapshot
 from .features.equity.service import get_morningstar_data as _get_morningstar_data, get_equity_phase1_data as _get_equity_phase1_data
 from .features.transactions.service import (
    get_transaction_history as _get_transaction_history,
    get_transaction_history_enhanced as _get_transaction_history_enhanced,
    list_available_accounts as _list_available_accounts,
 )
 from .browser.session import get_session_status as _get_session_status_impl
 from .browser.session import refresh_session as _refresh_session_impl
 from .browser.session import set_cookies_from_file as _set_cookies_impl
 from .browser.session import export_cookies as _export_cookies_impl
 async def get_session_status(debug: bool = False) -> Envelope[dict]:
    """Return the session-status envelope; exceptions become a retryable failure envelope."""
    try:
        # The underlying call already yields an envelope, so it is passed through.
        return await _get_session_status_impl(debug=debug)
    except Exception as exc:
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
 async def refresh_session(debug: bool = False) -> Envelope[None]:
    """Refresh the session; exceptions become a retryable failure envelope."""
    try:
        outcome = await _refresh_session_impl(debug=debug)
    except Exception as exc:
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
    return outcome
 async def set_cookies(cookies_path: str, debug: bool = False) -> Envelope[None]:
    """Load cookies from ``cookies_path``; exceptions become a non-retryable failure envelope."""
    try:
        outcome = await _set_cookies_impl(cookies_path, debug=debug)
    except Exception as exc:
        return fail(str(exc), ErrorType.UNKNOWN, retryable=False)
    return outcome
 async def export_cookies(cookies_path: str, debug: bool = False) -> Envelope[None]:
    """Export cookies to ``cookies_path``; exceptions become a non-retryable failure envelope."""
    try:
        outcome = await _export_cookies_impl(cookies_path, debug=debug)
    except Exception as exc:
        return fail(str(exc), ErrorType.UNKNOWN, retryable=False)
    return outcome
 async def list_accounts(debug: bool = False) -> Envelope[list[AccountSummary]]:
    """List accounts, normalising every entry to an ``AccountSummary``.

    A failed envelope from the scraper is returned unchanged. On success,
    entries that are not already ``AccountSummary`` instances are expanded
    into one via keyword arguments.
    """
    envelope = await _list_accounts(debug=debug)
    if not envelope["success"]:
        return envelope
    raw_items = envelope["data"] or []
    summaries = [
        entry if isinstance(entry, AccountSummary) else AccountSummary(**entry)
        for entry in raw_items
    ]
    return {
        "success": True,
        "data": summaries,
        "error": None,
        "error_type": None,
        "retryable": False,
    }
 async def get_account_overview(
    account: AccountSummary | str | None = None,
    *,
    debug: bool = False,
 ) -> Envelope[AccountOverview]:
    """Return the overview envelope for an account; dict input is converted to ``AccountSummary``."""
    resolved = AccountSummary(**account) if isinstance(account, dict) else account
    return await _get_account_overview(account=resolved, debug=debug)
 async def get_positions(
    account: AccountSummary | str | None = None,
    *,
    include_non_equity: bool = False,
    debug: bool = False,
 ) -> Envelope[list[Position]]:
    """Return the positions envelope for an account; dict input is converted to ``AccountSummary``."""
    resolved = AccountSummary(**account) if isinstance(account, dict) else account
    return await _get_positions(
        account=resolved,
        include_non_equity=include_non_equity,
        debug=debug,
    )
 async def get_portfolio_snapshot(
    account: AccountSummary | str | None = None,
    *,
    aggregate_by_symbol: bool = True,
    include_non_equity: bool = False,
    debug: bool = False,
 ) -> Envelope[PortfolioSnapshot]:
    """Return the portfolio-snapshot envelope; dict input is converted to ``AccountSummary``."""
    resolved = AccountSummary(**account) if isinstance(account, dict) else account
    options = {
        "aggregate_by_symbol": aggregate_by_symbol,
        "include_non_equity": include_non_equity,
        "debug": debug,
    }
    return await _get_portfolio_snapshot(account=resolved, **options)
 async def get_morningstar_data(ticker: str, debug: bool = False) -> Envelope[MorningstarData]:
    """Return the Morningstar data envelope for ``ticker`` from the equity service."""
    envelope = await _get_morningstar_data(ticker, debug=debug)
    return envelope
 async def get_equity_phase1_data(ticker: str, debug: bool = False) -> Envelope[EquityPhase1Data]:
    """Fetch the Phase 1 equity data set for a ticker.

    Per the service contract this covers quote/price data (symbol bar),
    forward-looking dividend information, core earnings metrics (EPS,
    forecasts), basic valuation ratios (P/E, Forward P/E, PEG) and calculated
    metrics such as the payout ratio.

    Args:
        ticker: Stock ticker symbol.
        debug: Enable debug logging.

    Returns:
        Envelope containing ``EquityPhase1Data`` or an error.
    """
    envelope = await _get_equity_phase1_data(ticker, debug=debug)
    return envelope
 async def list_available_accounts(debug: bool = False) -> Envelope[list[dict]]:
    """Return the transactions service's envelope of available accounts."""
    envelope = await _list_available_accounts(debug=debug)
    return envelope
 async def get_transaction_history(
    account: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    time_period: Optional[str] = None,
    debug: bool = False,
 ) -> Envelope[TransactionData]:
    """Forward every argument to the transactions service and return its envelope."""
    forwarded = {
        "account": account,
        "start_date": start_date,
        "end_date": end_date,
        "time_period": time_period,
        "debug": debug,
    }
    return await _get_transaction_history(**forwarded)
 async def get_transaction_history_enhanced(
    account: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    time_period: Optional[str] = None,
    debug: bool = False,
 ) -> Envelope[TransactionData]:
    """Forward every argument to the enhanced transactions service and return its envelope."""
    forwarded = {
        "account": account,
        "start_date": start_date,
        "end_date": end_date,
        "time_period": time_period,
        "debug": debug,
    }
    return await _get_transaction_history_enhanced(**forwarded)
 # Public surface of this module: session management first, then account and
 # portfolio queries, equity research, and transaction history.
 __all__ = [
    "get_session_status",
    "refresh_session",
    "set_cookies",
    "export_cookies",
    "list_accounts",
    "get_account_overview",
    "get_positions",
    "get_portfolio_snapshot",
    "get_morningstar_data",
    "get_equity_phase1_data",
    "list_available_accounts",
    "get_transaction_history",
    "get_transaction_history_enhanced",
 ]
--- a/schwab_scraper/utils/init.py
+++ b/schwab_scraper/utils/init.py
--- a/schwab_scraper/utils/logging.py
+++ b/schwab_scraper/utils/logging.py
@@ -0,0 +1,19 @@
 import logging
 import os
 from datetime import datetime, timezone
 def setup_logging(debug: bool = False) -> None:
    """Configure root logging at DEBUG when ``debug`` is true, otherwise at INFO."""
    logging.basicConfig(
        level=logging.DEBUG if debug else logging.INFO,
        format='%(asctime)s %(levelname)s %(name)s: %(message)s',
    )
 def save_debug_artifact(filename: str, content: str | bytes) -> str:
    debug_dir = "debug"
    os.makedirs(debug_dir, exist_ok=True)
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    path = os.path.join(debug_dir, f"{timestamp}_{filename}")
    mode = 'wb' if isinstance(content, (bytes, bytearray)) else 'w'
    with open(path, mode) as f:
        f.write(content)  # type: ignore[arg-type]
    return path
--- a/uv.lock
+++ b/uv.lock
@@ -121,6 +121,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" },
 ]
 [[package]]
 name = "annotated-doc"
 version = "0.0.4"
 source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" }
 wheels = [
    { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" },
 ]
 [[package]]
 name = "annotated-types"
 version = "0.7.0"
@@ -484,6 +493,22 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
 ]
 [[package]]
 name = "fastapi"
 version = "0.136.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "annotated-doc" },
    { name = "pydantic" },
    { name = "starlette" },
    { name = "typing-extensions" },
    { name = "typing-inspection" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/5d/45/c130091c2dfa061bbfe3150f2a5091ef1adf149f2a8d2ae769ecaf6e99a2/fastapi-0.136.1.tar.gz", hash = "sha256:7af665ad7acfa0a3baf8983d393b6b471b9da10ede59c60045f49fbc89a0fa7f", size = 397448, upload-time = "2026-04-23T16:49:44.046Z" }
 wheels = [
    { url = "https://files.pythonhosted.org/packages/5a/ff/2e4eca3ade2c22fe1dea7043b8ee9dabe47753349eb1b56a202de8af6349/fastapi-0.136.1-py3-none-any.whl", hash = "sha256:a6e9d7eeada96c93a4d69cb03836b44fa34e2854accb7244a1ece36cd4781c3f", size = 117683, upload-time = "2026-04-23T16:49:42.437Z" },
 ]
 [[package]]
 name = "fastmcp"
 version = "3.2.4"
@@ -1686,35 +1711,34 @@ name = "schwab-mcp-custom"
 version = "0.1.0"
 source = { editable = "." }
 dependencies = [
    { name = "aiohttp" },
    { name = "fastapi" },
    { name = "fastmcp" },
    { name = "greenlet" },
    { name = "mcp" },
-    { name = "schwab-scraper" },
+    { name = "pdfplumber" },
    { name = "playwright" },
    { name = "pyee" },
    { name = "starlette" },
    { name = "typing-extensions" },
    { name = "uvicorn" },
 ]
 [package.metadata]
 requires-dist = [
    { name = "aiohttp", specifier = ">=3.9.0" },
    { name = "fastapi", specifier = ">=0.136.1" },
    { name = "fastmcp", specifier = ">=0.4.1" },
    { name = "greenlet", specifier = ">=3.2.3" },
    { name = "mcp", specifier = ">=1.2.0" },
-    { name = "schwab-scraper", git = "https://gitea.ext.ben.io/b3nw/schwab-scraper.git" },
+    { name = "pdfplumber", specifier = ">=0.11.4" },
    { name = "playwright", specifier = "==1.54.0" },
    { name = "pyee", specifier = ">=13.0.0" },
    { name = "starlette", specifier = ">=0.41.0" },
    { name = "typing-extensions", specifier = ">=4.14.0" },
    { name = "uvicorn", specifier = ">=0.32.0" },
 ]
 [[package]]
 name = "schwab-scraper"
 version = "0.6.16"
 source = { git = "https://gitea.ext.ben.io/b3nw/schwab-scraper.git#f1680aec7e26d4ec0ba71890b2f585bec0aeb13d" }
 dependencies = [
    { name = "aiohttp" },
    { name = "greenlet" },
    { name = "pdfplumber" },
    { name = "playwright" },
    { name = "pyee" },
    { name = "typing-extensions" },
 ]
 [[package]]
 name = "secretstorage"
 version = "3.5.0"