Fix build: Bundle schwab_scraper source and use local dependencies
All checks were successful
Build and Push Docker Image / build (push) Successful in 34s

This commit is contained in:
2026-04-24 01:50:20 +00:00
parent 02ac293692
commit 650ea2d087
43 changed files with 10900 additions and 41 deletions

View File

@@ -0,0 +1,470 @@
"""
Session management module for maintaining Schwab authenticated sessions.
This module provides functionality to refresh session state through browser navigation
without requiring 2FA approval for active sessions.
"""
import json
import logging
import time
from typing import List, Dict, Any, Optional
from datetime import datetime
from playwright.async_api import async_playwright
from ..core.config import load_config, get_playwright_url, get_cookies_path
from .client import new_context, new_page
from ..core import ErrorType, Envelope, fail, ok
async def refresh_session_state(cookies: Optional[List[Dict[str, Any]]] = None) -> bool:
    """
    Refresh session state through browser navigation.

    Connects to the remote Playwright browser, opens a context seeded with the
    given cookies, navigates to a Schwab research page, and writes the cookies
    the context holds afterwards back to the cookie file.

    Args:
        cookies: Optional list of cookie dicts to use. If None, they are loaded
            from the file returned by ``get_cookies_path()``.

    Returns:
        bool: True if the navigation stayed off the login pages, the critical
        session cookies were still present, and the updated cookies were
        saved; False otherwise (including on any error, which is logged).
    """
    logger = logging.getLogger(__name__)
    try:
        logger.info("Starting session refresh through navigation")
        # Load cookies if not provided
        if cookies is None:
            cookies_path = get_cookies_path()
            try:
                with open(cookies_path, 'r') as f:
                    cookies = json.load(f)
                logger.info(f"Loaded {len(cookies) if cookies else 0} cookies from {cookies_path}")
            except (FileNotFoundError, json.JSONDecodeError) as e:
                logger.error(f"Could not load cookies: {e}")
                return False
        if not cookies:
            logger.error("No cookies available for session refresh")
            return False
        config = load_config()
        playwright_url = get_playwright_url(config)
        async with async_playwright() as p:
            try:
                browser = await p.chromium.connect(playwright_url)
            except Exception as e:
                logger.error(f"Failed to connect to browser: {e}")
                return False
            # Bound up front so the cleanup in ``finally`` can tell whether a
            # context was ever created.
            context = None
            try:
                # Create context with existing cookies
                context = await new_context(browser, cookies=cookies)
                page = await new_page(context)
                # Navigate to refresh session state
                logger.info("Navigating to Schwab research page to refresh session")
                await page.goto("https://client.schwab.com/app/research/#/stocks/AAPL", timeout=30000)
                await page.wait_for_timeout(2000)  # Let page settle and cookies update
                # Check if navigation was successful (no redirect to login)
                current_url = page.url
                is_redirected = any(pattern in current_url for pattern in [
                    '/login', '/signin', '/auth', '/Access/'
                ])
                if is_redirected:
                    logger.warning("Session refresh failed: redirected to login page")
                    logger.debug(f"Current URL: {current_url}")
                    return False
                # Get updated cookies after navigation
                new_cookies = await context.cookies()
                logger.info(f"Retrieved {len(new_cookies)} cookies after navigation")
                # Check if we still have critical session cookies
                critical_session_cookies = ['LVAL', 'NS2', 'sstate']
                missing_critical_cookies = []
                for cookie_name in critical_session_cookies:
                    old_cookie = next((c for c in cookies if c['name'] == cookie_name), None)
                    new_cookie = next((c for c in new_cookies if c['name'] == cookie_name), None)
                    if not new_cookie:
                        missing_critical_cookies.append(cookie_name)
                    elif old_cookie and new_cookie.get('expires') != -1:
                        # Session cookies should have expires = -1
                        missing_critical_cookies.append(f"{cookie_name} (invalid session cookie)")
                if missing_critical_cookies:
                    logger.warning(f"Session refresh failed: missing critical session cookies: {missing_critical_cookies}")
                    return False
                # Compare cookie states to detect changes (used for logging only)
                old_dict = {c['name']: c for c in cookies}
                new_dict = {c['name']: c for c in new_cookies}
                changes = []
                for name, old_cookie in old_dict.items():
                    if name not in new_dict:
                        continue
                    old_expires = old_cookie.get('expires', -1)
                    new_expires = new_dict[name].get('expires', -1)
                    if old_expires != new_expires:
                        changes.append({
                            'type': 'expiration_changed',
                            'name': name,
                            'old_expires': old_expires,
                            'new_expires': new_expires
                        })
                if changes:
                    logger.info(f"Detected {len(changes)} cookie changes (session refreshed)")
                    for change in changes[:3]:  # Show first 3
                        logger.debug(f"  {change['name']}: expiration updated")
                else:
                    logger.info("No cookie changes detected (session maintained)")
                # Save updated cookies
                cookies_path = get_cookies_path()
                with open(cookies_path, 'w') as f:
                    json.dump(new_cookies, f, indent=2)
                logger.info(f"Saved {len(new_cookies)} updated cookies")
                return True
            except Exception as e:
                logger.error(f"Error during session refresh: {e}")
                return False
            finally:
                # Single cleanup point for every exit path above. A failure to
                # close must not mask the outcome already decided, so it is
                # logged and ignored.
                for resource in (context, browser):
                    if resource is None:
                        continue
                    try:
                        await resource.close()
                    except Exception as close_error:
                        logger.debug(f"Ignoring error while closing browser resource: {close_error}")
    except Exception as e:
        logger.error(f"Session refresh failed: {e}")
        return False
async def maintain_session_health() -> bool:
    """
    Check if the current session is healthy by attempting a simple navigation.

    The check has two stages: a cheap inspection of the stored cookies for at
    least one unexpired session cookie, then a real navigation in the remote
    browser, judged by the resulting URL and, if the URL is inconclusive, by
    whether a login form is present on the page.

    Returns:
        bool: True if session is healthy, False if refresh is needed (or if
        the check itself failed; errors are logged, not raised).
    """
    logger = logging.getLogger(__name__)
    try:
        logger.info("Checking session health")
        # Load current cookies
        cookies_path = get_cookies_path()
        try:
            with open(cookies_path, 'r') as f:
                cookies = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            logger.error("No valid cookies found")
            return False
        if not cookies:
            logger.error("No cookies available")
            return False
        # First, check if we have valid session cookies (basic check)
        current_time = int(time.time())
        has_valid_session_cookies = False
        for cookie in cookies:
            name = cookie.get('name', '')
            expires = cookie.get('expires', -1)
            # Check for actual Schwab session cookies
            if name in ['auth', 'ASP.NET_SessionId', 'SessionInfo', '__RequestVerificationToken']:
                # Session cookies (expires=-1) are valid until browser closes
                # Other cookies must not be expired
                if expires == -1 or (expires and expires > current_time):
                    has_valid_session_cookies = True
                    break
        if not has_valid_session_cookies:
            logger.warning("Session health check: FAILED - no valid session cookies found")
            return False
        config = load_config()
        playwright_url = get_playwright_url(config)
        async with async_playwright() as p:
            browser = await p.chromium.connect(playwright_url)
            # Bound up front so the cleanup in ``finally`` can tell whether a
            # context was ever created.
            context = None
            try:
                context = await new_context(browser, cookies=cookies)
                page = await new_page(context)
                # Navigate to a simple page to test session
                await page.goto("https://client.schwab.com/app/research/#/stocks/AAPL", timeout=30000)
                # Check if we're still authenticated by URL pattern
                current_url = page.url
                logger.debug(f"Current URL after navigation: {current_url}")
                is_authenticated_by_url = any(pattern in current_url for pattern in [
                    '/app/', '/Apps/', '/accounts/', '/Areas/Accounts', '/summary'
                ])
                # Check for login redirect patterns
                is_redirected = any(pattern in current_url for pattern in [
                    '/login', '/signin', '/auth', '/Access/'
                ])
                logger.debug(f"Authenticated by URL pattern: {is_authenticated_by_url}")
                logger.debug(f"Redirected to login: {is_redirected}")
                # Primary check: If we're not redirected and have a good URL pattern, we're authenticated
                if is_authenticated_by_url and not is_redirected:
                    logger.info("Session health check: PASSED - authenticated URL detected")
                    return True
                if is_redirected:
                    logger.warning("Session health check: FAILED - redirect to login detected")
                    return False
                # Secondary check: Look for any page content that indicates we're not on a login page
                try:
                    # Check for login form elements
                    login_indicators = [
                        'input[type="password"]',
                        'input[name*="login"]',
                        'input[name*="user"]',
                        'input[id*="login"]',
                        'input[id*="user"]',
                        'button:has-text("Log In")',
                        'button:has-text("Sign In")'
                    ]
                    login_found = False
                    for selector in login_indicators:
                        login_element = await page.query_selector(selector)
                        if login_element:
                            login_found = True
                            break
                    if login_found:
                        logger.warning("Session health check: FAILED - login form detected")
                        return False
                    logger.info("Session health check: PASSED - no login form detected")
                    return True
                except Exception as e:
                    logger.debug(f"Login form check error: {e}")
                    # If we can't check, assume healthy if we have valid cookies and no redirect
                    logger.info("Session health check: PASSED - based on cookies and URL")
                    return True
            except Exception as e:
                logger.error(f"Session health check error: {e}")
                return False
            finally:
                # Single cleanup point for every exit path above. A failure to
                # close must not change the verdict already reached, so it is
                # logged and ignored.
                for resource in (context, browser):
                    if resource is None:
                        continue
                    try:
                        await resource.close()
                    except Exception as close_error:
                        logger.debug(f"Ignoring error while closing browser resource: {close_error}")
    except Exception as e:
        logger.error(f"Session health check failed: {e}")
        return False
def get_session_info() -> Dict[str, Any]:
"""
Summarize the cookies stored in the cookie file without contacting Schwab.
Reads the JSON file at get_cookies_path() and classifies cookies by name
and expiry only; it does not verify that the session is accepted remotely.
Returns:
Dict with the keys 'total_cookies', 'session_cookies' (a count),
'expiring_cookies' (a count), 'expiring_soon' (list of name/days entries)
and 'session_status' ('active', 'no_session_cookies' or 'missing_cookies').
When the file is missing or is not valid JSON, the counts are zero and an
'error' key is added.
"""
cookies_path = get_cookies_path()
try:
with open(cookies_path, 'r') as f:
cookies = json.load(f)
session_cookies = []
expiring_cookies = []
current_time = datetime.now().timestamp()
for cookie in cookies:
name = cookie.get('name', '')
expires = cookie.get('expires', -1)
# Check if this is a session-related cookie
# (case-insensitive substring match on the cookie name)
if any(keyword in name.lower() for keyword in ['session', 'auth', 'token']):
session_cookies.append({
'name': name,
'domain': cookie.get('domain', ''),
'expires': expires,
'is_session_cookie': expires == -1
})
# NOTE(review): indentation was lost in this view, so it is unclear whether
# the expiry check below applies to every cookie or only to the
# session-related ones matched above - confirm against the original file.
# expires == -1 is treated elsewhere in this module as "session cookie, no expiry".
if expires != -1 and expires > 0:
days_until_expire = (expires - current_time) / (24 * 3600)
# Already-expired cookies yield a negative value and therefore also match.
if days_until_expire < 7: # Expiring within a week
expiring_cookies.append({
'name': name,
'days_until_expire': days_until_expire
})
return {
'total_cookies': len(cookies),
'session_cookies': len(session_cookies),
'expiring_cookies': len(expiring_cookies),
'expiring_soon': expiring_cookies,
# 'active' only means a session-like cookie name exists; expiry is not considered here.
'session_status': 'active' if session_cookies else 'no_session_cookies'
}
except (FileNotFoundError, json.JSONDecodeError):
# Same shape as the success result, plus an 'error' message.
return {
'error': 'No valid cookies found',
'total_cookies': 0,
'session_cookies': 0,
'expiring_cookies': 0,
'expiring_soon': [],
'session_status': 'missing_cookies'
}
async def ensure_valid_session() -> bool:
    """
    Make sure a usable session exists, refreshing it when the health check fails.

    Returns:
        bool: False when the cookie file is missing, unparsable or empty;
        True when the health check passes; otherwise the result of a
        navigation-based refresh using the stored cookies.
    """
    log = logging.getLogger(__name__)
    store_path = get_cookies_path()

    # Step 1: there must be something on disk to work with.
    try:
        with open(store_path, 'r') as handle:
            stored_cookies = json.load(handle)
    except (FileNotFoundError, json.JSONDecodeError):
        log.error("No valid cookies found")
        return False
    if not stored_cookies:
        log.error("No cookies available")
        return False

    # Step 2: a healthy session needs no further work.
    session_ok = await maintain_session_health()
    if session_ok:
        log.info("Session is healthy")
        return True

    # Step 3: fall back to a refresh with the cookies read above.
    log.info("Session needs refresh, attempting navigation refresh")
    refreshed = await refresh_session_state(stored_cookies)
    return refreshed
async def get_session_status(debug: bool = False) -> Envelope[dict]:
    """
    Report the session status, validating stored session cookies against Schwab.

    Starts from the cookie summary produced by get_session_info(). When that
    summary says 'active', a live health check is run and a failed check
    downgrades the status to 'invalid' with a 'validation_error' message.

    Args:
        debug: Accepted for interface compatibility; not read by this function.

    Returns:
        An ok envelope wrapping the status dict, or a retryable UNKNOWN
        failure envelope if anything raised.
    """
    log = logging.getLogger(__name__)
    try:
        status = get_session_info()
        cookies_look_active = status.get('session_status') == 'active'
        if cookies_look_active:
            log.debug("Session cookies found, validating with Schwab...")
            # Cookies being present is not proof they still work; test them.
            if await maintain_session_health():
                log.debug("Session validation succeeded")
            else:
                status['session_status'] = 'invalid'
                status['validation_error'] = 'Session cookies exist but Schwab authentication failed'
                log.warning("Session validation failed: cookies present but not accepted by Schwab")
        log.debug("Session status info: %s", status)
        return ok(status)
    except Exception as error:
        log.exception("Failed to gather session status")
        return fail(str(error), ErrorType.UNKNOWN, retryable=True)
async def refresh_session(debug: bool = False) -> Envelope[None]:
    """
    Envelope-returning wrapper around refresh_session_state().

    Args:
        debug: Accepted for interface compatibility; not read by this function.

    Returns:
        ok(None) when the refresh reported success, a retryable
        AUTHENTICATION failure when it reported failure, or a retryable
        UNKNOWN failure if an exception escaped.
    """
    log = logging.getLogger(__name__)
    try:
        succeeded = await refresh_session_state()
        if not succeeded:
            log.warning("Session refresh failed")
            return fail("Session refresh failed", ErrorType.AUTHENTICATION, retryable=True)
        log.info("Session refresh succeeded")
        return ok(None)
    except Exception as error:
        log.exception("Exception during session refresh")
        return fail(str(error), ErrorType.UNKNOWN, retryable=True)
async def set_cookies_from_file(path: str, debug: bool = False) -> Envelope[None]:
    """
    Import cookies from a JSON file into the cookie store.

    The payload is validated before the store is touched, so a malformed
    import cannot clobber the existing cookies. The rest of this module
    iterates the store as a list of dicts (``cookie.get('name')``), so that
    is the only shape accepted.

    Args:
        path: Path of the JSON file to import.
        debug: Accepted for interface compatibility; not read by this function.

    Returns:
        ok(None) on success; a non-retryable VALIDATION failure when the file
        is missing, is not valid JSON, or is not a JSON list of objects; a
        retryable UNKNOWN failure for any other error.
    """
    logger = logging.getLogger(__name__)
    try:
        with open(path, "r") as fh:
            cookies = json.load(fh)
        # Reject bad payloads *before* opening the store for writing.
        if not isinstance(cookies, list) or not all(isinstance(c, dict) for c in cookies):
            message = "Cookie file must contain a JSON list of cookie objects"
            logger.error("Failed to load cookies from %s: %s", path, message)
            return fail(message, ErrorType.VALIDATION, retryable=False)
        cookies_path = get_cookies_path()
        with open(cookies_path, "w") as fh:
            json.dump(cookies, fh, indent=2)
        logger.info("Imported %s cookies from %s", len(cookies), path)
        return ok(None)
    except (FileNotFoundError, json.JSONDecodeError) as exc:
        logger.error("Failed to load cookies from %s: %s", path, exc)
        return fail(str(exc), ErrorType.VALIDATION, retryable=False)
    except Exception as exc:
        logger.exception("Unexpected error importing cookies from %s", path)
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
async def export_cookies(path: str, debug: bool = False) -> Envelope[None]:
    """
    Copy the stored cookies to another JSON file.

    Args:
        path: Destination file; it is overwritten.
        debug: Accepted for interface compatibility; not read by this function.

    Returns:
        ok(None) on success; a non-retryable AUTHENTICATION failure when the
        cookie store is missing or not valid JSON; a retryable UNKNOWN
        failure for any other error.
    """
    log = logging.getLogger(__name__)
    store_path = get_cookies_path()
    try:
        with open(store_path, "r") as source:
            payload = json.load(source)
        # Serialize first, then write; the payload came from json.load, so
        # the text is the same as streaming it with json.dump.
        serialized = json.dumps(payload, indent=2)
        with open(path, "w") as destination:
            destination.write(serialized)
        log.info("Exported %s cookies to %s", len(payload), path)
        return ok(None)
    except (FileNotFoundError, json.JSONDecodeError) as error:
        log.error("Failed to read cookies for export: %s", error)
        return fail(str(error), ErrorType.AUTHENTICATION, retryable=False)
    except Exception as error:
        log.exception("Unexpected error exporting cookies to %s", path)
        return fail(str(error), ErrorType.UNKNOWN, retryable=True)