Fix build: Bundle schwab_scraper source and use local dependencies
All checks were successful
Build and Push Docker Image / build (push) Successful in 34s
All checks were successful
Build and Push Docker Image / build (push) Successful in 34s
This commit is contained in:
470
schwab_scraper/browser/session.py
Normal file
470
schwab_scraper/browser/session.py
Normal file
@@ -0,0 +1,470 @@
|
||||
"""
|
||||
Session management module for maintaining Schwab authenticated sessions.
|
||||
This module provides functionality to refresh session state through browser navigation
|
||||
without requiring 2FA approval for active sessions.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
|
||||
from playwright.async_api import async_playwright
|
||||
from ..core.config import load_config, get_playwright_url, get_cookies_path
|
||||
from .client import new_context, new_page
|
||||
from ..core import ErrorType, Envelope, fail, ok
|
||||
|
||||
|
||||
async def refresh_session_state(cookies: Optional[List[Dict[str, Any]]] = None) -> bool:
    """
    Refresh session state through browser navigation.

    Connects to the Playwright endpoint returned by ``get_playwright_url``,
    opens a browser context seeded with the given cookies and navigates to a
    Schwab research page. If the page is not redirected to a login URL and the
    critical session cookies are still present, the cookies held by the
    context after navigation are written back to the cookies file.

    Args:
        cookies: Optional list of cookies to use. If None, they are loaded
            from the path returned by ``get_cookies_path()``.

    Returns:
        bool: True if session refresh was successful, False otherwise.
        Failures derived from ``Exception`` are logged and reported as False
        rather than raised.
    """
    logger = logging.getLogger(__name__)

    try:
        logger.info("Starting session refresh through navigation")

        # Load cookies if not provided
        if cookies is None:
            cookies_path = get_cookies_path()
            try:
                with open(cookies_path, 'r') as f:
                    cookies = json.load(f)
                logger.info(f"Loaded {len(cookies) if cookies else 0} cookies from {cookies_path}")
            except (FileNotFoundError, json.JSONDecodeError) as e:
                logger.error(f"Could not load cookies: {e}")
                return False

        if not cookies:
            logger.error("No cookies available for session refresh")
            return False

        config = load_config()
        playwright_url = get_playwright_url(config)

        async with async_playwright() as p:
            try:
                browser = await p.chromium.connect(playwright_url)
            except Exception as e:
                logger.error(f"Failed to connect to browser: {e}")
                return False

            # Bound up front so the cleanup below can tell whether a context
            # was ever created (new_context itself may fail).
            context = None
            try:
                # Create context with existing cookies
                context = await new_context(browser, cookies=cookies)
                page = await new_page(context)

                # Navigate to refresh session state
                logger.info("Navigating to Schwab research page to refresh session")
                await page.goto("https://client.schwab.com/app/research/#/stocks/AAPL", timeout=30000)
                await page.wait_for_timeout(2000)  # Let page settle and cookies update

                # Check if navigation was successful (no redirect to login)
                current_url = page.url
                is_redirected = any(pattern in current_url for pattern in [
                    '/login', '/signin', '/auth', '/Access/'
                ])

                if is_redirected:
                    logger.warning("Session refresh failed: redirected to login page")
                    logger.debug(f"Current URL: {current_url}")
                    return False

                # Get updated cookies after navigation
                new_cookies = await context.cookies()
                logger.info(f"Retrieved {len(new_cookies)} cookies after navigation")

                # Check if we still have critical session cookies
                critical_session_cookies = ['LVAL', 'NS2', 'sstate']
                missing_critical_cookies = []

                for cookie_name in critical_session_cookies:
                    old_cookie = next((c for c in cookies if c['name'] == cookie_name), None)
                    new_cookie = next((c for c in new_cookies if c['name'] == cookie_name), None)

                    if not new_cookie:
                        missing_critical_cookies.append(cookie_name)
                    elif old_cookie and new_cookie.get('expires') != -1:
                        # Session cookies should have expires = -1
                        missing_critical_cookies.append(f"{cookie_name} (invalid session cookie)")

                if missing_critical_cookies:
                    logger.warning(f"Session refresh failed: missing critical session cookies: {missing_critical_cookies}")
                    return False

                # Compare cookie states to detect changes (diagnostic only;
                # the result does not affect the return value)
                changes = []
                old_dict = {c['name']: c for c in cookies}
                new_dict = {c['name']: c for c in new_cookies}

                # Check for modified cookies (especially expiration changes)
                for name, old_cookie in old_dict.items():
                    if name not in new_dict:
                        continue
                    old_expires = old_cookie.get('expires', -1)
                    new_expires = new_dict[name].get('expires', -1)
                    if old_expires != new_expires:
                        changes.append({
                            'type': 'expiration_changed',
                            'name': name,
                            'old_expires': old_expires,
                            'new_expires': new_expires
                        })

                if changes:
                    logger.info(f"Detected {len(changes)} cookie changes (session refreshed)")
                    for change in changes[:3]:  # Show first 3
                        logger.debug(f"  {change['name']}: expiration updated")
                else:
                    logger.info("No cookie changes detected (session maintained)")

                # Save updated cookies
                cookies_path = get_cookies_path()
                with open(cookies_path, 'w') as f:
                    json.dump(new_cookies, f, indent=2)
                logger.info(f"Saved {len(new_cookies)} updated cookies")

                return True

            except Exception as e:
                logger.error(f"Error during session refresh: {e}")
                return False

            finally:
                # Single cleanup point for every exit path. Only Exception is
                # suppressed so task cancellation is never swallowed, and a
                # failed close cannot turn a completed refresh into a failure.
                if context is not None:
                    try:
                        await context.close()
                    except Exception as close_error:
                        logger.debug(f"Ignoring error while closing context: {close_error}")
                try:
                    await browser.close()
                except Exception as close_error:
                    logger.debug(f"Ignoring error while closing browser: {close_error}")

    except Exception as e:
        logger.error(f"Session refresh failed: {e}")
        return False
|
||||
|
||||
|
||||
async def maintain_session_health() -> bool:
    """
    Check if the current session is healthy by attempting a simple navigation.

    The stored cookies are first screened for at least one known session
    cookie that is either a browser-session cookie (``expires == -1``) or not
    yet expired. Only then is a browser context opened and a Schwab research
    page loaded; the resulting URL (and, as a fallback, the presence of login
    form elements) decides the outcome.

    Returns:
        bool: True if session is healthy, False if refresh is needed.
        Failures derived from ``Exception`` are logged and reported as False.
    """
    logger = logging.getLogger(__name__)

    try:
        logger.info("Checking session health")

        # Load current cookies
        cookies_path = get_cookies_path()
        try:
            with open(cookies_path, 'r') as f:
                cookies = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            logger.error("No valid cookies found")
            return False

        if not cookies:
            logger.error("No cookies available")
            return False

        # First, check if we have valid session cookies (basic check)
        current_time = int(time.time())
        has_valid_session_cookies = False

        for cookie in cookies:
            name = cookie.get('name', '')
            expires = cookie.get('expires', -1)

            # Check for actual Schwab session cookies
            if name in ['auth', 'ASP.NET_SessionId', 'SessionInfo', '__RequestVerificationToken']:
                # Session cookies (expires=-1) are valid until browser closes
                # Other cookies must not be expired
                if expires == -1 or (expires and expires > current_time):
                    has_valid_session_cookies = True
                    break

        if not has_valid_session_cookies:
            logger.warning("Session health check: FAILED - no valid session cookies found")
            return False

        config = load_config()
        playwright_url = get_playwright_url(config)

        async with async_playwright() as p:
            # A connection failure here is handled by the outer except below.
            browser = await p.chromium.connect(playwright_url)

            # Bound up front so the cleanup below can tell whether a context
            # was ever created (new_context itself may fail).
            context = None
            try:
                context = await new_context(browser, cookies=cookies)
                page = await new_page(context)

                # Navigate to a simple page to test session
                await page.goto("https://client.schwab.com/app/research/#/stocks/AAPL", timeout=30000)

                # Check if we're still authenticated by URL pattern
                current_url = page.url
                logger.debug(f"Current URL after navigation: {current_url}")

                is_authenticated_by_url = any(pattern in current_url for pattern in [
                    '/app/', '/Apps/', '/accounts/', '/Areas/Accounts', '/summary'
                ])

                # Check for login redirect patterns
                is_redirected = any(pattern in current_url for pattern in [
                    '/login', '/signin', '/auth', '/Access/'
                ])

                logger.debug(f"Authenticated by URL pattern: {is_authenticated_by_url}")
                logger.debug(f"Redirected to login: {is_redirected}")

                # Primary check: If we're not redirected and have a good URL pattern, we're authenticated
                if is_authenticated_by_url and not is_redirected:
                    logger.info("Session health check: PASSED - authenticated URL detected")
                    result = True
                elif is_redirected:
                    logger.warning("Session health check: FAILED - redirect to login detected")
                    result = False
                else:
                    # Secondary check: Look for any page content that indicates we're not on a login page
                    try:
                        # Check for login form elements
                        login_indicators = [
                            'input[type="password"]',
                            'input[name*="login"]',
                            'input[name*="user"]',
                            'input[id*="login"]',
                            'input[id*="user"]',
                            'button:has-text("Log In")',
                            'button:has-text("Sign In")'
                        ]

                        login_found = False
                        for selector in login_indicators:
                            login_element = await page.query_selector(selector)
                            if login_element:
                                login_found = True
                                break

                        if login_found:
                            logger.warning("Session health check: FAILED - login form detected")
                            result = False
                        else:
                            logger.info("Session health check: PASSED - no login form detected")
                            result = True

                    except Exception as e:
                        logger.debug(f"Login form check error: {e}")
                        # If we can't check, assume healthy if we have valid cookies and no redirect
                        logger.info("Session health check: PASSED - based on cookies and URL")
                        result = True

                return result

            except Exception as e:
                logger.error(f"Session health check error: {e}")
                return False

            finally:
                # Single cleanup point for every exit path. Only Exception is
                # suppressed so task cancellation is never swallowed, and a
                # failed close cannot overturn an already-computed result.
                if context is not None:
                    try:
                        await context.close()
                    except Exception as close_error:
                        logger.debug(f"Ignoring error while closing context: {close_error}")
                try:
                    await browser.close()
                except Exception as close_error:
                    logger.debug(f"Ignoring error while closing browser: {close_error}")

    except Exception as e:
        logger.error(f"Session health check failed: {e}")
        return False
|
||||
|
||||
|
||||
def get_session_info() -> Dict[str, Any]:
    """
    Summarise the cookies stored in the cookies file.

    Returns:
        Dict with the total cookie count, the number of cookies whose name
        contains 'session', 'auth' or 'token' (case-insensitive), the cookies
        with a positive expiry that is less than seven days ahead, and a
        'session_status' string. When the cookies file is missing or is not
        valid JSON, a dict with zero counts, an 'error' message and
        'session_status' set to 'missing_cookies' is returned instead.
    """
    cookies_path = get_cookies_path()
    try:
        with open(cookies_path, 'r') as handle:
            stored = json.load(handle)

        now_ts = datetime.now().timestamp()
        session_keywords = ['session', 'auth', 'token']
        seconds_per_day = 24 * 3600
        session_entries = []
        expiring_entries = []

        for entry in stored:
            cookie_name = entry.get('name', '')
            expiry = entry.get('expires', -1)

            # Session-related cookies are recognised purely by name.
            if any(word in cookie_name.lower() for word in session_keywords):
                session_entries.append({
                    'name': cookie_name,
                    'domain': entry.get('domain', ''),
                    'expires': expiry,
                    'is_session_cookie': expiry == -1
                })

            # Cookies without a positive expiry timestamp cannot be "expiring".
            if expiry == -1 or not expiry > 0:
                continue

            days_left = (expiry - now_ts) / seconds_per_day
            if days_left < 7:  # Expiring within a week
                expiring_entries.append({
                    'name': cookie_name,
                    'days_until_expire': days_left
                })

        status = 'active' if session_entries else 'no_session_cookies'
        return {
            'total_cookies': len(stored),
            'session_cookies': len(session_entries),
            'expiring_cookies': len(expiring_entries),
            'expiring_soon': expiring_entries,
            'session_status': status
        }

    except (FileNotFoundError, json.JSONDecodeError):
        return {
            'error': 'No valid cookies found',
            'total_cookies': 0,
            'session_cookies': 0,
            'expiring_cookies': 0,
            'expiring_soon': [],
            'session_status': 'missing_cookies'
        }
|
||||
|
||||
|
||||
async def ensure_valid_session() -> bool:
    """
    Ensure we have a valid session, attempting refresh if needed.

    The stored cookies are loaded first; if they are missing, unreadable or
    empty the function gives up immediately. Otherwise the session is probed
    with ``maintain_session_health`` and, only when that probe fails, a
    navigation refresh is attempted with the loaded cookies.

    Returns:
        bool: True if a valid session exists or was successfully refreshed
    """
    logger = logging.getLogger(__name__)

    # Without stored cookies there is nothing to validate or refresh.
    cookies_path = get_cookies_path()
    try:
        with open(cookies_path, 'r') as handle:
            stored_cookies = json.load(handle)
    except (FileNotFoundError, json.JSONDecodeError):
        logger.error("No valid cookies found")
        return False

    if not stored_cookies:
        logger.error("No cookies available")
        return False

    session_ok = await maintain_session_health()
    if not session_ok:
        # Health probe failed: fall back to a navigation refresh.
        logger.info("Session needs refresh, attempting navigation refresh")
        return await refresh_session_state(stored_cookies)

    logger.info("Session is healthy")
    return True
|
||||
|
||||
|
||||
async def get_session_status(debug: bool = False) -> Envelope[dict]:
    """
    Report the session status wrapped in an envelope.

    Starts from the cookie summary produced by ``get_session_info``. When
    that summary says the session is 'active', the session is additionally
    probed with ``maintain_session_health``; a failed probe rewrites the
    status to 'invalid' and adds a 'validation_error' entry.

    Args:
        debug: Accepted for interface compatibility; not read by this function.

    Returns:
        ``ok(info)`` with the summary dict, or ``fail(...)`` with
        ``ErrorType.UNKNOWN`` (retryable) if anything raised.
    """
    logger = logging.getLogger(__name__)

    try:
        # Cheap, file-based summary first.
        status = get_session_info()

        cookies_look_active = status.get('session_status') == 'active'
        if cookies_look_active:
            # Cookies being present does not prove Schwab still accepts them.
            logger.debug("Session cookies found, validating with Schwab...")

            if await maintain_session_health():
                logger.debug("Session validation succeeded")
            else:
                status['session_status'] = 'invalid'
                status['validation_error'] = 'Session cookies exist but Schwab authentication failed'
                logger.warning("Session validation failed: cookies present but not accepted by Schwab")

        logger.debug("Session status info: %s", status)
        return ok(status)
    except Exception as err:
        logger.exception("Failed to gather session status")
        return fail(str(err), ErrorType.UNKNOWN, retryable=True)
|
||||
|
||||
|
||||
async def refresh_session(debug: bool = False) -> Envelope[None]:
    """
    Run a navigation-based session refresh and wrap the outcome in an envelope.

    Args:
        debug: Accepted for interface compatibility; not read by this function.

    Returns:
        ``ok(None)`` when ``refresh_session_state`` reports success,
        ``fail(...)`` with ``ErrorType.AUTHENTICATION`` when it reports
        failure, or ``fail(...)`` with ``ErrorType.UNKNOWN`` if it raised.
        Both failure envelopes are marked retryable.
    """
    logger = logging.getLogger(__name__)

    try:
        if not await refresh_session_state():
            logger.warning("Session refresh failed")
            return fail("Session refresh failed", ErrorType.AUTHENTICATION, retryable=True)

        logger.info("Session refresh succeeded")
        return ok(None)
    except Exception as err:
        logger.exception("Exception during session refresh")
        return fail(str(err), ErrorType.UNKNOWN, retryable=True)
|
||||
|
||||
|
||||
async def set_cookies_from_file(path: str, debug: bool = False) -> Envelope[None]:
    """
    Import cookies from a JSON file into the cookies file used by this module.

    The payload is validated before anything is written: the other functions
    in this module iterate the stored cookies as a list of dicts, so any other
    JSON shape is rejected instead of overwriting the existing cookie store.

    Args:
        path: Path of the JSON file to import.
        debug: Accepted for interface compatibility; not read by this function.

    Returns:
        ``ok(None)`` on success; ``fail(...)`` with ``ErrorType.VALIDATION``
        (not retryable) when the file is missing, is not valid JSON or does
        not contain a list of cookie objects; ``fail(...)`` with
        ``ErrorType.UNKNOWN`` (retryable) for any other error.
    """
    logger = logging.getLogger(__name__)

    try:
        with open(path, "r") as fh:
            cookies = json.load(fh)

        # Reject malformed payloads before touching the existing cookie store.
        if not isinstance(cookies, list) or not all(isinstance(c, dict) for c in cookies):
            message = "Cookie file must contain a JSON list of cookie objects"
            logger.error("Failed to load cookies from %s: %s", path, message)
            return fail(message, ErrorType.VALIDATION, retryable=False)

        cookies_path = get_cookies_path()
        with open(cookies_path, "w") as fh:
            json.dump(cookies, fh, indent=2)

        logger.info("Imported %s cookies from %s", len(cookies), path)
        return ok(None)
    except (FileNotFoundError, json.JSONDecodeError) as exc:
        logger.error("Failed to load cookies from %s: %s", path, exc)
        return fail(str(exc), ErrorType.VALIDATION, retryable=False)
    except Exception as exc:
        logger.exception("Unexpected error importing cookies from %s", path)
        return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
|
||||
|
||||
|
||||
async def export_cookies(path: str, debug: bool = False) -> Envelope[None]:
    """
    Copy the stored cookies to another JSON file.

    The cookies file is read and parsed completely before the destination is
    opened for writing, so an unreadable source never truncates the target.

    Args:
        path: Destination path for the exported JSON.
        debug: Accepted for interface compatibility; not read by this function.

    Returns:
        ``ok(None)`` on success; ``fail(...)`` with
        ``ErrorType.AUTHENTICATION`` (not retryable) on ``FileNotFoundError``
        or invalid JSON; ``fail(...)`` with ``ErrorType.UNKNOWN`` (retryable)
        for any other error.
    """
    logger = logging.getLogger(__name__)

    source_path = get_cookies_path()
    try:
        # Read and parse first ...
        with open(source_path, "r") as src:
            payload = json.load(src)

        # ... then write the parsed cookies to the requested destination.
        with open(path, "w") as dst:
            json.dump(payload, dst, indent=2)

        logger.info("Exported %s cookies to %s", len(payload), path)
        return ok(None)
    except (FileNotFoundError, json.JSONDecodeError) as err:
        logger.error("Failed to read cookies for export: %s", err)
        return fail(str(err), ErrorType.AUTHENTICATION, retryable=False)
    except Exception as err:
        logger.exception("Unexpected error exporting cookies to %s", path)
        return fail(str(err), ErrorType.UNKNOWN, retryable=True)
|
||||
Reference in New Issue
Block a user