import json import os import time import logging from typing import List, Dict, Any, Optional, Tuple from playwright.async_api import async_playwright from ..core.config import load_config, get_playwright_url, get_cookies_path from ..utils.logging import save_debug_artifact async def is_session_valid() -> bool: """Check if current cookies.json contains a valid session This function validates that we have a truly valid session by checking: 1. Multiple critical session cookies are present 2. Those cookies haven't expired 3. Session was established relatively recently (within 24 hours) Note: Cookie expiry times alone are not sufficient - Schwab may invalidate sessions server-side. This function performs client-side validation only. """ logger = logging.getLogger(__name__) cookies_path = get_cookies_path() try: with open(cookies_path, 'r') as f: cookies = json.load(f) if not cookies: logger.debug("Session validation: No cookies found") return False current_time = int(time.time()) logger.debug(f"Session validation: Checking {len(cookies)} cookies") # CRITICAL session cookies - at least 3 of these should be present and valid critical_session_cookies = { 'auth': 'Primary authentication token', 'ASP.NET_SessionId': 'Session ID', 'NS2': 'Schwab session state', 'LVAL': 'Login token', '__RequestVerificationToken': 'CSRF token' } # NON-CRITICAL cookies that may expire non_critical_cookies = { 'SessionInfo', 'SS2', 'O2', 'sstate', 'pstate' } valid_critical_cookies = {} validation_details = [] for cookie in cookies: cookie_name = cookie.get('name', '') expiry = cookie.get('expires', -1) if cookie_name in critical_session_cookies: is_expired = expiry != -1 and expiry <= current_time is_valid = expiry == -1 or (expiry and expiry > current_time) validation_details.append({ 'name': cookie_name, 'valid': is_valid, 'expires': expiry, 'expired': is_expired, 'current_time': current_time }) if is_valid: valid_critical_cookies[cookie_name] = True logger.debug(f"✓ Critical session cookie '{cookie_name}' is valid") else: logger.debug(f"✗ Critical session cookie '{cookie_name}' is expired (expires={expiry}, now={current_time})") # Require at least 3 critical cookies to be valid min_required = 3 has_valid_session = len(valid_critical_cookies) >= min_required if not has_valid_session: logger.warning(f"Session validation FAILED: Only {len(valid_critical_cookies)} critical cookies valid (need ≥{min_required})") for detail in validation_details: logger.debug(f" {detail['name']}: {detail['valid']} (expires={detail['expires']})") else: logger.debug(f"✓ Session validation SUCCESS: {len(valid_critical_cookies)} critical cookies valid") logger.debug(f" Valid cookies: {list(valid_critical_cookies.keys())}") return has_valid_session except (FileNotFoundError, json.JSONDecodeError) as e: logger.debug(f"Session validation error: {e}") return False async def login_to_schwab(username: str, password: str) -> Optional[List[Dict[str, Any]]]: """ Perform automated login to Schwab using the remote browser (browserless). On success, saves cookies to `cookies.json` and returns the cookies list. Uses robust iframe detection and dynamic field detection. IMPORTANT: This function starts with a CLEAN SLATE - any existing stale cookies are cleared before the login attempt. This prevents authentication failures from mixing old session state with new credentials. """ import time login_start_time = time.time() logger = logging.getLogger(__name__) # CRITICAL: Clear any existing cookies before attempting fresh login # Stale cookies can cause Schwab to reject the authentication cookies_path = get_cookies_path() try: if os.path.exists(cookies_path): os.remove(cookies_path) logger.info(f"Cleared stale cookies file before fresh login: {cookies_path}") except Exception as e: logger.warning(f"Could not clear cookies file before login: {e}") config = load_config() playwright_url = get_playwright_url(config) async with async_playwright() as p: browser = await p.chromium.connect(playwright_url) # Create context with realistic headers and fingerprinting context = await browser.new_context( user_agent=( 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 ' '(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36' ), viewport={'width': 1920, 'height': 1200}, device_scale_factor=1.0, locale='en-US', timezone_id='America/New_York', permissions=['geolocation', 'notifications'], geolocation={'latitude': 40.7128, 'longitude': -74.0060}, extra_http_headers={ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 'Accept-Language': 'en-US,en;q=0.9', 'Accept-Encoding': 'gzip, deflate, br', 'Cache-Control': 'max-age=0', 'Sec-Fetch-Dest': 'document', 'Sec-Fetch-Mode': 'navigate', 'Sec-Fetch-Site': 'none', 'Sec-Fetch-User': '?1', 'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"', 'Sec-Ch-Ua-Mobile': '?0', 'Sec-Ch-Ua-Platform': '"Windows"', 'Upgrade-Insecure-Requests': '1', 'Dnt': '1', }, ) # Enhanced anti-detection script await context.add_init_script( ''' // Core webdriver hiding Object.defineProperty(navigator, 'webdriver', { get: () => undefined }); delete navigator.__proto__.webdriver; // Enhanced plugin spoofing Object.defineProperty(navigator, 'plugins', { get: () => [ { name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format', length: 1 }, { name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '', length: 1 }, { name: 'Native Client', filename: 'internal-nacl-plugin', description: 'Native Client', length: 1 }, ] }); // Language and locale consistency Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); Object.defineProperty(navigator, 'language', { get: () => 'en-US' }); // Screen properties matching viewport Object.defineProperty(screen, 'width', { get: () => 1920 }); Object.defineProperty(screen, 'height', { get: () => 1080 }); Object.defineProperty(screen, 'availWidth', { get: () => 1920 }); Object.defineProperty(screen, 'availHeight', { get: () => 1040 }); Object.defineProperty(screen, 'colorDepth', { get: () => 24 }); Object.defineProperty(screen, 'pixelDepth', { get: () => 24 }); // Permission handling const originalQuery = window.navigator.permissions.query; window.navigator.permissions.query = (parameters) => ( parameters.name === 'notifications' ? Promise.resolve({ state: Notification.permission }) : originalQuery(parameters) ); // Canvas fingerprinting resistance const getContext = HTMLCanvasElement.prototype.getContext; HTMLCanvasElement.prototype.getContext = function(type) { const context = getContext.call(this, type); if (type === '2d') { const getImageData = context.getImageData; context.getImageData = function(x, y, width, height) { const imageData = getImageData.call(this, x, y, width, height); // Add slight noise to canvas fingerprinting for (let i = 0; i < imageData.data.length; i += 4) { if (Math.random() < 0.1) { imageData.data[i] += Math.floor(Math.random() * 10) - 5; imageData.data[i + 1] += Math.floor(Math.random() * 10) - 5; imageData.data[i + 2] += Math.floor(Math.random() * 10) - 5; } } return imageData; }; } return context; }; // Hardware concurrency and memory Object.defineProperty(navigator, 'deviceMemory', { get: () => 8 }); Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 8 }); // WebGL fingerprinting const getParameter = WebGLRenderingContext.prototype.getParameter; WebGLRenderingContext.prototype.getParameter = function(parameter) { if (parameter === 37445) { return 'Intel Inc.'; } if (parameter === 37446) { return 'Intel(R) HD Graphics 620'; } if (parameter === 7936) { return 'WebKit'; } if (parameter === 7937) { return 'WebKit WebGL'; } return getParameter.call(this, parameter); }; // Mouse movement tracking evasion ['mousemove', 'mousedown', 'mouseup', 'click'].forEach(eventType => { document.addEventListener(eventType, function(e) { Object.defineProperty(e, 'isTrusted', { value: true, writable: false }); }, true); }); // Keyboard event evasion ['keydown', 'keypress', 'keyup'].forEach(eventType => { document.addEventListener(eventType, function(e) { Object.defineProperty(e, 'isTrusted', { value: true, writable: false }); }, true); }); // Hide automation indicators Object.defineProperty(window, 'chrome', { get: () => ({ runtime: {}, loadTimes: function() {}, csi: function() {}, app: {} }) }); // Spoof connection type Object.defineProperty(navigator, 'connection', { get: () => ({ effectiveType: '4g', rtt: 100, downlink: 10, saveData: false }) }); ''' ) page = await context.new_page() # Track authentication API calls for debugging and success detection auth_api_calls = [] fpa_fixes = 0 async def log_request(request): if 'api/v2/auth' in request.url: endpoint = '/login' if '/login' in request.url else '/assert' logger.debug(f"AUTH API REQUEST: {request.method} {endpoint}") # Only log payload in verbose debug mode (when --debug is used twice) if logger.getEffectiveLevel() <= 5 and request.post_data: # TRACE level logger.debug(f"AUTH API PAYLOAD: {request.post_data[:200]}...") async def log_response(response): if 'api/v2/auth' in response.url: endpoint = 'login' if '/login' in response.url else 'assert' logger.debug(f"AUTH API RESPONSE: {response.status} {endpoint}") # Track all auth API responses for success determination auth_api_calls.append({ 'url': response.url, 'status': response.status, 'endpoint': endpoint }) try: response_text = await response.text() # Check for authentication failure indicators if response.status == 403 or 'Access Denied' in response_text: logger.debug(f"❌ AUTHENTICATION FAILED: {response.status} - {endpoint}") elif response.status == 200 and '/assert' in response.url: # Check if 2FA is pending if 'wait_for_approval' in response_text or 'mobile_approve' in response_text: logger.debug("📱 2FA mobile approval requested") elif 'External process is pending' in response_text: logger.debug("📱 Waiting for 2FA approval...") except: logger.debug("AUTH API RESPONSE BODY: [could not read]") # Intercept and modify auth API requests to fix FPA parameter async def intercept_auth_request(route, request): nonlocal fpa_fixes if 'api/v2/auth' in request.url and 'FPA=false' in request.url: # Fix the FPA parameter from false to true modified_url = request.url.replace('FPA=false', 'FPA=true') fpa_fixes += 1 if fpa_fixes == 1: # Only log the first fix logger.debug("Fixed FPA parameter for authentication requests") await route.continue_(url=modified_url) else: await route.continue_() # Route auth API calls through our interceptor await page.route('**/api/v2/auth/**', intercept_auth_request) page.on('request', log_request) page.on('response', log_response) try: logger.debug("Navigating to Schwab login page…") await page.goto("https://client.schwab.com/Areas/Access/Login", timeout=60000) logger.debug("Waiting for login iframe…") iframe_element, iframe = await robust_iframe_wait(page) if not iframe or not iframe_element: logger.error("Could not access login iframe") raise Exception("Could not access login iframe") logger.debug("Accessed iframe content") iframe = await resolve_login_inner_frame(iframe) logger.debug("Waiting for login form…") try: await iframe.wait_for_load_state('domcontentloaded', timeout=15000) logger.debug("Login form DOM loaded") try: await iframe.wait_for_load_state('networkidle', timeout=5000) logger.debug("Login form network idle achieved") except Exception: logger.debug("Network idle timeout, proceeding") except Exception as e: logger.debug(f"DOM load timeout: {e}") logger.debug("Finding login fields…") username_field, password_field = await find_login_fields_dynamically(iframe) logger.debug(f"Dynamic detection result - username: {username_field}, password: {password_field}") if not username_field or not password_field: logger.debug("Dynamic detection failed; falling back to heuristics") # Basic fallbacks fallback_user = [ 'input[autocomplete="username"]', 'input[type="text"][id="loginIdInput"]', 'input[type="text"][placeholder*="Login ID"]', 'input[name*="login"]', 'input[id*="login"]', 'input[type="text"]' ] fallback_pwd = [ 'input[autocomplete="current-password"]', 'input[type="password"][id="passwordInput"]', 'input[type="password"][placeholder*="Password"]', 'input[name*="password"]', 'input[id*="password"]', 'input[type="password"]' ] original_username = username_field original_password = password_field for sel in fallback_user: try: if await iframe.is_visible(sel): username_field = sel logger.debug(f"Fallback username field found: {sel}") break except Exception: pass for sel in fallback_pwd: try: if await iframe.is_visible(sel): password_field = sel logger.debug(f"Fallback password field found: {sel}") break except Exception: pass logger.debug(f"After fallback - username: {username_field}, password: {password_field}") if not username_field or not password_field: # Dump all input fields for debugging try: all_inputs = await iframe.query_selector_all('input') logger.debug(f"Found {len(all_inputs)} total input fields:") for i, inp in enumerate(all_inputs): try: input_type = await inp.get_attribute('type') or 'text' input_id = await inp.get_attribute('id') or '' input_name = await inp.get_attribute('name') or '' input_placeholder = await inp.get_attribute('placeholder') or '' input_autocomplete = await inp.get_attribute('autocomplete') or '' is_visible = await inp.is_visible() logger.debug(f" Input {i}: type='{input_type}', id='{input_id}', name='{input_name}', placeholder='{input_placeholder}', autocomplete='{input_autocomplete}', visible={is_visible}") except Exception: pass except Exception as e: logger.debug(f"Could not enumerate input fields: {e}") raise Exception("Login fields not found") logger.debug("Filling credentials…") # Debug: Check what fields we're actually targeting try: username_element = await iframe.query_selector(username_field) password_element = await iframe.query_selector(password_field) if username_element: username_attrs = await username_element.evaluate('el => ({ id: el.id, name: el.name, type: el.type, placeholder: el.placeholder })') logger.debug(f"Username field attributes: {username_attrs}") else: logger.debug(f"Username field not found with selector: {username_field}") if password_element: password_attrs = await password_element.evaluate('el => ({ id: el.id, name: el.name, type: el.type, placeholder: el.placeholder })') logger.debug(f"Password field attributes: {password_attrs}") else: logger.debug(f"Password field not found with selector: {password_field}") except Exception as e: logger.debug(f"Error checking field attributes: {e}") # Fill credentials using the original working approach (reverted from git history) logger.debug("Filling credentials…") try: await iframe.fill(username_field, '') await iframe.fill(username_field, username) logger.debug("Username filled using fill()") except Exception as e: logger.debug(f"Username fill failed: {e}, trying click+type fallback") try: await iframe.click(username_field, timeout=5000) await iframe.type(username_field, username, delay=25) logger.debug("Username filled using click+type fallback") except Exception as e2: logger.debug(f"Username click+type also failed: {e2}") try: await iframe.fill(password_field, '') await iframe.fill(password_field, password) logger.debug("Password filled using fill()") except Exception as e: logger.debug(f"Password fill failed: {e}, trying click+type fallback") try: await iframe.click(password_field, timeout=5000) await iframe.type(password_field, password, delay=25) logger.debug("Password filled using click+type fallback") except Exception as e2: logger.debug(f"Password click+type also failed: {e2}") # Verify filled values (original approach) try: user_val = await iframe.input_value(username_field) _ = len(await iframe.input_value(password_field)) logger.debug(f"Credentials filled (username len={len(user_val)})") except Exception: logger.debug("Could not verify input values; proceeding") # Find submit button submit_selectors = [ 'button[type="submit"]', 'input[type="submit"]', 'button:has-text("Log In")', 'button:has-text("Sign In")', 'button:has-text("Continue")', '[role="button"]:has-text("Log In")', '[role="button"]' ] submit_button = None for sel in submit_selectors: try: if await iframe.is_visible(sel): submit_button = sel; break except Exception: pass if not submit_button: raise Exception("Submit button not found") # Wait for page JavaScript to fully initialize before submission # The HAR shows that successful logins require the frontend JS to be ready # and the FPA parameter to be set to true (not false) logger.debug("Waiting for authentication JavaScript to initialize...") try: await iframe.wait_for_function( '''() => { // Check if authentication-related JavaScript objects are available return window.fetch !== undefined && document.readyState === 'complete' && (window.crypto !== undefined || window.msCrypto !== undefined); }''', timeout=10000 ) logger.debug("Authentication JavaScript appears ready") except Exception as e: logger.debug(f"JavaScript readiness check failed: {e}, proceeding anyway") # Additional wait to ensure all JavaScript is loaded, including FPA initialization await page.wait_for_timeout(3000) # Try to trigger FPA=true by ensuring all fraud prevention scripts are loaded try: await iframe.evaluate(''' () => { // Try to trigger any deferred authentication scripts if (window.dispatchEvent) { window.dispatchEvent(new Event('load')); window.dispatchEvent(new Event('DOMContentLoaded')); } // Allow time for fraud prevention analytics to initialize return true; } ''') logger.debug("Triggered fraud prevention analytics initialization") # Wait longer for FPA to be set to true await page.wait_for_timeout(2000) except Exception as e: logger.debug(f"FPA initialization failed: {e}, proceeding anyway") logger.debug("Submitting login form…") # Ensure form submission triggers proper JavaScript events # The HAR shows that successful login triggers /api/v2/auth/login with device fingerprinting async with page.expect_response( lambda response: ( 'sws-gateway-nr.schwab.com/api/v2/auth' in response.url or 'client.schwab.com/Areas/Access/SignOn/Auth' in response.url ), timeout=60000 ) as response_info: try: # First try: Trigger form submission via JavaScript to ensure events fire logger.debug("Attempting JavaScript form submission to trigger auth API calls...") await iframe.evaluate(''' () => { const form = document.querySelector('form'); if (form) { // Dispatch input events to ensure form validation const inputs = form.querySelectorAll('input'); inputs.forEach(input => { input.dispatchEvent(new Event('input', { bubbles: true })); input.dispatchEvent(new Event('change', { bubbles: true })); }); // Trigger form submission form.dispatchEvent(new Event('submit', { bubbles: true, cancelable: true })); return true; } return false; } ''') # Wait a moment for JavaScript processing await page.wait_for_timeout(1000) # Then click the submit button to ensure UI state changes await iframe.click(submit_button) logger.debug("Submit button clicked after JavaScript events") except Exception as e: logger.debug(f"JavaScript submission failed: {e}, trying fallback methods") try: await iframe.press(password_field, 'Enter') logger.debug("Enter key pressed") except Exception: await iframe.click(submit_button, force=True) logger.debug("Force click attempted") # Wait for all authentication API calls to complete await page.wait_for_timeout(5000) try: response = await response_info.value logger.debug(f"Primary authentication response: {response.status} - {response.url}") except Exception as e: logger.debug(f"Response monitoring error: {e}") # Analyze authentication API calls to determine success/failure logger.debug(f"Analyzing {len(auth_api_calls)} authentication API calls...") login_success = False assert_success = False auth_failed = False for call in auth_api_calls: if call['endpoint'] == 'login' and call['status'] == 200: login_success = True elif call['endpoint'] == 'assert' and call['status'] == 200: assert_success = True elif call['status'] == 403: auth_failed = True # Determine overall authentication status if auth_failed: logger.debug("❌ Authentication failed: 403 Access Denied") elif login_success and assert_success: logger.debug("✅ Authentication successful - proceeding to 2FA flow") await page.wait_for_timeout(3000) elif login_success and not assert_success: logger.debug("⚠️ Partial success - waiting for password validation") await page.wait_for_timeout(3000) else: logger.debug("❓ Authentication status unclear") # Quick check for login errors after submission await page.wait_for_timeout(2000) try: iframe_element = await page.query_selector('#lmsIframe') if iframe_element: iframe_check = await iframe_element.content_frame() if iframe_check: # Look for error messages error_text = await iframe_check.evaluate('''() => { const errorElements = document.querySelectorAll('[style*="color: red"], .error, .alert-danger'); for (let el of errorElements) { const text = el.textContent.trim(); if (text && (text.toLowerCase().includes('invalid') || text.toLowerCase().includes('incorrect'))) { return text; } } return null; }''') if error_text: logger.error(f"Login failed with error: {error_text}") await browser.close() return None except Exception as e: logger.debug(f"Error check failed: {e}") # OAuth flow wait with enhanced detection logger.debug("Waiting for OAuth authorization flow…") try: await page.wait_for_function( '''() => { const mainUrl = window.location.href; console.log('OAuth wait check - Current URL:', mainUrl); // Check for direct success patterns first const successPatterns = [ '/summary', '/app/', '/Apps/', '/accounts/', '/Areas/Accounts', '/clientapps/accounts', '/positions', '/portfolio' ]; if (successPatterns.some(pattern => mainUrl.includes(pattern))) { console.log('Direct success redirect detected:', mainUrl); return true; } // Check iframe src for auth flow const iframe = document.querySelector('#lmsIframe'); if (!iframe) { console.log('No iframe found, checking for redirect...'); return false; } const iframeSrc = iframe.getAttribute('src'); console.log('Iframe src:', iframeSrc); if (iframeSrc && (iframeSrc.includes('SignOn/Auth') || iframeSrc.includes('code=') || iframeSrc.includes('redirecturi='))) { console.log('OAuth iframe detected:', iframeSrc); return true; } return false; }''', timeout=30000 ) logger.debug("OAuth authorization flow detected successfully") # Now wait for OAuth completion - check iframe content and try to interact logger.debug("Waiting for OAuth flow completion...") # Give iframe time to load OAuth content await page.wait_for_timeout(3000) # Try to interact with OAuth consent screen in iframe if present try: iframe_element = await page.query_selector('#lmsIframe') if iframe_element: iframe = await iframe_element.content_frame() if iframe: # Wait for iframe to load await iframe.wait_for_load_state('domcontentloaded', timeout=10000) # Debug: check what's in the iframe try: iframe_url = iframe.url iframe_title = await iframe.title() logger.debug(f"OAuth iframe loaded - URL: {iframe_url}, Title: {iframe_title}") # Check if this iframe is showing a login form that needs credentials login_form_check = await iframe.evaluate('''() => { const usernameFields = document.querySelectorAll('input[type="text"], input[id*="login"], input[name*="login"], input[placeholder*="login"]'); const passwordFields = document.querySelectorAll('input[type="password"]'); const errorElements = document.querySelectorAll('.error, [class*="error"], [class*="invalid"]'); return { hasUsernameField: usernameFields.length > 0, hasPasswordField: passwordFields.length > 0, errorCount: errorElements.length, errorMessages: Array.from(errorElements).map(el => el.textContent.trim()), pageText: document.body.textContent.trim().substring(0, 200) }; }''') logger.debug(f"OAuth iframe form analysis: {login_form_check}") # If this is a separate login form, try to fill it if login_form_check['hasUsernameField'] and login_form_check['hasPasswordField']: logger.debug("OAuth iframe has separate login form - attempting to fill credentials") # Try to find and fill fields in OAuth iframe try: oauth_username_selectors = [ 'input[type="text"]', 'input[id*="login"]', 'input[name*="login"]', 'input[placeholder*="login"]', 'input[autocomplete="username"]' ] oauth_password_selectors = [ 'input[type="password"]', 'input[id*="password"]', 'input[name*="password"]' ] # Fill username in OAuth iframe for sel in oauth_username_selectors: try: if await iframe.is_visible(sel): await iframe.fill(sel, username) logger.debug(f"Filled OAuth username field: {sel}") break except Exception: pass # Fill password in OAuth iframe for sel in oauth_password_selectors: try: if await iframe.is_visible(sel): await iframe.fill(sel, password) logger.debug(f"Filled OAuth password field: {sel}") break except Exception: pass await page.wait_for_timeout(1000) # Now submit the OAuth iframe form oauth_submit_selectors = [ 'button[type="submit"]', 'input[type="submit"]', 'button:has-text("Log in")', 'button:has-text("Log In")', 'button:has-text("Sign in")', 'button:has-text("Sign In")', 'button:has-text("Continue")', 'button' ] for submit_sel in oauth_submit_selectors: try: if await iframe.is_visible(submit_sel): button_text = await iframe.text_content(submit_sel) logger.debug(f"Submitting OAuth iframe form with button: {submit_sel} (text: {button_text})") await iframe.click(submit_sel) await page.wait_for_timeout(2000) break except Exception: pass # Check if the error disappeared after submitting and look for next steps try: await page.wait_for_timeout(3000) # Wait for form processing post_submit_check = await iframe.evaluate('''() => { const errorElements = document.querySelectorAll('.error, [class*="error"], [class*="invalid"]'); const errorText = Array.from(errorElements).map(el => el.textContent.trim()).join(' '); // Look for "Having trouble" buttons const buttons = Array.from(document.querySelectorAll('button, a, [role="button"]')); const buttonTexts = buttons.map(btn => ({ text: btn.textContent.trim(), tag: btn.tagName.toLowerCase(), visible: btn.offsetParent !== null })).filter(btn => btn.visible); return { hasErrors: errorElements.length > 0, errorText: errorText, currentUrl: window.location.href, availableButtons: buttonTexts }; }''') logger.debug(f"OAuth iframe post-submit status: {post_submit_check}") # If we see "Having trouble" text, try to click the "No, I'll try" button if 'Having trouble' in post_submit_check.get('errorText', '') or any('trouble' in btn['text'].lower() for btn in post_submit_check.get('availableButtons', [])): logger.debug("Found 'Having trouble' page, looking for bypass button...") trouble_selectors = [ "button:has-text(\"No, I'll try\")", 'button:has-text("No, I\'ll try")', 'button:has-text("try")', "a:has-text(\"No, I'll try\")", 'a:has-text("No, I\'ll try")', '[role="button"]:has-text("try")' ] for trouble_sel in trouble_selectors: try: if await iframe.is_visible(trouble_sel): button_text = await iframe.text_content(trouble_sel) logger.debug(f"Clicking trouble bypass button: {trouble_sel} (text: {button_text})") await iframe.click(trouble_sel) await page.wait_for_timeout(3000) break except Exception: pass except Exception: pass except Exception as oauth_fill_error: logger.debug(f"Error filling OAuth iframe credentials: {oauth_fill_error}") # Get all visible elements for debugging visible_elements = await iframe.evaluate('''() => { const elements = []; document.querySelectorAll('*').forEach(el => { if (el.offsetParent !== null && el.textContent.trim()) { const rect = el.getBoundingClientRect(); if (rect.width > 0 && rect.height > 0) { elements.push({ tag: el.tagName.toLowerCase(), text: el.textContent.trim().substring(0, 100), type: el.type || '', id: el.id || '', className: el.className || '' }); } } }); return elements.slice(0, 10); // Limit to first 10 visible elements }''') logger.debug(f"Visible elements in OAuth iframe: {visible_elements}") except Exception as debug_error: logger.debug(f"Error debugging iframe content: {debug_error}") # Check for OAuth consent buttons and specific Schwab flow buttons consent_selectors = [ 'button:has-text("No, I\'ll try")', # Schwab account assistance bypass 'button:has-text("Continue")', 'button:has-text("Allow")', 'button:has-text("Accept")', 'button:has-text("Approve")', 'input[type="submit"]', 'button[type="submit"]', 'button', 'input[type="button"]' # Add generic button selectors ] for sel in consent_selectors: try: if await iframe.is_visible(sel): button_text = await iframe.text_content(sel) logger.debug(f"Found clickable element: {sel} with text: {button_text}") await iframe.click(sel) logger.debug(f"Clicked OAuth element: {sel}") await page.wait_for_timeout(2000) break except Exception: pass except Exception as e: logger.debug(f"Error interacting with OAuth iframe: {e}") # Now wait for completion try: await page.wait_for_function( '''() => { const mainUrl = window.location.href; console.log('OAuth completion check - Current URL:', mainUrl); // Check if main page redirected to success const successPatterns = [ '/summary', '/app/', '/Apps/', '/accounts/', '/Areas/Accounts', '/clientapps/accounts', '/positions', '/portfolio' ]; if (successPatterns.some(pattern => mainUrl.includes(pattern))) { console.log('Main page redirected to success:', mainUrl); return true; } // Check if iframe has navigated to 2FA/authenticators const iframe = document.querySelector('#lmsIframe'); if (iframe) { const iframeSrc = iframe.getAttribute('src'); console.log('OAuth completion iframe src:', iframeSrc); if (iframeSrc && iframeSrc.includes('authenticators')) { console.log('2FA/authenticators detected'); return true; } } return false; }''', timeout=30000 ) logger.debug("OAuth flow completion detected") except Exception as completion_error: logger.debug(f"OAuth completion timeout: {completion_error}") # Capture debug artifacts on OAuth timeout try: png = await page.screenshot(full_page=True) save_debug_artifact("debug_oauth_timeout.png", png) html = await page.content() save_debug_artifact("debug_oauth_timeout.html", html) # Try to get iframe content as well iframe_element = await page.query_selector('#lmsIframe') if iframe_element: iframe = await iframe_element.content_frame() if iframe: iframe_html = await iframe.content() save_debug_artifact("debug_oauth_iframe.html", iframe_html) iframe_png = await iframe.screenshot() save_debug_artifact("debug_oauth_iframe.png", iframe_png) logger.debug("OAuth timeout debug artifacts saved") except Exception: pass except Exception as e: logger.debug(f"OAuth flow monitoring error: {e}") # Check current URL and iframe after OAuth flow current_url = page.url logger.debug(f"Final URL check after OAuth: {current_url}") # Check iframe content for 2FA or completion status try: iframe_element = await page.query_selector('#lmsIframe') if iframe_element: iframe_src = await iframe_element.get_attribute('src') logger.debug(f"Final iframe src: {iframe_src}") if iframe_src and 'authenticators' in iframe_src: logger.debug("2FA/authenticators page detected - updating current_url for 2FA handling") current_url = iframe_src # Set current_url to iframe src for 2FA detection except Exception: pass if 'authenticators' not in current_url: current_url = page.url logger.debug(f"Current URL after OAuth flow: {current_url}") # Fast success if any(p in current_url for p in ['/clientapps/accounts', '/accounts/', '/app/', '/Apps/', '/Areas/Accounts', '/summary']): cookies = await context.cookies() # Convert Cookie objects to dictionaries for JSON serialization cookie_dicts = [ { 'name': cookie.get('name', ''), 'value': cookie.get('value', ''), 'domain': cookie.get('domain', ''), 'path': cookie.get('path', ''), 'expires': cookie.get('expires', -1), 'httpOnly': cookie.get('httpOnly', False), 'secure': cookie.get('secure', False), 'sameSite': cookie.get('sameSite', 'Lax') } for cookie in cookies ] cookies_path = get_cookies_path() with open(cookies_path, 'w') as f: json.dump(cookie_dicts, f, indent=2) # Log authentication summary login_duration = time.time() - login_start_time logger.debug("OAuth success; cookies saved") logger.debug(f"Login completed in {login_duration:.1f}s, {len(auth_api_calls)} API calls, {fpa_fixes} FPA fixes") await browser.close() return cookie_dicts # Authenticators page (2FA) if 'authenticators' in current_url or 'otp/code' in current_url: print("\n" + "="*70) print("📱 MFA APPROVAL REQUIRED") print("="*70) print("Attempting to intercept n8n webhook for SMS text code...") logger.info("Checking for SMS/Text message option...") try: target = page iframe_element = await page.query_selector('#lmsIframe') if iframe_element: target = await iframe_element.content_frame() or page sms_button = await target.query_selector('button:has-text("Text message"), button:has-text("SMS"), :text-matches("Text message", "i"), :text-matches("SMS", "i")') if sms_button: logger.info("Clicking the SMS/Text message option to send code...") await sms_button.click() await page.wait_for_timeout(2000) continue_btn = await target.query_selector('button:has-text("Continue"), button:has-text("Next")') if continue_btn: await continue_btn.click() await page.wait_for_timeout(2000) except Exception as e: logger.debug(f"Could not automatically click SMS option (maybe already sent code): {e}") logger.info("Polling n8n webhook for MFA code (up to 2 minutes)…") import aiohttp import asyncio mfa_code = None try: async with aiohttp.ClientSession() as session: for idx in range(60): # 2 minutes, every 2 seconds try: async with session.get("https://n8n.ext.ben.io/webhook/schwab-token") as resp: if resp.status == 200: data = await resp.json() if data: # Parse based on expected n8n output formats code = None if isinstance(data, dict): code = data.get("code") or data.get("token") or data.get("body", {}).get("code") elif isinstance(data, list) and len(data) > 0: code = data[-1].get("code") or data[-1].get("token") if code: mfa_code = code logger.info(f"Got MFA code from webhook: {mfa_code}") break except Exception as e: logger.debug(f"Webhook poll error: {e}") if idx % 10 == 0: print(f"Still waiting for webhook code... ({idx*2}s/120s)") await asyncio.sleep(2) except Exception as loop_e: logger.error(f"Error during webhook polling loop: {loop_e}") if mfa_code: logger.info("Entering MFA code into form...") try: target = page iframe_element = await page.query_selector('#lmsIframe') if iframe_element: target = await iframe_element.content_frame() or page # Commonly used ids and attributes for OTP inputs on Schwab code_input = await target.query_selector('input[type="text"], input[type="tel"], input[name*="code" i], input[id*="code" i], input[autocomplete*="one-time-code" i]') if code_input: await code_input.fill(str(mfa_code)) # Sometimes the submit button specifically says 'Trust device' or similar submit_btn = await target.query_selector('button[type="submit"], button:has-text("Continue"), button:has-text("Verify"), button:has-text("Submit"), button:has-text("Log in"), button[id*="submit"], button[id*="continue"]') if submit_btn: await submit_btn.click() print("Submitted MFA code successfully.") await page.wait_for_timeout(5000) else: await page.wait_for_timeout(5000) except Exception as e: logger.error(f"Failed to enter MFA code: {e}") try: await page.wait_for_function( '''() => { const url = window.location.href; console.log('2FA wait check - Current URL:', url); // More comprehensive URL patterns for Schwab success pages const successPatterns = [ 'SignOn/Auth', # OAuth auth code stage '/app/', # Main app '/Apps/', # Alternative app path '/accounts/', # Accounts page '/Areas/Accounts', # Alternative accounts path '/summary', # Account summary '/clientapps/accounts', # Client apps accounts '/positions', # Positions page '/portfolio' # Portfolio page ]; const success = successPatterns.some(pattern => url.includes(pattern)); if (success) { console.log('2FA wait completed successfully - URL changed to:', url); } return success; }''', timeout=60000 ) logger.debug("2FA flow completed/detected successfully") except Exception as e: logger.error(f"2FA timeout or error: {e}") current_url_after_timeout = page.url logger.debug(f"URL after 2FA timeout: {current_url_after_timeout}") # Check if we're actually on a success page despite the timeout success_patterns = ['/app/', '/Apps/', '/accounts/', '/Areas/Accounts', '/summary', '/clientapps/accounts', '/positions', '/portfolio'] if any(pattern in current_url_after_timeout for pattern in success_patterns): logger.info("2FA timeout, but URL indicates success - continuing") else: # Capture debug artifacts on 2FA failure try: png = await page.screenshot(full_page=True) save_debug_artifact("debug_2fa_timeout.png", png) html = await page.content() save_debug_artifact("debug_2fa_timeout.html", html) logger.debug("2FA timeout debug artifacts saved") except Exception: pass # Try one more time with a shorter timeout to see if page redirected logger.info("Attempting 2FA recovery check...") try: await page.wait_for_function( '''() => { const url = window.location.href; const successPatterns = ['/app/', '/Apps/', '/accounts/', '/Areas/Accounts', '/summary', '/clientapps/accounts', '/positions', '/portfolio']; return successPatterns.some(pattern => url.includes(pattern)); }''', timeout=10000 ) logger.info("2FA recovery successful") except Exception: logger.error("2FA recovery failed - login unsuccessful") # Clean up bad cookies on MFA failure to prevent bad state cookies_path = get_cookies_path() try: logger.warning("Removing invalid cookies after MFA failure to prevent bad state") if os.path.exists(cookies_path): os.remove(cookies_path) logger.debug(f"Removed invalid cookies at {cookies_path}") except Exception as cleanup_error: logger.error(f"Failed to clean up cookies: {cleanup_error}") raise # Authorization code stage elif 'SignOn/Auth' in current_url: try: await page.wait_for_function( '''() => { const url = window.location.href; return url.includes('/app/') || url.includes('/Apps/') || url.includes('/accounts/') || url.includes('/Areas/Accounts'); }''', timeout=60000 ) except Exception: logger.debug("OAuth token exchange timeout; attempting to continue") # Try clicking continue/accept if present try: await page.wait_for_selector('button, input[type="submit"], a[href*="app"]', timeout=10000) for sel in ['button:has-text("Continue")', 'button:has-text("Accept")', 'button:has-text("Allow")', 'input[type="submit"]', 'a[href*="/app/"]']: try: if await page.is_visible(sel): await page.click(sel) break except Exception: pass except Exception: pass # Finalize try: await page.wait_for_load_state('domcontentloaded', timeout=5000) except Exception: pass final_url = page.url logger.debug(f"Final URL after OAuth flow: {final_url}") if any(p in final_url for p in ['/app/', '/Apps/', '/accounts/', '/Areas/Accounts']): cookies = await context.cookies() # Convert Cookie objects to dictionaries for JSON serialization cookie_dicts = [ { 'name': cookie.get('name', ''), 'value': cookie.get('value', ''), 'domain': cookie.get('domain', ''), 'path': cookie.get('path', ''), 'expires': cookie.get('expires', -1), 'httpOnly': cookie.get('httpOnly', False), 'secure': cookie.get('secure', False), 'sameSite': cookie.get('sameSite', 'Lax') } for cookie in cookies ] cookies_path = get_cookies_path() with open(cookies_path, 'w') as f: json.dump(cookie_dicts, f, indent=2) logger.debug("OAuth success; cookies saved") await browser.close() return cookie_dicts except Exception as e: logger.error(f"Login error: {e}") # Failure path: capture artifacts try: png = await page.screenshot(full_page=True) save_debug_artifact("debug_oauth_failed.png", png) html = await page.content() save_debug_artifact("debug_oauth_failed.html", html) except Exception: pass # Clean up bad cookies on login failure to prevent bad state cookies_path = get_cookies_path() try: logger.warning("Removing invalid cookies after login failure to prevent bad state") if os.path.exists(cookies_path): os.remove(cookies_path) logger.debug(f"Removed invalid cookies at {cookies_path}") except Exception as cleanup_error: logger.error(f"Failed to clean up cookies: {cleanup_error}") await browser.close() return None async def ensure_cookies() -> Optional[List[Dict[str, Any]]]: """Shared helper to ensure we have valid cookies. Attempts to use existing `cookies.json` if it appears valid; otherwise performs automated login using credentials from `config.json` when available. IMPORTANT: Stale cookies can cause authentication failures even if they haven't technically expired. This function implements: 1. Client-side validation (expiry time checks) 2. Fallback to fresh login if validation fails 3. Automatic cleanup of stale cookies on login attempt """ logger = logging.getLogger(__name__) cookies_path = get_cookies_path() # Try existing cookies if they appear to contain a valid session try: if await is_session_valid(): logger.debug("Existing cookies appear valid, attempting to load...") try: with open(cookies_path, 'r') as f: cookies = json.load(f) if cookies: logger.info(f"Using {len(cookies)} cached cookies from disk") return cookies except (FileNotFoundError, json.JSONDecodeError): logger.debug("Could not load valid cookies from disk") except Exception as e: logger.debug(f"Cookie validation failed: {e}") # If we reach here, existing cookies are not valid logger.info("Existing cookies not valid or not found. Attempting fresh login...") # Attempt automated login using config credentials try: from ..core.config import load_config, get_schwab_credentials config = load_config() username, password = get_schwab_credentials(config) if username and password: # IMPORTANT: Clear stale cookies before attempting new login # This prevents authentication failures from mixing old session state with new credentials try: if os.path.exists(cookies_path): logger.debug(f"Clearing stale cookies before fresh login attempt: {cookies_path}") os.remove(cookies_path) except Exception as cleanup_error: logger.warning(f"Failed to clear stale cookies: {cleanup_error}") logger.info("Starting fresh login process...") cookies = await login_to_schwab(username, password) if cookies: logger.info(f"Fresh login successful, obtained {len(cookies)} cookies") return cookies else: logger.error("Fresh login failed to produce cookies") except Exception as e: logger.error(f"Login attempt failed: {e}") logger.error("Unable to establish valid session") return None # ----- Helpers migrated from legacy scraper ----- async def find_login_fields_dynamically(iframe) -> Tuple[Optional[str], Optional[str]]: """Try multiple strategies to find username/password fields inside iframe.""" logger = logging.getLogger(__name__) try: # Strategy 1: Form-based forms = await iframe.query_selector_all('form') for form in forms: text_inputs = await form.query_selector_all('input[type="text"], input[type="email"], input:not([type])') pwd_inputs = await form.query_selector_all('input[type="password"]') if text_inputs and pwd_inputs: async def sel(inp): ac = (await inp.get_attribute('autocomplete')) or '' iid = (await inp.get_attribute('id')) or '' nm = (await inp.get_attribute('name')) or '' if ac: return f'input[autocomplete="{ac}"]' if iid: return f'#{iid}' if nm: return f'input[name="{nm}"]' return 'input[type="text"], input[type="email"], input:not([type])' return await sel(text_inputs[0]), await sel(pwd_inputs[0]) # Strategy 2: Proximity/attributes password_fields = await iframe.query_selector_all('input[type="password"]') for pwd in password_fields: pwd_id = (await pwd.get_attribute('id')) or '' pwd_name = (await pwd.get_attribute('name')) or '' ac = (await pwd.get_attribute('autocomplete')) or '' pwd_sel = 'input[autocomplete="current-password"]' if ac == 'current-password' else (f'#{pwd_id}' if pwd_id else (f'input[name="{pwd_name}"]' if pwd_name else 'input[type="password"]')) for cand in [ 'input[autocomplete="username"]', 'input[type="email"]', 'input[name*="login" i]', 'input[id*="login" i]', 'input[name*="user" i]', 'input[id*="user" i]', 'input[aria-label*="Login" i]', 'input[placeholder*="Login" i]', 'input[placeholder*="User" i]', 'input[type="text"]' ]: try: if await iframe.is_visible(cand): return cand, pwd_sel except Exception: pass # Strategy 3: Scoring all_inputs = await iframe.query_selector_all('input') username_candidates: List[Tuple[str, int]] = [] password_candidates: List[str] = [] for el in all_inputs: input_type = (await el.get_attribute('type')) or '' name = (await el.get_attribute('name')) or '' iid = (await el.get_attribute('id')) or '' placeholder = (await el.get_attribute('placeholder')) or '' aria = (await el.get_attribute('aria-label')) or '' ac = (await el.get_attribute('autocomplete')) or '' if input_type.lower() in ['text', 'email', ''] and input_type.lower() != 'password': score = 0 text = f"{name} {iid} {placeholder} {aria}".lower() for kw in ['login', 'user', 'email', 'username', 'id', 'account']: if kw in text: score += 1 if ac.lower() == 'username': score += 3 is_vis = await iframe.is_visible(f'input[name="{name}"]' if name else (f'#{iid}' if iid else 'input')) if is_vis: score += 2 if score > 0: selector = f'input[autocomplete="{ac}"]' if ac else (f'input[name="{name}"]' if name else (f'#{iid}' if iid else None)) if selector: username_candidates.append((selector, score)) if input_type.lower() == 'password': is_vis = await iframe.is_visible(f'input[name="{name}"]' if name else (f'#{iid}' if iid else 'input[type="password"]')) if is_vis: selector = f'input[autocomplete="{ac}"]' if ac else (f'input[name="{name}"]' if name else (f'#{iid}' if iid else 'input[type="password"]')) password_candidates.append(selector) if username_candidates and password_candidates: return max(username_candidates, key=lambda x: x[1])[0], password_candidates[0] return None, None except Exception as e: logger.debug(f"Dynamic detection error: {e}") return None, None async def resolve_login_inner_frame(iframe_root): """Some deployments nest the actual login form inside another iframe.""" try: try: if await iframe_root.query_selector('input[type="password"]'): return iframe_root except Exception: pass child_iframes = await iframe_root.query_selector_all('iframe') for child in child_iframes: try: sub = await child.content_frame() if not sub: continue await sub.wait_for_load_state('domcontentloaded', timeout=5000) if await sub.query_selector('input[type="password"]'): return sub except Exception: continue return iframe_root except Exception: return iframe_root async def robust_iframe_wait(page, iframe_selector: str = '#lmsIframe', max_retries: int = 3, timeout: int = 30000): """Robustly wait for login iframe with retries and multiple strategies.""" logger = logging.getLogger(__name__) for attempt in range(max_retries): try: try: await page.wait_for_selector(iframe_selector, timeout=timeout // max_retries) iframe_element = await page.wait_for_selector(iframe_selector) iframe = await iframe_element.content_frame() if iframe: await iframe.wait_for_load_state('domcontentloaded', timeout=10000) return iframe_element, iframe except Exception: pass try: iframes = await page.query_selector_all('iframe') for iframe_elem in iframes: iframe_id = await iframe_elem.get_attribute('id') if 'lms' in (iframe_id or '').lower(): iframe = await iframe_elem.content_frame() if iframe: await iframe.wait_for_load_state('domcontentloaded', timeout=5000) return iframe_elem, iframe except Exception: pass try: iframe_elems = await page.query_selector_all('iframe') for iframe_elem in iframe_elems: src = await iframe_elem.get_attribute('src') or '' if any(k in src.lower() for k in ['login', 'auth', 'signin']): iframe = await iframe_elem.content_frame() if iframe: await iframe.wait_for_load_state('domcontentloaded', timeout=5000) return iframe_elem, iframe except Exception: pass if attempt < max_retries - 1: await page.wait_for_timeout(2000) except Exception: if attempt < max_retries - 1: await page.wait_for_timeout(2000) logger.debug("Failed to find login iframe after all attempts") return None, None