Files
schwab-mcp-custom/schwab_scraper/browser/auth.py
b3nw a05ba3b8a8
All checks were successful
Build and Push Docker Image / build (push) Successful in 36s
fix(positions): sync latest scraper fixes from main repository
2026-04-24 21:34:38 +00:00

1432 lines
74 KiB
Python

import json
import os
import time
import logging
from typing import List, Dict, Any, Optional, Tuple
from playwright.async_api import async_playwright
from ..core.config import load_config, get_playwright_url, get_cookies_path
from ..utils.logging import save_debug_artifact
async def is_session_valid(cookies_path: Optional[str] = None) -> bool:
    """Check whether the saved cookie file looks like a live session.

    This is a client-side check only. A session is considered valid when at
    least three distinct critical session cookies are present and unexpired.
    A cookie is unexpired when its ``expires`` value is ``-1`` (session
    cookie) or a number strictly greater than the current Unix time. A
    missing ``expires`` key is treated as ``-1``; a null or non-numeric
    ``expires`` is treated as not valid.

    Note: cookie expiry times alone are not sufficient - the server may
    invalidate a session at any time, and this function does not contact it.
    It also does not check how long ago the session was established.

    Args:
        cookies_path: Path of the cookie JSON file to inspect. When omitted,
            the path returned by ``get_cookies_path()`` is used.

    Returns:
        True when enough critical cookies are valid. False otherwise,
        including when the file is missing, unreadable, not valid JSON,
        empty, or not a JSON list.
    """
    logger = logging.getLogger(__name__)
    if cookies_path is None:
        cookies_path = get_cookies_path()

    # Only the file read/parse can legitimately fail here; keep the try
    # narrow so bugs in the validation logic below are not swallowed.
    # OSError covers missing/unreadable files; ValueError covers both
    # json.JSONDecodeError and UnicodeDecodeError.
    try:
        with open(cookies_path, 'r', encoding='utf-8') as f:
            cookies = json.load(f)
    except (OSError, ValueError) as e:
        logger.debug(f"Session validation error: {e}")
        return False

    if not cookies:
        logger.debug("Session validation: No cookies found")
        return False
    if not isinstance(cookies, list):
        logger.debug(
            f"Session validation error: expected a list of cookies, got {type(cookies).__name__}"
        )
        return False

    current_time = int(time.time())
    logger.debug(f"Session validation: Checking {len(cookies)} cookies")

    # CRITICAL session cookies - at least 3 of these should be present and valid.
    # Only the keys are used for matching; the values document each cookie.
    critical_session_cookies = {
        'auth': 'Primary authentication token',
        'ASP.NET_SessionId': 'Session ID',
        'NS2': 'Schwab session state',
        'LVAL': 'Login token',
        '__RequestVerificationToken': 'CSRF token'
    }

    # Keyed by name so the same cookie stored more than once counts once.
    valid_critical_cookies = {}
    validation_details = []

    for cookie in cookies:
        # Skip malformed entries instead of crashing on them.
        if not isinstance(cookie, dict):
            continue
        cookie_name = cookie.get('name', '')
        if not isinstance(cookie_name, str) or cookie_name not in critical_session_cookies:
            continue

        expiry = cookie.get('expires', -1)
        is_numeric = isinstance(expiry, (int, float))
        is_session_cookie = is_numeric and expiry == -1
        is_valid = is_session_cookie or (is_numeric and expiry > current_time)
        is_expired = is_numeric and not is_session_cookie and expiry <= current_time

        validation_details.append({
            'name': cookie_name,
            'valid': is_valid,
            'expires': expiry,
            'expired': is_expired,
            'current_time': current_time
        })

        if is_valid:
            valid_critical_cookies[cookie_name] = True
            logger.debug(f"✓ Critical session cookie '{cookie_name}' is valid")
        else:
            logger.debug(f"✗ Critical session cookie '{cookie_name}' is expired (expires={expiry}, now={current_time})")

    # Require at least 3 critical cookies to be valid
    min_required = 3
    has_valid_session = len(valid_critical_cookies) >= min_required

    if not has_valid_session:
        logger.warning(f"Session validation FAILED: Only {len(valid_critical_cookies)} critical cookies valid (need ≥{min_required})")
        for detail in validation_details:
            logger.debug(f" {detail['name']}: {detail['valid']} (expires={detail['expires']})")
    else:
        logger.debug(f"✓ Session validation SUCCESS: {len(valid_critical_cookies)} critical cookies valid")
        logger.debug(f" Valid cookies: {list(valid_critical_cookies.keys())}")

    return has_valid_session
async def login_to_schwab(username: str, password: str) -> Optional[List[Dict[str, Any]]]:
"""
Perform automated login to Schwab using the remote browser (browserless).
On success, saves cookies to `cookies.json` and returns the cookies list.
Uses robust iframe detection and dynamic field detection.
IMPORTANT: This function starts with a CLEAN SLATE - any existing stale cookies
are cleared before the login attempt. This prevents authentication failures from
mixing old session state with new credentials.
"""
import time
login_start_time = time.time()
logger = logging.getLogger(__name__)
# CRITICAL: Clear any existing cookies before attempting fresh login
# Stale cookies can cause Schwab to reject the authentication
cookies_path = get_cookies_path()
try:
if os.path.exists(cookies_path):
os.remove(cookies_path)
logger.info(f"Cleared stale cookies file before fresh login: {cookies_path}")
except Exception as e:
logger.warning(f"Could not clear cookies file before login: {e}")
config = load_config()
playwright_url = get_playwright_url(config)
async with async_playwright() as p:
browser = await p.chromium.connect(playwright_url)
# Create context with realistic headers and fingerprinting
context = await browser.new_context(
user_agent=(
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
'(KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
),
viewport={'width': 1920, 'height': 1200},
device_scale_factor=1.0,
locale='en-US',
timezone_id='America/New_York',
permissions=['geolocation', 'notifications'],
geolocation={'latitude': 40.7128, 'longitude': -74.0060},
extra_http_headers={
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Language': 'en-US,en;q=0.9',
'Accept-Encoding': 'gzip, deflate, br',
'Cache-Control': 'max-age=0',
'Sec-Fetch-Dest': 'document',
'Sec-Fetch-Mode': 'navigate',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-User': '?1',
'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
'Sec-Ch-Ua-Mobile': '?0',
'Sec-Ch-Ua-Platform': '"Windows"',
'Upgrade-Insecure-Requests': '1',
'Dnt': '1',
},
)
# Enhanced anti-detection script
await context.add_init_script(
'''
// Core webdriver hiding
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
delete navigator.__proto__.webdriver;
// Enhanced plugin spoofing
Object.defineProperty(navigator, 'plugins', {
get: () => [
{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format', length: 1 },
{ name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '', length: 1 },
{ name: 'Native Client', filename: 'internal-nacl-plugin', description: 'Native Client', length: 1 },
]
});
// Language and locale consistency
Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
Object.defineProperty(navigator, 'language', { get: () => 'en-US' });
// Screen properties matching viewport
Object.defineProperty(screen, 'width', { get: () => 1920 });
Object.defineProperty(screen, 'height', { get: () => 1080 });
Object.defineProperty(screen, 'availWidth', { get: () => 1920 });
Object.defineProperty(screen, 'availHeight', { get: () => 1040 });
Object.defineProperty(screen, 'colorDepth', { get: () => 24 });
Object.defineProperty(screen, 'pixelDepth', { get: () => 24 });
// Permission handling
const originalQuery = window.navigator.permissions.query;
window.navigator.permissions.query = (parameters) => (
parameters.name === 'notifications' ?
Promise.resolve({ state: Notification.permission }) :
originalQuery(parameters)
);
// Canvas fingerprinting resistance
const getContext = HTMLCanvasElement.prototype.getContext;
HTMLCanvasElement.prototype.getContext = function(type) {
const context = getContext.call(this, type);
if (type === '2d') {
const getImageData = context.getImageData;
context.getImageData = function(x, y, width, height) {
const imageData = getImageData.call(this, x, y, width, height);
// Add slight noise to canvas fingerprinting
for (let i = 0; i < imageData.data.length; i += 4) {
if (Math.random() < 0.1) {
imageData.data[i] += Math.floor(Math.random() * 10) - 5;
imageData.data[i + 1] += Math.floor(Math.random() * 10) - 5;
imageData.data[i + 2] += Math.floor(Math.random() * 10) - 5;
}
}
return imageData;
};
}
return context;
};
// Hardware concurrency and memory
Object.defineProperty(navigator, 'deviceMemory', { get: () => 8 });
Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 8 });
// WebGL fingerprinting
const getParameter = WebGLRenderingContext.prototype.getParameter;
WebGLRenderingContext.prototype.getParameter = function(parameter) {
if (parameter === 37445) { return 'Intel Inc.'; }
if (parameter === 37446) { return 'Intel(R) HD Graphics 620'; }
if (parameter === 7936) { return 'WebKit'; }
if (parameter === 7937) { return 'WebKit WebGL'; }
return getParameter.call(this, parameter);
};
// Mouse movement tracking evasion
['mousemove', 'mousedown', 'mouseup', 'click'].forEach(eventType => {
document.addEventListener(eventType, function(e) {
Object.defineProperty(e, 'isTrusted', { value: true, writable: false });
}, true);
});
// Keyboard event evasion
['keydown', 'keypress', 'keyup'].forEach(eventType => {
document.addEventListener(eventType, function(e) {
Object.defineProperty(e, 'isTrusted', { value: true, writable: false });
}, true);
});
// Hide automation indicators
Object.defineProperty(window, 'chrome', {
get: () => ({
runtime: {},
loadTimes: function() {},
csi: function() {},
app: {}
})
});
// Spoof connection type
Object.defineProperty(navigator, 'connection', {
get: () => ({
effectiveType: '4g',
rtt: 100,
downlink: 10,
saveData: false
})
});
'''
)
page = await context.new_page()
# Track authentication API calls for debugging and success detection
auth_api_calls = []
fpa_fixes = 0
async def log_request(request):
if 'api/v2/auth' in request.url:
endpoint = '/login' if '/login' in request.url else '/assert'
logger.debug(f"AUTH API REQUEST: {request.method} {endpoint}")
# Only log payload in verbose debug mode (when --debug is used twice)
if logger.getEffectiveLevel() <= 5 and request.post_data: # TRACE level
logger.debug(f"AUTH API PAYLOAD: {request.post_data[:200]}...")
async def log_response(response):
if 'api/v2/auth' in response.url:
endpoint = 'login' if '/login' in response.url else 'assert'
logger.debug(f"AUTH API RESPONSE: {response.status} {endpoint}")
# Track all auth API responses for success determination
auth_api_calls.append({
'url': response.url,
'status': response.status,
'endpoint': endpoint
})
try:
response_text = await response.text()
# Check for authentication failure indicators
if response.status == 403 or 'Access Denied' in response_text:
logger.debug(f"❌ AUTHENTICATION FAILED: {response.status} - {endpoint}")
elif response.status == 200 and '/assert' in response.url:
# Check if 2FA is pending
if 'wait_for_approval' in response_text or 'mobile_approve' in response_text:
logger.debug("📱 2FA mobile approval requested")
elif 'External process is pending' in response_text:
logger.debug("📱 Waiting for 2FA approval...")
except:
logger.debug("AUTH API RESPONSE BODY: [could not read]")
# Intercept and modify auth API requests to fix FPA parameter
async def intercept_auth_request(route, request):
nonlocal fpa_fixes
if 'api/v2/auth' in request.url and 'FPA=false' in request.url:
# Fix the FPA parameter from false to true
modified_url = request.url.replace('FPA=false', 'FPA=true')
fpa_fixes += 1
if fpa_fixes == 1: # Only log the first fix
logger.debug("Fixed FPA parameter for authentication requests")
await route.continue_(url=modified_url)
else:
await route.continue_()
# Route auth API calls through our interceptor
await page.route('**/api/v2/auth/**', intercept_auth_request)
page.on('request', log_request)
page.on('response', log_response)
try:
logger.debug("Navigating to Schwab login page…")
await page.goto("https://client.schwab.com/Areas/Access/Login", timeout=60000)
logger.debug("Waiting for login iframe…")
iframe_element, iframe = await robust_iframe_wait(page)
if not iframe or not iframe_element:
logger.error("Could not access login iframe")
raise Exception("Could not access login iframe")
logger.debug("Accessed iframe content")
iframe = await resolve_login_inner_frame(iframe)
logger.debug("Waiting for login form…")
try:
await iframe.wait_for_load_state('domcontentloaded', timeout=15000)
logger.debug("Login form DOM loaded")
try:
await iframe.wait_for_load_state('networkidle', timeout=5000)
logger.debug("Login form network idle achieved")
except Exception:
logger.debug("Network idle timeout, proceeding")
except Exception as e:
logger.debug(f"DOM load timeout: {e}")
logger.debug("Finding login fields…")
username_field, password_field = await find_login_fields_dynamically(iframe)
logger.debug(f"Dynamic detection result - username: {username_field}, password: {password_field}")
if not username_field or not password_field:
logger.debug("Dynamic detection failed; falling back to heuristics")
# Basic fallbacks
fallback_user = [
'input[autocomplete="username"]',
'input[type="text"][id="loginIdInput"]',
'input[type="text"][placeholder*="Login ID"]',
'input[name*="login"]', 'input[id*="login"]', 'input[type="text"]'
]
fallback_pwd = [
'input[autocomplete="current-password"]',
'input[type="password"][id="passwordInput"]',
'input[type="password"][placeholder*="Password"]',
'input[name*="password"]', 'input[id*="password"]', 'input[type="password"]'
]
original_username = username_field
original_password = password_field
for sel in fallback_user:
try:
if await iframe.is_visible(sel):
username_field = sel
logger.debug(f"Fallback username field found: {sel}")
break
except Exception:
pass
for sel in fallback_pwd:
try:
if await iframe.is_visible(sel):
password_field = sel
logger.debug(f"Fallback password field found: {sel}")
break
except Exception:
pass
logger.debug(f"After fallback - username: {username_field}, password: {password_field}")
if not username_field or not password_field:
# Dump all input fields for debugging
try:
all_inputs = await iframe.query_selector_all('input')
logger.debug(f"Found {len(all_inputs)} total input fields:")
for i, inp in enumerate(all_inputs):
try:
input_type = await inp.get_attribute('type') or 'text'
input_id = await inp.get_attribute('id') or ''
input_name = await inp.get_attribute('name') or ''
input_placeholder = await inp.get_attribute('placeholder') or ''
input_autocomplete = await inp.get_attribute('autocomplete') or ''
is_visible = await inp.is_visible()
logger.debug(f" Input {i}: type='{input_type}', id='{input_id}', name='{input_name}', placeholder='{input_placeholder}', autocomplete='{input_autocomplete}', visible={is_visible}")
except Exception:
pass
except Exception as e:
logger.debug(f"Could not enumerate input fields: {e}")
raise Exception("Login fields not found")
logger.debug("Filling credentials…")
# Debug: Check what fields we're actually targeting
try:
username_element = await iframe.query_selector(username_field)
password_element = await iframe.query_selector(password_field)
if username_element:
username_attrs = await username_element.evaluate('el => ({ id: el.id, name: el.name, type: el.type, placeholder: el.placeholder })')
logger.debug(f"Username field attributes: {username_attrs}")
else:
logger.debug(f"Username field not found with selector: {username_field}")
if password_element:
password_attrs = await password_element.evaluate('el => ({ id: el.id, name: el.name, type: el.type, placeholder: el.placeholder })')
logger.debug(f"Password field attributes: {password_attrs}")
else:
logger.debug(f"Password field not found with selector: {password_field}")
except Exception as e:
logger.debug(f"Error checking field attributes: {e}")
# Fill credentials using the original working approach (reverted from git history)
logger.debug("Filling credentials…")
try:
await iframe.fill(username_field, '')
await iframe.fill(username_field, username)
logger.debug("Username filled using fill()")
except Exception as e:
logger.debug(f"Username fill failed: {e}, trying click+type fallback")
try:
await iframe.click(username_field, timeout=5000)
await iframe.type(username_field, username, delay=25)
logger.debug("Username filled using click+type fallback")
except Exception as e2:
logger.debug(f"Username click+type also failed: {e2}")
try:
await iframe.fill(password_field, '')
await iframe.fill(password_field, password)
logger.debug("Password filled using fill()")
except Exception as e:
logger.debug(f"Password fill failed: {e}, trying click+type fallback")
try:
await iframe.click(password_field, timeout=5000)
await iframe.type(password_field, password, delay=25)
logger.debug("Password filled using click+type fallback")
except Exception as e2:
logger.debug(f"Password click+type also failed: {e2}")
# Verify filled values (original approach)
try:
user_val = await iframe.input_value(username_field)
_ = len(await iframe.input_value(password_field))
logger.debug(f"Credentials filled (username len={len(user_val)})")
except Exception:
logger.debug("Could not verify input values; proceeding")
# Find submit button
submit_selectors = [
'button[type="submit"]', 'input[type="submit"]',
'button:has-text("Log In")', 'button:has-text("Sign In")', 'button:has-text("Continue")',
'[role="button"]:has-text("Log In")', '[role="button"]'
]
submit_button = None
for sel in submit_selectors:
try:
if await iframe.is_visible(sel):
submit_button = sel; break
except Exception:
pass
if not submit_button:
raise Exception("Submit button not found")
# Wait for page JavaScript to fully initialize before submission
# The HAR shows that successful logins require the frontend JS to be ready
# and the FPA parameter to be set to true (not false)
logger.debug("Waiting for authentication JavaScript to initialize...")
try:
await iframe.wait_for_function(
'''() => {
// Check if authentication-related JavaScript objects are available
return window.fetch !== undefined &&
document.readyState === 'complete' &&
(window.crypto !== undefined || window.msCrypto !== undefined);
}''',
timeout=10000
)
logger.debug("Authentication JavaScript appears ready")
except Exception as e:
logger.debug(f"JavaScript readiness check failed: {e}, proceeding anyway")
# Additional wait to ensure all JavaScript is loaded, including FPA initialization
await page.wait_for_timeout(3000)
# Try to trigger FPA=true by ensuring all fraud prevention scripts are loaded
try:
await iframe.evaluate('''
() => {
// Try to trigger any deferred authentication scripts
if (window.dispatchEvent) {
window.dispatchEvent(new Event('load'));
window.dispatchEvent(new Event('DOMContentLoaded'));
}
// Allow time for fraud prevention analytics to initialize
return true;
}
''')
logger.debug("Triggered fraud prevention analytics initialization")
# Wait longer for FPA to be set to true
await page.wait_for_timeout(2000)
except Exception as e:
logger.debug(f"FPA initialization failed: {e}, proceeding anyway")
logger.debug("Submitting login form…")
# Ensure form submission triggers proper JavaScript events
# The HAR shows that successful login triggers /api/v2/auth/login with device fingerprinting
async with page.expect_response(
lambda response: (
'sws-gateway-nr.schwab.com/api/v2/auth' in response.url or
'client.schwab.com/Areas/Access/SignOn/Auth' in response.url
), timeout=60000
) as response_info:
try:
# First try: Trigger form submission via JavaScript to ensure events fire
logger.debug("Attempting JavaScript form submission to trigger auth API calls...")
await iframe.evaluate('''
() => {
const form = document.querySelector('form');
if (form) {
// Dispatch input events to ensure form validation
const inputs = form.querySelectorAll('input');
inputs.forEach(input => {
input.dispatchEvent(new Event('input', { bubbles: true }));
input.dispatchEvent(new Event('change', { bubbles: true }));
});
// Trigger form submission
form.dispatchEvent(new Event('submit', { bubbles: true, cancelable: true }));
return true;
}
return false;
}
''')
# Wait a moment for JavaScript processing
await page.wait_for_timeout(1000)
# Then click the submit button to ensure UI state changes
await iframe.click(submit_button)
logger.debug("Submit button clicked after JavaScript events")
except Exception as e:
logger.debug(f"JavaScript submission failed: {e}, trying fallback methods")
try:
await iframe.press(password_field, 'Enter')
logger.debug("Enter key pressed")
except Exception:
await iframe.click(submit_button, force=True)
logger.debug("Force click attempted")
# Wait for all authentication API calls to complete
await page.wait_for_timeout(5000)
try:
response = await response_info.value
logger.debug(f"Primary authentication response: {response.status} - {response.url}")
except Exception as e:
logger.debug(f"Response monitoring error: {e}")
# Analyze authentication API calls to determine success/failure
logger.debug(f"Analyzing {len(auth_api_calls)} authentication API calls...")
login_success = False
assert_success = False
auth_failed = False
for call in auth_api_calls:
if call['endpoint'] == 'login' and call['status'] == 200:
login_success = True
elif call['endpoint'] == 'assert' and call['status'] == 200:
assert_success = True
elif call['status'] == 403:
auth_failed = True
# Determine overall authentication status
if auth_failed:
logger.debug("❌ Authentication failed: 403 Access Denied")
elif login_success and assert_success:
logger.debug("✅ Authentication successful - proceeding to 2FA flow")
await page.wait_for_timeout(3000)
elif login_success and not assert_success:
logger.debug("⚠️ Partial success - waiting for password validation")
await page.wait_for_timeout(3000)
else:
logger.debug("❓ Authentication status unclear")
# Quick check for login errors after submission
await page.wait_for_timeout(2000)
try:
iframe_element = await page.query_selector('#lmsIframe')
if iframe_element:
iframe_check = await iframe_element.content_frame()
if iframe_check:
# Look for error messages
error_text = await iframe_check.evaluate('''() => {
const errorElements = document.querySelectorAll('[style*="color: red"], .error, .alert-danger');
for (let el of errorElements) {
const text = el.textContent.trim();
if (text && (text.toLowerCase().includes('invalid') || text.toLowerCase().includes('incorrect'))) {
return text;
}
}
return null;
}''')
if error_text:
logger.error(f"Login failed with error: {error_text}")
await browser.close()
return None
except Exception as e:
logger.debug(f"Error check failed: {e}")
# OAuth flow wait with enhanced detection
logger.debug("Waiting for OAuth authorization flow…")
try:
await page.wait_for_function(
'''() => {
const mainUrl = window.location.href;
console.log('OAuth wait check - Current URL:', mainUrl);
// Check for direct success patterns first
const successPatterns = [
'/summary', '/app/', '/Apps/', '/accounts/', '/Areas/Accounts',
'/clientapps/accounts', '/positions', '/portfolio'
];
if (successPatterns.some(pattern => mainUrl.includes(pattern))) {
console.log('Direct success redirect detected:', mainUrl);
return true;
}
// Check iframe src for auth flow
const iframe = document.querySelector('#lmsIframe');
if (!iframe) {
console.log('No iframe found, checking for redirect...');
return false;
}
const iframeSrc = iframe.getAttribute('src');
console.log('Iframe src:', iframeSrc);
if (iframeSrc && (iframeSrc.includes('SignOn/Auth') || iframeSrc.includes('code=') || iframeSrc.includes('redirecturi='))) {
console.log('OAuth iframe detected:', iframeSrc);
return true;
}
return false;
}''', timeout=30000
)
logger.debug("OAuth authorization flow detected successfully")
# Now wait for OAuth completion - check iframe content and try to interact
logger.debug("Waiting for OAuth flow completion...")
# Give iframe time to load OAuth content
await page.wait_for_timeout(3000)
# Try to interact with OAuth consent screen in iframe if present
try:
iframe_element = await page.query_selector('#lmsIframe')
if iframe_element:
iframe = await iframe_element.content_frame()
if iframe:
# Wait for iframe to load
await iframe.wait_for_load_state('domcontentloaded', timeout=10000)
# Debug: check what's in the iframe
try:
iframe_url = iframe.url
iframe_title = await iframe.title()
logger.debug(f"OAuth iframe loaded - URL: {iframe_url}, Title: {iframe_title}")
# Check if this iframe is showing a login form that needs credentials
login_form_check = await iframe.evaluate('''() => {
const usernameFields = document.querySelectorAll('input[type="text"], input[id*="login"], input[name*="login"], input[placeholder*="login"]');
const passwordFields = document.querySelectorAll('input[type="password"]');
const errorElements = document.querySelectorAll('.error, [class*="error"], [class*="invalid"]');
return {
hasUsernameField: usernameFields.length > 0,
hasPasswordField: passwordFields.length > 0,
errorCount: errorElements.length,
errorMessages: Array.from(errorElements).map(el => el.textContent.trim()),
pageText: document.body.textContent.trim().substring(0, 200)
};
}''')
logger.debug(f"OAuth iframe form analysis: {login_form_check}")
# If this is a separate login form, try to fill it
if login_form_check['hasUsernameField'] and login_form_check['hasPasswordField']:
logger.debug("OAuth iframe has separate login form - attempting to fill credentials")
# Try to find and fill fields in OAuth iframe
try:
oauth_username_selectors = [
'input[type="text"]', 'input[id*="login"]', 'input[name*="login"]',
'input[placeholder*="login"]', 'input[autocomplete="username"]'
]
oauth_password_selectors = [
'input[type="password"]', 'input[id*="password"]', 'input[name*="password"]'
]
# Fill username in OAuth iframe
for sel in oauth_username_selectors:
try:
if await iframe.is_visible(sel):
await iframe.fill(sel, username)
logger.debug(f"Filled OAuth username field: {sel}")
break
except Exception:
pass
# Fill password in OAuth iframe
for sel in oauth_password_selectors:
try:
if await iframe.is_visible(sel):
await iframe.fill(sel, password)
logger.debug(f"Filled OAuth password field: {sel}")
break
except Exception:
pass
await page.wait_for_timeout(1000)
# Now submit the OAuth iframe form
oauth_submit_selectors = [
'button[type="submit"]', 'input[type="submit"]',
'button:has-text("Log in")', 'button:has-text("Log In")',
'button:has-text("Sign in")', 'button:has-text("Sign In")',
'button:has-text("Continue")', 'button'
]
for submit_sel in oauth_submit_selectors:
try:
if await iframe.is_visible(submit_sel):
button_text = await iframe.text_content(submit_sel)
logger.debug(f"Submitting OAuth iframe form with button: {submit_sel} (text: {button_text})")
await iframe.click(submit_sel)
await page.wait_for_timeout(2000)
break
except Exception:
pass
# Check if the error disappeared after submitting and look for next steps
try:
await page.wait_for_timeout(3000) # Wait for form processing
post_submit_check = await iframe.evaluate('''() => {
const errorElements = document.querySelectorAll('.error, [class*="error"], [class*="invalid"]');
const errorText = Array.from(errorElements).map(el => el.textContent.trim()).join(' ');
// Look for "Having trouble" buttons
const buttons = Array.from(document.querySelectorAll('button, a, [role="button"]'));
const buttonTexts = buttons.map(btn => ({
text: btn.textContent.trim(),
tag: btn.tagName.toLowerCase(),
visible: btn.offsetParent !== null
})).filter(btn => btn.visible);
return {
hasErrors: errorElements.length > 0,
errorText: errorText,
currentUrl: window.location.href,
availableButtons: buttonTexts
};
}''')
logger.debug(f"OAuth iframe post-submit status: {post_submit_check}")
# If we see "Having trouble" text, try to click the "No, I'll try" button
if 'Having trouble' in post_submit_check.get('errorText', '') or any('trouble' in btn['text'].lower() for btn in post_submit_check.get('availableButtons', [])):
logger.debug("Found 'Having trouble' page, looking for bypass button...")
trouble_selectors = [
"button:has-text(\"No, I'll try\")",
'button:has-text("No, I\'ll try")',
'button:has-text("try")',
"a:has-text(\"No, I'll try\")",
'a:has-text("No, I\'ll try")',
'[role="button"]:has-text("try")'
]
for trouble_sel in trouble_selectors:
try:
if await iframe.is_visible(trouble_sel):
button_text = await iframe.text_content(trouble_sel)
logger.debug(f"Clicking trouble bypass button: {trouble_sel} (text: {button_text})")
await iframe.click(trouble_sel)
await page.wait_for_timeout(3000)
break
except Exception:
pass
except Exception:
pass
except Exception as oauth_fill_error:
logger.debug(f"Error filling OAuth iframe credentials: {oauth_fill_error}")
# Get all visible elements for debugging
visible_elements = await iframe.evaluate('''() => {
const elements = [];
document.querySelectorAll('*').forEach(el => {
if (el.offsetParent !== null && el.textContent.trim()) {
const rect = el.getBoundingClientRect();
if (rect.width > 0 && rect.height > 0) {
elements.push({
tag: el.tagName.toLowerCase(),
text: el.textContent.trim().substring(0, 100),
type: el.type || '',
id: el.id || '',
className: el.className || ''
});
}
}
});
return elements.slice(0, 10); // Limit to first 10 visible elements
}''')
logger.debug(f"Visible elements in OAuth iframe: {visible_elements}")
except Exception as debug_error:
logger.debug(f"Error debugging iframe content: {debug_error}")
# Check for OAuth consent buttons and specific Schwab flow buttons
consent_selectors = [
'button:has-text("No, I\'ll try")', # Schwab account assistance bypass
'button:has-text("Continue")', 'button:has-text("Allow")',
'button:has-text("Accept")', 'button:has-text("Approve")',
'input[type="submit"]', 'button[type="submit"]',
'button', 'input[type="button"]' # Add generic button selectors
]
for sel in consent_selectors:
try:
if await iframe.is_visible(sel):
button_text = await iframe.text_content(sel)
logger.debug(f"Found clickable element: {sel} with text: {button_text}")
await iframe.click(sel)
logger.debug(f"Clicked OAuth element: {sel}")
await page.wait_for_timeout(2000)
break
except Exception:
pass
except Exception as e:
logger.debug(f"Error interacting with OAuth iframe: {e}")
# Now wait for completion
try:
await page.wait_for_function(
'''() => {
const mainUrl = window.location.href;
console.log('OAuth completion check - Current URL:', mainUrl);
// Check if main page redirected to success
const successPatterns = [
'/summary', '/app/', '/Apps/', '/accounts/', '/Areas/Accounts',
'/clientapps/accounts', '/positions', '/portfolio'
];
if (successPatterns.some(pattern => mainUrl.includes(pattern))) {
console.log('Main page redirected to success:', mainUrl);
return true;
}
// Check if iframe has navigated to 2FA/authenticators
const iframe = document.querySelector('#lmsIframe');
if (iframe) {
const iframeSrc = iframe.getAttribute('src');
console.log('OAuth completion iframe src:', iframeSrc);
if (iframeSrc && iframeSrc.includes('authenticators')) {
console.log('2FA/authenticators detected');
return true;
}
}
return false;
}''', timeout=30000
)
logger.debug("OAuth flow completion detected")
except Exception as completion_error:
logger.debug(f"OAuth completion timeout: {completion_error}")
# Capture debug artifacts on OAuth timeout
try:
png = await page.screenshot(full_page=True)
save_debug_artifact("debug_oauth_timeout.png", png)
html = await page.content()
save_debug_artifact("debug_oauth_timeout.html", html)
# Try to get iframe content as well
iframe_element = await page.query_selector('#lmsIframe')
if iframe_element:
iframe = await iframe_element.content_frame()
if iframe:
iframe_html = await iframe.content()
save_debug_artifact("debug_oauth_iframe.html", iframe_html)
iframe_png = await iframe.screenshot()
save_debug_artifact("debug_oauth_iframe.png", iframe_png)
logger.debug("OAuth timeout debug artifacts saved")
except Exception:
pass
except Exception as e:
logger.debug(f"OAuth flow monitoring error: {e}")
# Check current URL and iframe after OAuth flow
current_url = page.url
logger.debug(f"Final URL check after OAuth: {current_url}")
# Check iframe content for 2FA or completion status
try:
iframe_element = await page.query_selector('#lmsIframe')
if iframe_element:
iframe_src = await iframe_element.get_attribute('src')
logger.debug(f"Final iframe src: {iframe_src}")
if iframe_src and 'authenticators' in iframe_src:
logger.debug("2FA/authenticators page detected - updating current_url for 2FA handling")
current_url = iframe_src # Set current_url to iframe src for 2FA detection
except Exception:
pass
if 'authenticators' not in current_url:
current_url = page.url
logger.debug(f"Current URL after OAuth flow: {current_url}")
# Fast success
if any(p in current_url for p in ['/clientapps/accounts', '/accounts/', '/app/', '/Apps/', '/Areas/Accounts', '/summary']):
cookies = await context.cookies()
# Convert Cookie objects to dictionaries for JSON serialization
cookie_dicts = [
{
'name': cookie.get('name', ''),
'value': cookie.get('value', ''),
'domain': cookie.get('domain', ''),
'path': cookie.get('path', ''),
'expires': cookie.get('expires', -1),
'httpOnly': cookie.get('httpOnly', False),
'secure': cookie.get('secure', False),
'sameSite': cookie.get('sameSite', 'Lax')
}
for cookie in cookies
]
cookies_path = get_cookies_path()
with open(cookies_path, 'w') as f:
json.dump(cookie_dicts, f, indent=2)
# Log authentication summary
login_duration = time.time() - login_start_time
logger.debug("OAuth success; cookies saved")
logger.debug(f"Login completed in {login_duration:.1f}s, {len(auth_api_calls)} API calls, {fpa_fixes} FPA fixes")
await browser.close()
return cookie_dicts
# Authenticators page (2FA)
if 'authenticators' in current_url or 'otp/code' in current_url:
print("\n" + "="*70)
print("📱 MFA APPROVAL REQUIRED")
print("="*70)
print("Attempting to intercept n8n webhook for SMS text code...")
logger.info("Checking for SMS/Text message option...")
try:
target = page
iframe_element = await page.query_selector('#lmsIframe')
if iframe_element:
target = await iframe_element.content_frame() or page
sms_button = await target.query_selector('button:has-text("Text message"), button:has-text("SMS"), :text-matches("Text message", "i"), :text-matches("SMS", "i")')
if sms_button:
logger.info("Clicking the SMS/Text message option to send code...")
await sms_button.click()
await page.wait_for_timeout(2000)
continue_btn = await target.query_selector('button:has-text("Continue"), button:has-text("Next")')
if continue_btn:
await continue_btn.click()
await page.wait_for_timeout(2000)
except Exception as e:
logger.debug(f"Could not automatically click SMS option (maybe already sent code): {e}")
logger.info("Polling n8n webhook for MFA code (up to 2 minutes)…")
import aiohttp
import asyncio
mfa_code = None
try:
logger.info("Waiting 5 seconds for email code to arrive before checking webhook...")
await asyncio.sleep(5)
async with aiohttp.ClientSession() as session:
for attempt in range(2):
print(f"Checking webhook for code (attempt {attempt + 1}/2)...")
try:
async with session.get("https://n8n.ext.ben.io/webhook/schwab-token") as resp:
if resp.status == 200:
data = await resp.json()
if data:
# Parse based on expected n8n output formats
code = None
if isinstance(data, dict):
code = data.get("code") or data.get("token") or data.get("login_code") or data.get("body", {}).get("code")
elif isinstance(data, list) and len(data) > 0:
code = data[-1].get("code") or data[-1].get("token") or data[-1].get("login_code")
if code:
mfa_code = code
logger.info(f"Got MFA code from webhook: {mfa_code}")
break
else:
logger.warning("Webhook returned data but no code found inside.")
else:
logger.warning(f"Webhook returned status code {resp.status}")
except Exception as e:
logger.debug(f"Webhook poll error: {e}")
if not mfa_code and attempt == 0:
logger.info("Token not found, waiting 10 seconds before 1 retry...")
await asyncio.sleep(10)
except Exception as loop_e:
logger.error(f"Error during webhook checking: {loop_e}")
if mfa_code:
logger.info("Entering MFA code into form...")
try:
# When on the sws-gateway-nr OTP page, the form is rendered
# directly on the page — there is no #lmsIframe wrapper here.
# Only look for the iframe when on the client.schwab.com login page.
current_page_url = page.url
if 'sws-gateway-nr' in current_page_url or 'otp' in current_page_url:
logger.debug(f"OTP page detected ({current_page_url}), querying form directly on page")
target = page
else:
target = page
iframe_element = await page.query_selector('#lmsIframe')
if iframe_element:
target = await iframe_element.content_frame() or page
# Commonly used ids and attributes for OTP inputs on Schwab
code_input = await target.query_selector('input[type="text"], input[type="tel"], input[name*="code" i], input[id*="code" i], input[autocomplete*="one-time-code" i]')
if code_input:
await code_input.fill(str(mfa_code))
logger.info(f"Filled OTP field with code: {mfa_code}")
# Sometimes the submit button specifically says 'Trust device' or similar
submit_btn = await target.query_selector('button[type="submit"], button:has-text("Continue"), button:has-text("Verify"), button:has-text("Submit"), button:has-text("Log in"), button[id*="submit"], button[id*="continue"]')
if submit_btn:
await submit_btn.click()
print("Submitted MFA code successfully.")
await page.wait_for_timeout(5000)
else:
logger.warning("Submit button not found after filling OTP — waiting anyway")
await page.wait_for_timeout(5000)
else:
logger.error("OTP input field not found on page")
except Exception as e:
logger.error(f"Failed to enter MFA code: {e}")
try:
await page.wait_for_function(
'''() => {
const url = window.location.href;
console.log('2FA wait check - Current URL:', url);
// More comprehensive URL patterns for Schwab success pages
const successPatterns = [
'SignOn/Auth', # OAuth auth code stage
'/app/', # Main app
'/Apps/', # Alternative app path
'/accounts/', # Accounts page
'/Areas/Accounts', # Alternative accounts path
'/summary', # Account summary
'/clientapps/accounts', # Client apps accounts
'/positions', # Positions page
'/portfolio' # Portfolio page
];
const success = successPatterns.some(pattern => url.includes(pattern));
if (success) {
console.log('2FA wait completed successfully - URL changed to:', url);
}
return success;
}''', timeout=60000
)
logger.debug("2FA flow completed/detected successfully")
except Exception as e:
logger.error(f"2FA timeout or error: {e}")
current_url_after_timeout = page.url
logger.debug(f"URL after 2FA timeout: {current_url_after_timeout}")
# Check if we're actually on a success page despite the timeout
success_patterns = ['/app/', '/Apps/', '/accounts/', '/Areas/Accounts', '/summary', '/clientapps/accounts', '/positions', '/portfolio']
if any(pattern in current_url_after_timeout for pattern in success_patterns):
logger.info("2FA timeout, but URL indicates success - continuing")
else:
# Capture debug artifacts on 2FA failure
try:
png = await page.screenshot(full_page=True)
save_debug_artifact("debug_2fa_timeout.png", png)
html = await page.content()
save_debug_artifact("debug_2fa_timeout.html", html)
logger.debug("2FA timeout debug artifacts saved")
except Exception:
pass
# Try one more time with a shorter timeout to see if page redirected
logger.info("Attempting 2FA recovery check...")
try:
await page.wait_for_function(
'''() => {
const url = window.location.href;
const successPatterns = ['/app/', '/Apps/', '/accounts/', '/Areas/Accounts', '/summary', '/clientapps/accounts', '/positions', '/portfolio'];
return successPatterns.some(pattern => url.includes(pattern));
}''', timeout=10000
)
logger.info("2FA recovery successful")
except Exception:
logger.error("2FA recovery failed - login unsuccessful")
# Clean up bad cookies on MFA failure to prevent bad state
cookies_path = get_cookies_path()
try:
logger.warning("Removing invalid cookies after MFA failure to prevent bad state")
if os.path.exists(cookies_path):
os.remove(cookies_path)
logger.debug(f"Removed invalid cookies at {cookies_path}")
except Exception as cleanup_error:
logger.error(f"Failed to clean up cookies: {cleanup_error}")
raise
# Authorization code stage
elif 'SignOn/Auth' in current_url:
try:
await page.wait_for_function(
'''() => {
const url = window.location.href;
return url.includes('/app/') || url.includes('/Apps/') || url.includes('/accounts/') || url.includes('/Areas/Accounts');
}''', timeout=60000
)
except Exception:
logger.debug("OAuth token exchange timeout; attempting to continue")
# Try clicking continue/accept if present
try:
await page.wait_for_selector('button, input[type="submit"], a[href*="app"]', timeout=10000)
for sel in ['button:has-text("Continue")', 'button:has-text("Accept")', 'button:has-text("Allow")', 'input[type="submit"]', 'a[href*="/app/"]']:
try:
if await page.is_visible(sel):
await page.click(sel)
break
except Exception:
pass
except Exception:
pass
# Finalize
try:
await page.wait_for_load_state('domcontentloaded', timeout=5000)
except Exception:
pass
final_url = page.url
logger.debug(f"Final URL after OAuth flow: {final_url}")
if any(p in final_url for p in ['/app/', '/Apps/', '/accounts/', '/Areas/Accounts']):
cookies = await context.cookies()
# Convert Cookie objects to dictionaries for JSON serialization
cookie_dicts = [
{
'name': cookie.get('name', ''),
'value': cookie.get('value', ''),
'domain': cookie.get('domain', ''),
'path': cookie.get('path', ''),
'expires': cookie.get('expires', -1),
'httpOnly': cookie.get('httpOnly', False),
'secure': cookie.get('secure', False),
'sameSite': cookie.get('sameSite', 'Lax')
}
for cookie in cookies
]
cookies_path = get_cookies_path()
with open(cookies_path, 'w') as f:
json.dump(cookie_dicts, f, indent=2)
logger.debug("OAuth success; cookies saved")
await browser.close()
return cookie_dicts
except Exception as e:
logger.error(f"Login error: {e}")
# Failure path: capture artifacts
try:
png = await page.screenshot(full_page=True)
save_debug_artifact("debug_oauth_failed.png", png)
html = await page.content()
save_debug_artifact("debug_oauth_failed.html", html)
except Exception:
pass
# Clean up bad cookies on login failure to prevent bad state
cookies_path = get_cookies_path()
try:
logger.warning("Removing invalid cookies after login failure to prevent bad state")
if os.path.exists(cookies_path):
os.remove(cookies_path)
logger.debug(f"Removed invalid cookies at {cookies_path}")
except Exception as cleanup_error:
logger.error(f"Failed to clean up cookies: {cleanup_error}")
await browser.close()
return None
async def ensure_cookies() -> Optional[List[Dict[str, Any]]]:
    """Return session cookies, reusing the on-disk cache or logging in again.

    The cookie file is reused when `is_session_valid()` accepts it and it
    parses to a non-empty value. Otherwise credentials are read from the
    config and, if both a username and a password are present, the stale
    cookie file is removed and `login_to_schwab` is invoked.

    IMPORTANT: Stale cookies can cause authentication failures even if they
    haven't technically expired, which is why the old file is deleted before
    a fresh login is attempted.

    Returns:
        The cookie list on success, or None when no valid session could be
        established. Failures are logged, not raised.
    """
    logger = logging.getLogger(__name__)
    cookies_path = get_cookies_path()

    # Try existing cookies if they appear to contain a valid session
    try:
        if await is_session_valid():
            logger.debug("Existing cookies appear valid, attempting to load...")
            try:
                with open(cookies_path, 'r') as f:
                    cookies = json.load(f)
                if cookies:
                    logger.info(f"Using {len(cookies)} cached cookies from disk")
                    return cookies
            except (FileNotFoundError, json.JSONDecodeError):
                logger.debug("Could not load valid cookies from disk")
    except Exception as e:
        logger.debug(f"Cookie validation failed: {e}")

    # If we reach here, existing cookies are not valid
    logger.info("Existing cookies not valid or not found. Attempting fresh login...")

    # Attempt automated login using config credentials
    try:
        from ..core.config import load_config, get_schwab_credentials
        config = load_config()
        username, password = get_schwab_credentials(config)
        if not (username and password):
            # Say why no login is attempted instead of falling through to the
            # generic "unable to establish session" error with no context.
            logger.warning("Schwab username/password not configured; skipping automated login")
        else:
            # IMPORTANT: Clear stale cookies before attempting new login
            # This prevents authentication failures from mixing old session state with new credentials
            try:
                if os.path.exists(cookies_path):
                    logger.debug(f"Clearing stale cookies before fresh login attempt: {cookies_path}")
                    os.remove(cookies_path)
            except Exception as cleanup_error:
                # Best effort: a leftover file should not block the login itself
                logger.warning(f"Failed to clear stale cookies: {cleanup_error}")
            logger.info("Starting fresh login process...")
            cookies = await login_to_schwab(username, password)
            if cookies:
                logger.info(f"Fresh login successful, obtained {len(cookies)} cookies")
                return cookies
            logger.error("Fresh login failed to produce cookies")
    except Exception as e:
        logger.error(f"Login attempt failed: {e}")

    logger.error("Unable to establish valid session")
    return None
# ----- Helpers migrated from legacy scraper -----
async def find_login_fields_dynamically(iframe) -> Tuple[Optional[str], Optional[str]]:
    """Try multiple strategies to find username/password fields inside iframe.

    Strategies are tried in order; the first that yields a pair wins:

    1. Form-based: the first ``<form>`` containing both a text-like input and
       a password input; selectors are derived from each input's
       autocomplete, id or name attribute.
    2. Proximity/attributes: a password input exists and one of a fixed list
       of username selectors is visible in the frame.
    3. Scoring: every ``<input>`` is scored on login-related keywords,
       ``autocomplete="username"`` and visibility; the best-scoring text
       input is paired with the first visible password input.

    Args:
        iframe: Frame-like object providing async ``query_selector_all`` and
            ``is_visible`` (presumably a Playwright frame; only these two
            methods are used here).

    Returns:
        ``(username_selector, password_selector)``, or ``(None, None)`` when
        nothing is found or an unexpected error occurs (logged at debug
        level, never raised).
    """
    logger = logging.getLogger(__name__)
    text_fallback = 'input[type="text"], input[type="email"], input:not([type])'
    password_fallback = 'input[type="password"]'

    async def selector_for(inp, fallback: str) -> str:
        """Build a selector from autocomplete, id or name; else use fallback."""
        ac = (await inp.get_attribute('autocomplete')) or ''
        iid = (await inp.get_attribute('id')) or ''
        nm = (await inp.get_attribute('name')) or ''
        if ac:
            return f'input[autocomplete="{ac}"]'
        if iid:
            return f'#{iid}'
        if nm:
            return f'input[name="{nm}"]'
        return fallback

    try:
        # Strategy 1: Form-based
        for form in await iframe.query_selector_all('form'):
            text_inputs = await form.query_selector_all(text_fallback)
            pwd_inputs = await form.query_selector_all(password_fallback)
            if text_inputs and pwd_inputs:
                # Each field gets its own fallback so an attribute-less
                # password input is never addressed by a text-input selector.
                return (
                    await selector_for(text_inputs[0], text_fallback),
                    await selector_for(pwd_inputs[0], password_fallback),
                )

        # Strategy 2: Proximity/attributes
        username_probes = (
            'input[autocomplete="username"]', 'input[type="email"]', 'input[name*="login" i]',
            'input[id*="login" i]', 'input[name*="user" i]', 'input[id*="user" i]',
            'input[aria-label*="Login" i]', 'input[placeholder*="Login" i]', 'input[placeholder*="User" i]', 'input[type="text"]',
        )
        for pwd in await iframe.query_selector_all(password_fallback):
            pwd_id = (await pwd.get_attribute('id')) or ''
            pwd_name = (await pwd.get_attribute('name')) or ''
            ac = (await pwd.get_attribute('autocomplete')) or ''
            if ac == 'current-password':
                pwd_sel = 'input[autocomplete="current-password"]'
            elif pwd_id:
                pwd_sel = f'#{pwd_id}'
            elif pwd_name:
                pwd_sel = f'input[name="{pwd_name}"]'
            else:
                pwd_sel = password_fallback
            for cand in username_probes:
                try:
                    if await iframe.is_visible(cand):
                        return cand, pwd_sel
                except Exception:
                    # Best effort: a probe that errors is simply skipped
                    pass

        # Strategy 3: Scoring
        username_candidates: List[Tuple[str, int]] = []
        password_candidates: List[str] = []
        for el in await iframe.query_selector_all('input'):
            input_type = ((await el.get_attribute('type')) or '').lower()
            name = (await el.get_attribute('name')) or ''
            iid = (await el.get_attribute('id')) or ''
            placeholder = (await el.get_attribute('placeholder')) or ''
            aria = (await el.get_attribute('aria-label')) or ''
            ac = (await el.get_attribute('autocomplete')) or ''

            if input_type in ('text', 'email', ''):
                text = f"{name} {iid} {placeholder} {aria}".lower()
                score = sum(1 for kw in ('login', 'user', 'email', 'username', 'id', 'account') if kw in text)
                if ac.lower() == 'username':
                    score += 3
                if await iframe.is_visible(f'input[name="{name}"]' if name else (f'#{iid}' if iid else 'input')):
                    score += 2
                if score > 0:
                    # Inputs with no usable attribute cannot be addressed and are dropped
                    if ac:
                        username_candidates.append((f'input[autocomplete="{ac}"]', score))
                    elif name:
                        username_candidates.append((f'input[name="{name}"]', score))
                    elif iid:
                        username_candidates.append((f'#{iid}', score))

            if input_type == 'password':
                if await iframe.is_visible(f'input[name="{name}"]' if name else (f'#{iid}' if iid else password_fallback)):
                    if ac:
                        password_candidates.append(f'input[autocomplete="{ac}"]')
                    elif name:
                        password_candidates.append(f'input[name="{name}"]')
                    elif iid:
                        password_candidates.append(f'#{iid}')
                    else:
                        password_candidates.append(password_fallback)

        if username_candidates and password_candidates:
            return max(username_candidates, key=lambda x: x[1])[0], password_candidates[0]
        return None, None
    except Exception as e:
        logger.debug(f"Dynamic detection error: {e}")
        return None, None
async def resolve_login_inner_frame(iframe_root):
    """Pick the frame that holds the password input.

    Some deployments nest the actual login form inside another iframe. The
    root frame is returned if it contains a password input; otherwise each
    direct child iframe is loaded and checked in turn. If none qualifies, or
    anything goes wrong, the root frame is returned unchanged.
    """
    password_selector = 'input[type="password"]'
    try:
        # A failure probing the root is ignored; the children are still checked
        try:
            root_field = await iframe_root.query_selector(password_selector)
        except Exception:
            root_field = None
        if root_field:
            return iframe_root

        for nested in await iframe_root.query_selector_all('iframe'):
            try:
                inner = await nested.content_frame()
                if inner:
                    await inner.wait_for_load_state('domcontentloaded', timeout=5000)
                    if await inner.query_selector(password_selector):
                        return inner
            except Exception:
                # An unloadable child frame is skipped, not fatal
                continue
    except Exception:
        pass
    return iframe_root
async def robust_iframe_wait(page, iframe_selector: str = '#lmsIframe', max_retries: int = 3, timeout: int = 30000):
    """Robustly wait for login iframe with retries and multiple strategies.

    Each attempt tries, in order:

    1. Wait for ``iframe_selector`` with a budget of ``timeout // max_retries``
       milliseconds.
    2. Scan all ``<iframe>`` elements for one whose id contains "lms"
       (case-insensitive).
    3. Scan all ``<iframe>`` elements for one whose src contains "login",
       "auth" or "signin" (case-insensitive).

    Between failed attempts the page is idled for 2000 ms.

    Args:
        page: Page-like object providing async ``wait_for_selector``,
            ``query_selector_all`` and ``wait_for_timeout``.
        iframe_selector: Selector of the expected login iframe.
        max_retries: Number of attempts.
        timeout: Total budget in ms for the primary selector wait, split
            evenly across attempts.

    Returns:
        ``(iframe_element, frame)`` for the first iframe whose content frame
        loads, or ``(None, None)`` if every attempt fails.
    """
    logger = logging.getLogger(__name__)
    for attempt in range(max_retries):
        try:
            try:
                # Use the handle returned by the bounded wait directly; a second
                # unbounded wait_for_selector would ignore the caller's timeout.
                iframe_element = await page.wait_for_selector(iframe_selector, timeout=timeout // max_retries)
                if iframe_element:
                    iframe = await iframe_element.content_frame()
                    if iframe:
                        await iframe.wait_for_load_state('domcontentloaded', timeout=10000)
                        return iframe_element, iframe
            except Exception:
                # Fall through to the looser lookups below
                pass
            try:
                iframes = await page.query_selector_all('iframe')
                for iframe_elem in iframes:
                    iframe_id = await iframe_elem.get_attribute('id')
                    if 'lms' in (iframe_id or '').lower():
                        iframe = await iframe_elem.content_frame()
                        if iframe:
                            await iframe.wait_for_load_state('domcontentloaded', timeout=5000)
                            return iframe_elem, iframe
            except Exception:
                pass
            try:
                iframe_elems = await page.query_selector_all('iframe')
                for iframe_elem in iframe_elems:
                    src = (await iframe_elem.get_attribute('src')) or ''
                    if any(k in src.lower() for k in ['login', 'auth', 'signin']):
                        iframe = await iframe_elem.content_frame()
                        if iframe:
                            await iframe.wait_for_load_state('domcontentloaded', timeout=5000)
                            return iframe_elem, iframe
            except Exception:
                pass
            if attempt < max_retries - 1:
                await page.wait_for_timeout(2000)
        except Exception:
            if attempt < max_retries - 1:
                await page.wait_for_timeout(2000)
    logger.debug("Failed to find login iframe after all attempts")
    return None, None