from __future__ import annotations import json from typing import Optional, Dict, Any, List from datetime import datetime, timezone import re from ...browser.auth import ensure_cookies from ...core.config import load_config, get_playwright_url from ...browser.client import connect, new_context, new_page from ...browser.navigation import goto_with_auth_check from .scraper import ( perform_export_download, perform_export_download_enhanced, discover_accounts_from_page, discover_accounts_with_numbers, ) from .parser import parse_csv_content from ...storage.cache import ( write_cached_transaction_csv, read_cached_transaction_csv, TRANSACTION_CACHE_DIR, ) from ...core.models import AccountInfo, TransactionData from ...core import Envelope, ErrorType, fail, ok import os async def _get_transaction_history_enhanced_impl( account: Optional[str] = None, start_date: Optional[str] = None, end_date: Optional[str] = None, time_period: Optional[str] = None, debug: bool = False, ) -> Envelope[TransactionData]: """ Enhanced export with reliable account switching and filename preservation. Args: account: Account identifier (ending digits like '674', type like 'PLA Assets', or full label like 'PLA_Assets_XXX674'). ✅ ENHANCED: Now supports reliable automatic account switching with verification. start_date, end_date: Reserved for future "Custom" range support. time_period: One of pre-defined periods (e.g., "Current Month", "Last 6 Months"). If None, uses page default. debug: Enable debug logging and screenshots. Returns: Dict with transaction data, account info, and export metadata. """ print("Starting enhanced transaction export...") if debug: print(f" Account: {account}") print(f" Time period: {time_period}") # Load configuration and cookies config = load_config() playwright_url = get_playwright_url(config) cookies = await ensure_cookies() if not cookies: return fail( "Could not establish session. Check credentials or manually refresh cookies.json.", ErrorType.AUTHENTICATION, retryable=False, ) # Connect to browser p, browser = await connect(playwright_url) context = None page = None try: context = await new_context(browser, cookies=cookies) page = await new_page(context) # Use the enhanced export function export_result = await perform_export_download_enhanced( page=page, time_period=time_period, account=account, debug=debug, context=context, preserve_filename=True ) if not export_result.get("success"): # Try fallback to cached data if account: if debug: print("Enhanced export failed, trying cached fallback...") # Determine account label for cache lookup account_label = account if account.isdigit(): # Try to discover accounts to find full label try: accounts = await discover_accounts_with_numbers(page, debug=debug) for acc in accounts: if acc['ending'] == account[-3:]: account_label = acc['label'] break except Exception: pass cached_bytes = read_cached_transaction_csv(account_label) if cached_bytes: if debug: print(f"Using cached data for {account_label}") # Parse the cached CSV bytes records = parse_csv_content(cached_bytes) # Build account info from the label account_type = account_label.split('_')[0] if '_' in account_label else "Unknown" account_ending = account_label[-3:] if account_label[-3:].isdigit() else "000" data = TransactionData( account_info=AccountInfo( account_type=account_type, account_ending=account_ending, full_description=account_label, is_selected=True, ), transactions=records, date_range=time_period or "Unknown", export_date="Unknown", total_transactions=len(records), source="cache", ) return ok(data) return fail( export_result.get("error", "Enhanced export failed."), ErrorType.UNKNOWN, retryable=True, ) # Parse the exported CSV saved_path = export_result.get("saved_path") if not saved_path or not os.path.exists(saved_path): return fail("Export file not found after download", ErrorType.PARSING, retryable=True) with open(saved_path, 'r', encoding='utf-8') as f: csv_content = f.read() parsed_data = parse_csv_content(csv_content.encode('utf-8')) if not parsed_data: return fail("Failed to parse CSV: No transactions found", ErrorType.PARSING, retryable=True) # Build response account_info = export_result.get("account_info", {}) transactions = parsed_data # Cache the results if account_info.get("account_ending"): account_label = f"{account_info.get('account_type', 'Unknown')}_XXX{account_info.get('account_ending')}" try: # Generate timestamp for filename timestamp = datetime.now(timezone.utc).strftime('%Y%m%d-%H%M%S') # Convert transactions back to CSV format for caching import csv import io # Create CSV content from transactions output = io.StringIO() writer = csv.writer(output) # Write header writer.writerow(["Date", "Action", "Symbol", "Description", "Quantity", "Price", "Fees & Comm", "Amount"]) # Write transaction data for transaction in transactions: writer.writerow([ transaction.date, transaction.action, transaction.symbol or "", transaction.description, transaction.quantity or "", transaction.price or "", transaction.fees_comm or "", transaction.amount or "" ]) csv_bytes = output.getvalue().encode('utf-8') write_cached_transaction_csv(account_label, timestamp, csv_bytes) if debug: print(f"Cached transaction data for {account_label}") except Exception as e: if debug: print(f"Failed to cache data: {e}") data = TransactionData( account_info=AccountInfo( account_type=account_info.get("account_type", "Unknown"), account_ending=account_info.get("account_ending", "000"), full_description=account_info.get("full_description", ""), is_selected=account_info.get("is_selected", True), ), transactions=transactions, date_range=time_period or "Unknown", export_date=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC'), total_transactions=len(transactions), source="live", ) if debug: print(f"✅ Enhanced export successful: {len(transactions)} transactions") return ok(data) except Exception as e: if debug: print(f"Enhanced export exception: {e}") import traceback traceback.print_exc() return fail(f"Enhanced export failed: {str(e)}", ErrorType.UNKNOWN, retryable=True) finally: if page: await page.close() if context: await context.close() if browser: await browser.close() async def _ensure_cookies() -> Optional[List[Dict[str, Any]]]: # Delegate to shared helper return await ensure_cookies() def _get_latest_cache_csv_filename(account_label: str) -> Optional[str]: """Return the most recent CSV filename under the account's cache directory, if any.""" import os dir_path = os.path.join(TRANSACTION_CACHE_DIR, account_label) if not os.path.isdir(dir_path): return None csv_files = [f for f in os.listdir(dir_path) if f.lower().endswith('.csv')] if not csv_files: return None # Sort by mtime if possible; fall back to lexical try: csv_files.sort(key=lambda f: os.path.getmtime(os.path.join(dir_path, f))) except Exception: csv_files.sort() return csv_files[-1] def _is_cache_fresh_for_label(account_label: str, max_age_hours: int = 24) -> bool: """Return True if the most recent CSV for `account_label` is within `max_age_hours`.""" import os, time dir_path = os.path.join(TRANSACTION_CACHE_DIR, account_label) if not os.path.isdir(dir_path): return False csv_files = [f for f in os.listdir(dir_path) if f.lower().endswith('.csv')] if not csv_files: return False # Use mtime (file creation/update time) to assess freshness newest_path = max((os.path.join(dir_path, f) for f in csv_files), key=lambda p: os.path.getmtime(p)) age_seconds = time.time() - os.path.getmtime(newest_path) return age_seconds <= max_age_hours * 3600 def _match_account_label_from_cache(account_query: Optional[str]) -> Optional[str]: """Resolve a matching account label from cache directories given a query like '604' or 'PLA_Assets_XXX674'. Only returns a label if a fresh (<=24h) CSV exists for that label. """ import os if not os.path.isdir(TRANSACTION_CACHE_DIR): return None labels = [name for name in os.listdir(TRANSACTION_CACHE_DIR) if os.path.isdir(os.path.join(TRANSACTION_CACHE_DIR, name))] if not labels: return None def label_matches(label: str, query: str) -> bool: if not query: return True if query == label: return True # match by ending digits if query.isdigit() and label.endswith(query): return True # substring match (e.g., 'PLA_Assets') if query.lower() in label.lower(): return True return False # If no query provided: return latest fresh label if any if not account_query: fresh_labels = [lbl for lbl in labels if _is_cache_fresh_for_label(lbl)] if not fresh_labels: return None fresh_labels.sort(key=lambda n: os.path.getmtime(os.path.join(TRANSACTION_CACHE_DIR, n)), reverse=True) return fresh_labels[0] # Query provided: only return a matching fresh label for lbl in labels: if label_matches(lbl, account_query) and _is_cache_fresh_for_label(lbl): return lbl # No fresh matching label return None async def _get_transaction_history_impl( account: Optional[str] = None, start_date: Optional[str] = None, end_date: Optional[str] = None, time_period: Optional[str] = None, debug: bool = False, ) -> Envelope[TransactionData]: """ Export and parse transaction history for the selected account. Args: account: Account identifier (ending digits like '604', name like 'Joint', or full label like 'PLA_Assets_XXX674'). ⚠️ IMPORTANT: Due to Schwab's website design, automatic account switching causes browser crashes. If the wrong account is selected, you'll get clear instructions to manually select the correct account first. start_date, end_date: Reserved for future "Custom" range support. time_period: One of pre-defined periods (e.g., "Current Month", "Last 6 Months"). If None, uses page default. """ # Basic input validation for optional custom date params def _parse_date(date_str: str) -> Optional[datetime]: # Accept YYYY-MM-DD or MM/DD/YYYY if re.fullmatch(r"\d{4}-\d{2}-\d{2}", date_str): try: return datetime.strptime(date_str, "%Y-%m-%d") except ValueError: return None if re.fullmatch(r"\d{2}/\d{2}/\d{4}", date_str): try: return datetime.strptime(date_str, "%m/%d/%Y") except ValueError: return None return None if start_date: start_dt = _parse_date(start_date) if not start_dt: return fail(f"Invalid start_date format: '{start_date}'. Use YYYY-MM-DD or MM/DD/YYYY.", ErrorType.VALIDATION, retryable=False) else: start_dt = None if end_date: end_dt = _parse_date(end_date) if not end_dt: return fail(f"Invalid end_date format: '{end_date}'. Use YYYY-MM-DD or MM/DD/YYYY.", ErrorType.VALIDATION, retryable=False) else: end_dt = None if start_dt and end_dt and start_dt > end_dt: return fail( "start_date must be on or before end_date", ErrorType.VALIDATION, retryable=False, ) cookies = await _ensure_cookies() if not cookies: account_label = _match_account_label_from_cache(account) if account_label: cached_bytes = read_cached_transaction_csv(account_label) if cached_bytes: records = parse_csv_content(cached_bytes) export_date = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC') account_info = AccountInfo( account_type=account_label.split('_')[0], account_ending=account_label[-3:], full_description=account_label, is_selected=True, ) data = TransactionData( account_info=account_info, transactions=records, date_range=time_period or "Cache", export_date=export_date, total_transactions=len(records), source="cache", ) return ok(data) return fail( "Unable to establish a session. Provide credentials in config.json or a valid cookies.json.", ErrorType.AUTHENTICATION, retryable=False, ) config = load_config() playwright_url = get_playwright_url(config) p, browser = await connect(playwright_url) context = None page = None try: context = await new_context(browser, cookies=cookies) page = await new_page(context) try: download = await perform_export_download( page, time_period=time_period, account=account, debug=debug, context=context, ) csv_bytes = download["content"] account_label = download["label"] ts = download["ts"] # Cache write_cached_transaction_csv(account_label, ts, csv_bytes) # Parse records = parse_csv_content(csv_bytes) # Build metadata export_date = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC') account_info = AccountInfo( account_type=account_label.split('_')[0], account_ending=account_label[-3:], full_description=account_label, is_selected=True, ) data = TransactionData( account_info=account_info, transactions=records, date_range=time_period or "Page Default", export_date=export_date, total_transactions=len(records), source="live", ) return ok(data) except Exception as e: # First failure: attempt one reconnect and retry, then fallback to cache if debug: try: print(f"DEBUG: perform_export_download failed: {type(e).__name__}: {e}") except Exception: pass # Attempt one reconnect if browser/context appears closed try: # Cleanup previous if possible try: if context is not None: await context.close() except Exception: pass try: await browser.close() except Exception: pass try: await p.stop() except Exception: pass # Reconnect p, browser = await connect(playwright_url) context = await new_context(browser, cookies=cookies) page = await new_page(context) # Retry export if debug: print("DEBUG: Retrying perform_export_download after reconnect...") download = await perform_export_download( page, time_period=time_period, account=account, debug=debug, context=context, ) csv_bytes = download["content"] account_label = download["label"] ts = download["ts"] # Cache write_cached_transaction_csv(account_label, ts, csv_bytes) # Parse records = parse_csv_content(csv_bytes) export_date = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC') account_info = AccountInfo( account_type=account_label.split('_')[0], account_ending=account_label[-3:], full_description=account_label, is_selected=True, ) data = TransactionData( account_info=account_info, transactions=records, date_range=time_period or "Page Default", export_date=export_date, total_transactions=len(records), source="live", ) return ok(data) except Exception as e2: if debug: try: print(f"DEBUG: Retry after reconnect failed: {type(e2).__name__}: {e2}") except Exception: pass # Fall back to cache if available and fresh account_label = _match_account_label_from_cache(account) if account_label: cached_bytes = read_cached_transaction_csv(account_label) if cached_bytes: records = parse_csv_content(cached_bytes) export_date = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC') account_info = AccountInfo( account_type=account_label.split('_')[0], account_ending=account_label[-3:], full_description=account_label, is_selected=True, ) data = TransactionData( account_info=account_info, transactions=records, date_range=time_period or "Cache", export_date=export_date, total_transactions=len(records), source="cache", ) return ok(data) return fail("Export failed and no fresh cache available", ErrorType.UNKNOWN, retryable=True) except Exception as e: return fail(str(e), ErrorType.UNKNOWN, retryable=True) finally: try: if context is not None: await context.close() except Exception: pass try: await browser.close() except Exception: pass try: await p.stop() except Exception: pass def _get_cache_accounts(debug: bool = False) -> List[Dict[str, Any]]: """Get accounts from cache directory fallback with enhanced validation.""" from ...storage.cache import TRANSACTION_CACHE_DIR import os from datetime import datetime if not os.path.isdir(TRANSACTION_CACHE_DIR): if debug: print(f"DEBUG: Cache directory does not exist: {TRANSACTION_CACHE_DIR}") return [] out = [] cache_dirs = [] # Collect all cache directories with metadata for name in os.listdir(TRANSACTION_CACHE_DIR): path = os.path.join(TRANSACTION_CACHE_DIR, name) if os.path.isdir(path): try: # Get directory modification time and file count stat = os.stat(path) csv_files = [f for f in os.listdir(path) if f.endswith('.csv')] cache_dirs.append({ 'name': name, 'path': path, 'mtime': stat.st_mtime, 'csv_count': len(csv_files), 'csv_files': csv_files }) except Exception as e: if debug: print(f"DEBUG: Error processing cache dir {name}: {e}") continue # Sort by modification time (most recent first) to prioritize active accounts cache_dirs.sort(key=lambda x: x['mtime'], reverse=True) if debug: print(f"DEBUG: Found {len(cache_dirs)} cache directories") for cache_info in cache_dirs: name = cache_info['name'] csv_files = cache_info['csv_files'] if not csv_files: if debug: print(f"DEBUG: Skipping {name} - no CSV files") continue try: # Normalize using filename parser to ensure consistent label normalized_label = name account_type = None account_ending = None # Strategy 1: Use directory name if it matches expected pattern if re.match(r"^[A-Za-z_]+_XXX\d{3,4}$", name): normalized_label = name parts = name.split('_XXX') account_type = parts[0].replace('_', ' ') account_ending = parts[1] if len(parts) > 1 else name[-3:] else: # Strategy 2: Parse from most recent CSV filename try: from .scraper import parse_suggested_filename latest_csv = sorted(csv_files)[-1] # Get most recent file parsed_filename = parse_suggested_filename(latest_csv) normalized_label = parsed_filename["label"] # Extract type and ending from parsed label if '_XXX' in normalized_label: parts = normalized_label.split('_XXX') account_type = parts[0].replace('_', ' ') account_ending = parts[1] if len(parts) > 1 else normalized_label[-3:] except Exception as e: if debug: print(f"DEBUG: Failed to parse filename for {name}: {e}") # Strategy 3: Fallback to directory name parsing normalized_label = name account_type = name account_ending = name[-3:] if name[-3:].isdigit() else "000" # Validate the parsed data if not account_ending or not account_ending.isdigit() or len(account_ending) < 3: if debug: print(f"DEBUG: Invalid account ending for {name}: {account_ending}") continue # Create account entry account_entry = { "label": normalized_label, "type": account_type or normalized_label.split('_')[0], "ending": account_ending[-3:], # Ensure 3 digits "cache_info": { "last_updated": datetime.fromtimestamp(cache_info['mtime']).isoformat(), "csv_count": cache_info['csv_count'] } } out.append(account_entry) if debug: print(f"DEBUG: Added cache account: {normalized_label} ({account_type} ending {account_ending[-3:]}) - {cache_info['csv_count']} files") except Exception as e: if debug: print(f"DEBUG: Error processing cache account {name}: {e}") continue if debug: print(f"DEBUG: Successfully processed {len(out)} accounts from cache") if not out: print(f"DEBUG: Cache directory contents: {os.listdir(TRANSACTION_CACHE_DIR) if os.path.isdir(TRANSACTION_CACHE_DIR) else 'N/A'}") return out async def _list_available_accounts_impl(debug: bool = False) -> List[Dict[str, Any]]: """Return list of available accounts from live page when possible; fall back to cache with enhanced reliability.""" if debug: print("DEBUG: Starting account listing with enhanced discovery...") # Try live discovery with enhanced error handling cookies = await _ensure_cookies() if cookies: if debug: print("DEBUG: Session cookies available, attempting live account discovery...") config = load_config() playwright_url = get_playwright_url(config) p, browser = await connect(playwright_url) context = None page = None try: context = await new_context(browser, cookies=cookies) page = await new_page(context) # Use centralized auth-aware navigation with retry max_auth_attempts = 2 auth_success = False for auth_attempt in range(max_auth_attempts): if debug: print(f"DEBUG: Authentication attempt {auth_attempt + 1}/{max_auth_attempts}...") auth_success = await goto_with_auth_check(page, context, "https://client.schwab.com/app/accounts/history/#/", debug=debug) if auth_success: break elif auth_attempt < max_auth_attempts - 1: if debug: print("DEBUG: Authentication failed, retrying...") await page.wait_for_timeout(3000) if not auth_success: if debug: print("DEBUG: All authentication attempts failed") raise Exception("Authentication failed after multiple attempts") if debug: print("DEBUG: Successfully authenticated, discovering accounts from live dropdown...") # Enhanced account discovery with fallback strategies accounts = [] try: accounts = await discover_accounts_from_page(page, debug=debug) if debug: print(f"DEBUG: Live account discovery returned {len(accounts)} accounts") except Exception as e: if debug: print(f"DEBUG: Live account discovery failed: {e}") accounts = [] # Enhanced result processing if accounts: if debug: print(f"DEBUG: Successfully discovered {len(accounts)} accounts from live page:") for acc in accounts: print(f"DEBUG: - {acc['label']} ({acc['type']} ending {acc['ending']})") # Always try to enrich with cache data for completeness cache_accounts = _get_cache_accounts(debug=debug) if cache_accounts: if debug: print(f"DEBUG: Found {len(cache_accounts)} accounts in cache, merging...") # Merge live and cache, preferring live data but keeping unique cache entries combined = {acc['ending']: acc for acc in cache_accounts} live_endings = set() for live_acc in accounts: combined[live_acc['ending']] = live_acc # Live data takes precedence live_endings.add(live_acc['ending']) result = list(combined.values()) if debug: print(f"DEBUG: Final merged result: {len(result)} accounts") for acc in result: source = "live" if acc['ending'] in live_endings else "cache" print(f"DEBUG: - {acc['label']} ({acc['type']} ending {acc['ending']}) [{source}]") return result else: if debug: print("DEBUG: No cache data available, returning live accounts only") return accounts else: if debug: print("DEBUG: No accounts discovered from live page, falling back to cache only") except Exception as e: if debug: print(f"DEBUG: Live account discovery failed with error: {e}") # Continue to cache fallback finally: # Enhanced cleanup cleanup_tasks = [] if context is not None: cleanup_tasks.append(context.close()) if browser is not None: cleanup_tasks.append(browser.close()) if p is not None: cleanup_tasks.append(p.stop()) for task in cleanup_tasks: try: await task except Exception: pass else: if debug: print("DEBUG: No session cookies available, skipping live discovery") # Enhanced cache fallback if debug: print("DEBUG: Using cache-only fallback for account listing...") cache_accounts = _get_cache_accounts(debug=debug) if cache_accounts: if debug: print(f"DEBUG: Successfully retrieved {len(cache_accounts)} accounts from cache") return cache_accounts else: if debug: print("DEBUG: No accounts found in cache either") return [] async def list_available_accounts(debug: bool = False) -> Envelope[List[Dict[str, Any]]]: try: accounts = await _list_available_accounts_impl(debug=debug) return ok(accounts) except Exception as exc: return fail(str(exc), ErrorType.UNKNOWN, retryable=True) async def get_transaction_history( account: Optional[str] = None, start_date: Optional[str] = None, end_date: Optional[str] = None, time_period: Optional[str] = None, debug: bool = False, ) -> Envelope[TransactionData]: return await _get_transaction_history_impl( account=account, start_date=start_date, end_date=end_date, time_period=time_period, debug=debug, ) async def get_transaction_history_enhanced( account: Optional[str] = None, start_date: Optional[str] = None, end_date: Optional[str] = None, time_period: Optional[str] = None, debug: bool = False, ) -> Envelope[TransactionData]: return await _get_transaction_history_enhanced_impl( account=account, start_date=start_date, end_date=end_date, time_period=time_period, debug=debug, )