Fix build: Bundle schwab_scraper source and use local dependencies
All checks were successful
Build and Push Docker Image / build (push) Successful in 34s
All checks were successful
Build and Push Docker Image / build (push) Successful in 34s
This commit is contained in:
0
schwab_scraper/features/transactions/__init__.py
Normal file
0
schwab_scraper/features/transactions/__init__.py
Normal file
47
schwab_scraper/features/transactions/parser.py
Normal file
47
schwab_scraper/features/transactions/parser.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import io
|
||||
from dataclasses import asdict
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from ...core.models import TransactionRecord, TransactionData, AccountInfo
|
||||
|
||||
|
||||
def parse_csv_content(csv_bytes: bytes) -> List[TransactionRecord]:
|
||||
"""
|
||||
Parse Schwab transaction CSV bytes into a list of TransactionRecord.
|
||||
|
||||
Expected headers:
|
||||
Date,Action,Symbol,Description,Quantity,Price,Fees & Comm,Amount
|
||||
"""
|
||||
text_stream = io.StringIO(csv_bytes.decode("utf-8"))
|
||||
reader = csv.DictReader(text_stream)
|
||||
|
||||
records: List[TransactionRecord] = []
|
||||
for row in reader:
|
||||
records.append(
|
||||
TransactionRecord(
|
||||
date=(row.get("Date") or "").strip(),
|
||||
action=(row.get("Action") or "").strip(),
|
||||
symbol=(row.get("Symbol") or None) or None,
|
||||
description=(row.get("Description") or "").strip(),
|
||||
quantity=(row.get("Quantity") or None) or None,
|
||||
price=(row.get("Price") or None) or None,
|
||||
fees_comm=(row.get("Fees & Comm") or None) or None,
|
||||
amount=(row.get("Amount") or None) or None,
|
||||
)
|
||||
)
|
||||
return records
|
||||
|
||||
|
||||
def to_dicts(transaction_data: TransactionData) -> Dict[str, Any]:
|
||||
"""Convert TransactionData to plain dicts for JSON output."""
|
||||
return {
|
||||
"account_info": asdict(transaction_data.account_info),
|
||||
"transactions": [asdict(r) for r in transaction_data.transactions],
|
||||
"date_range": transaction_data.date_range,
|
||||
"export_date": transaction_data.export_date,
|
||||
"total_transactions": transaction_data.total_transactions,
|
||||
"source": transaction_data.source,
|
||||
}
|
||||
2523
schwab_scraper/features/transactions/scraper.py
Normal file
2523
schwab_scraper/features/transactions/scraper.py
Normal file
File diff suppressed because it is too large
Load Diff
833
schwab_scraper/features/transactions/service.py
Normal file
833
schwab_scraper/features/transactions/service.py
Normal file
@@ -0,0 +1,833 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Optional, Dict, Any, List
|
||||
from datetime import datetime, timezone
|
||||
import re
|
||||
|
||||
from ...browser.auth import ensure_cookies
|
||||
from ...core.config import load_config, get_playwright_url
|
||||
from ...browser.client import connect, new_context, new_page
|
||||
from ...browser.navigation import goto_with_auth_check
|
||||
from .scraper import (
|
||||
perform_export_download,
|
||||
perform_export_download_enhanced,
|
||||
discover_accounts_from_page,
|
||||
discover_accounts_with_numbers,
|
||||
)
|
||||
from .parser import parse_csv_content
|
||||
from ...storage.cache import (
|
||||
write_cached_transaction_csv,
|
||||
read_cached_transaction_csv,
|
||||
TRANSACTION_CACHE_DIR,
|
||||
)
|
||||
from ...core.models import AccountInfo, TransactionData
|
||||
from ...core import Envelope, ErrorType, fail, ok
|
||||
import os
|
||||
|
||||
|
||||
async def _get_transaction_history_enhanced_impl(
|
||||
account: Optional[str] = None,
|
||||
start_date: Optional[str] = None,
|
||||
end_date: Optional[str] = None,
|
||||
time_period: Optional[str] = None,
|
||||
debug: bool = False,
|
||||
) -> Envelope[TransactionData]:
|
||||
"""
|
||||
Enhanced export with reliable account switching and filename preservation.
|
||||
|
||||
Args:
|
||||
account: Account identifier (ending digits like '674', type like 'PLA Assets', or full label like 'PLA_Assets_XXX674').
|
||||
✅ ENHANCED: Now supports reliable automatic account switching with verification.
|
||||
start_date, end_date: Reserved for future "Custom" range support.
|
||||
time_period: One of pre-defined periods (e.g., "Current Month", "Last 6 Months"). If None, uses page default.
|
||||
debug: Enable debug logging and screenshots.
|
||||
|
||||
Returns:
|
||||
Dict with transaction data, account info, and export metadata.
|
||||
"""
|
||||
print("Starting enhanced transaction export...")
|
||||
if debug:
|
||||
print(f" Account: {account}")
|
||||
print(f" Time period: {time_period}")
|
||||
|
||||
# Load configuration and cookies
|
||||
config = load_config()
|
||||
playwright_url = get_playwright_url(config)
|
||||
cookies = await ensure_cookies()
|
||||
|
||||
if not cookies:
|
||||
return fail(
|
||||
"Could not establish session. Check credentials or manually refresh cookies.json.",
|
||||
ErrorType.AUTHENTICATION,
|
||||
retryable=False,
|
||||
)
|
||||
|
||||
# Connect to browser
|
||||
p, browser = await connect(playwright_url)
|
||||
context = None
|
||||
page = None
|
||||
|
||||
try:
|
||||
context = await new_context(browser, cookies=cookies)
|
||||
page = await new_page(context)
|
||||
|
||||
# Use the enhanced export function
|
||||
export_result = await perform_export_download_enhanced(
|
||||
page=page,
|
||||
time_period=time_period,
|
||||
account=account,
|
||||
debug=debug,
|
||||
context=context,
|
||||
preserve_filename=True
|
||||
)
|
||||
|
||||
if not export_result.get("success"):
|
||||
# Try fallback to cached data
|
||||
if account:
|
||||
if debug:
|
||||
print("Enhanced export failed, trying cached fallback...")
|
||||
|
||||
# Determine account label for cache lookup
|
||||
account_label = account
|
||||
if account.isdigit():
|
||||
# Try to discover accounts to find full label
|
||||
try:
|
||||
accounts = await discover_accounts_with_numbers(page, debug=debug)
|
||||
for acc in accounts:
|
||||
if acc['ending'] == account[-3:]:
|
||||
account_label = acc['label']
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
cached_bytes = read_cached_transaction_csv(account_label)
|
||||
if cached_bytes:
|
||||
if debug:
|
||||
print(f"Using cached data for {account_label}")
|
||||
|
||||
# Parse the cached CSV bytes
|
||||
records = parse_csv_content(cached_bytes)
|
||||
|
||||
# Build account info from the label
|
||||
account_type = account_label.split('_')[0] if '_' in account_label else "Unknown"
|
||||
account_ending = account_label[-3:] if account_label[-3:].isdigit() else "000"
|
||||
|
||||
data = TransactionData(
|
||||
account_info=AccountInfo(
|
||||
account_type=account_type,
|
||||
account_ending=account_ending,
|
||||
full_description=account_label,
|
||||
is_selected=True,
|
||||
),
|
||||
transactions=records,
|
||||
date_range=time_period or "Unknown",
|
||||
export_date="Unknown",
|
||||
total_transactions=len(records),
|
||||
source="cache",
|
||||
)
|
||||
return ok(data)
|
||||
|
||||
return fail(
|
||||
export_result.get("error", "Enhanced export failed."),
|
||||
ErrorType.UNKNOWN,
|
||||
retryable=True,
|
||||
)
|
||||
|
||||
# Parse the exported CSV
|
||||
saved_path = export_result.get("saved_path")
|
||||
if not saved_path or not os.path.exists(saved_path):
|
||||
return fail("Export file not found after download", ErrorType.PARSING, retryable=True)
|
||||
|
||||
with open(saved_path, 'r', encoding='utf-8') as f:
|
||||
csv_content = f.read()
|
||||
|
||||
parsed_data = parse_csv_content(csv_content.encode('utf-8'))
|
||||
if not parsed_data:
|
||||
return fail("Failed to parse CSV: No transactions found", ErrorType.PARSING, retryable=True)
|
||||
|
||||
# Build response
|
||||
account_info = export_result.get("account_info", {})
|
||||
transactions = parsed_data
|
||||
|
||||
# Cache the results
|
||||
if account_info.get("account_ending"):
|
||||
account_label = f"{account_info.get('account_type', 'Unknown')}_XXX{account_info.get('account_ending')}"
|
||||
try:
|
||||
# Generate timestamp for filename
|
||||
timestamp = datetime.now(timezone.utc).strftime('%Y%m%d-%H%M%S')
|
||||
|
||||
# Convert transactions back to CSV format for caching
|
||||
import csv
|
||||
import io
|
||||
|
||||
# Create CSV content from transactions
|
||||
output = io.StringIO()
|
||||
writer = csv.writer(output)
|
||||
|
||||
# Write header
|
||||
writer.writerow(["Date", "Action", "Symbol", "Description", "Quantity", "Price", "Fees & Comm", "Amount"])
|
||||
|
||||
# Write transaction data
|
||||
for transaction in transactions:
|
||||
writer.writerow([
|
||||
transaction.date,
|
||||
transaction.action,
|
||||
transaction.symbol or "",
|
||||
transaction.description,
|
||||
transaction.quantity or "",
|
||||
transaction.price or "",
|
||||
transaction.fees_comm or "",
|
||||
transaction.amount or ""
|
||||
])
|
||||
|
||||
csv_bytes = output.getvalue().encode('utf-8')
|
||||
write_cached_transaction_csv(account_label, timestamp, csv_bytes)
|
||||
|
||||
if debug:
|
||||
print(f"Cached transaction data for {account_label}")
|
||||
except Exception as e:
|
||||
if debug:
|
||||
print(f"Failed to cache data: {e}")
|
||||
|
||||
data = TransactionData(
|
||||
account_info=AccountInfo(
|
||||
account_type=account_info.get("account_type", "Unknown"),
|
||||
account_ending=account_info.get("account_ending", "000"),
|
||||
full_description=account_info.get("full_description", ""),
|
||||
is_selected=account_info.get("is_selected", True),
|
||||
),
|
||||
transactions=transactions,
|
||||
date_range=time_period or "Unknown",
|
||||
export_date=datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC'),
|
||||
total_transactions=len(transactions),
|
||||
source="live",
|
||||
)
|
||||
|
||||
if debug:
|
||||
print(f"✅ Enhanced export successful: {len(transactions)} transactions")
|
||||
|
||||
return ok(data)
|
||||
|
||||
except Exception as e:
|
||||
if debug:
|
||||
print(f"Enhanced export exception: {e}")
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return fail(f"Enhanced export failed: {str(e)}", ErrorType.UNKNOWN, retryable=True)
|
||||
|
||||
finally:
|
||||
if page:
|
||||
await page.close()
|
||||
if context:
|
||||
await context.close()
|
||||
if browser:
|
||||
await browser.close()
|
||||
|
||||
|
||||
async def _ensure_cookies() -> Optional[List[Dict[str, Any]]]:
|
||||
# Delegate to shared helper
|
||||
return await ensure_cookies()
|
||||
|
||||
|
||||
def _get_latest_cache_csv_filename(account_label: str) -> Optional[str]:
|
||||
"""Return the most recent CSV filename under the account's cache directory, if any."""
|
||||
import os
|
||||
dir_path = os.path.join(TRANSACTION_CACHE_DIR, account_label)
|
||||
if not os.path.isdir(dir_path):
|
||||
return None
|
||||
csv_files = [f for f in os.listdir(dir_path) if f.lower().endswith('.csv')]
|
||||
if not csv_files:
|
||||
return None
|
||||
# Sort by mtime if possible; fall back to lexical
|
||||
try:
|
||||
csv_files.sort(key=lambda f: os.path.getmtime(os.path.join(dir_path, f)))
|
||||
except Exception:
|
||||
csv_files.sort()
|
||||
return csv_files[-1]
|
||||
|
||||
|
||||
def _is_cache_fresh_for_label(account_label: str, max_age_hours: int = 24) -> bool:
|
||||
"""Return True if the most recent CSV for `account_label` is within `max_age_hours`."""
|
||||
import os, time
|
||||
dir_path = os.path.join(TRANSACTION_CACHE_DIR, account_label)
|
||||
if not os.path.isdir(dir_path):
|
||||
return False
|
||||
csv_files = [f for f in os.listdir(dir_path) if f.lower().endswith('.csv')]
|
||||
if not csv_files:
|
||||
return False
|
||||
# Use mtime (file creation/update time) to assess freshness
|
||||
newest_path = max((os.path.join(dir_path, f) for f in csv_files), key=lambda p: os.path.getmtime(p))
|
||||
age_seconds = time.time() - os.path.getmtime(newest_path)
|
||||
return age_seconds <= max_age_hours * 3600
|
||||
|
||||
|
||||
def _match_account_label_from_cache(account_query: Optional[str]) -> Optional[str]:
|
||||
"""Resolve a matching account label from cache directories given a query like '604' or 'PLA_Assets_XXX674'.
|
||||
Only returns a label if a fresh (<=24h) CSV exists for that label.
|
||||
"""
|
||||
import os
|
||||
if not os.path.isdir(TRANSACTION_CACHE_DIR):
|
||||
return None
|
||||
labels = [name for name in os.listdir(TRANSACTION_CACHE_DIR)
|
||||
if os.path.isdir(os.path.join(TRANSACTION_CACHE_DIR, name))]
|
||||
if not labels:
|
||||
return None
|
||||
|
||||
def label_matches(label: str, query: str) -> bool:
|
||||
if not query:
|
||||
return True
|
||||
if query == label:
|
||||
return True
|
||||
# match by ending digits
|
||||
if query.isdigit() and label.endswith(query):
|
||||
return True
|
||||
# substring match (e.g., 'PLA_Assets')
|
||||
if query.lower() in label.lower():
|
||||
return True
|
||||
return False
|
||||
|
||||
# If no query provided: return latest fresh label if any
|
||||
if not account_query:
|
||||
fresh_labels = [lbl for lbl in labels if _is_cache_fresh_for_label(lbl)]
|
||||
if not fresh_labels:
|
||||
return None
|
||||
fresh_labels.sort(key=lambda n: os.path.getmtime(os.path.join(TRANSACTION_CACHE_DIR, n)), reverse=True)
|
||||
return fresh_labels[0]
|
||||
|
||||
# Query provided: only return a matching fresh label
|
||||
for lbl in labels:
|
||||
if label_matches(lbl, account_query) and _is_cache_fresh_for_label(lbl):
|
||||
return lbl
|
||||
|
||||
# No fresh matching label
|
||||
return None
|
||||
|
||||
|
||||
async def _get_transaction_history_impl(
|
||||
account: Optional[str] = None,
|
||||
start_date: Optional[str] = None,
|
||||
end_date: Optional[str] = None,
|
||||
time_period: Optional[str] = None,
|
||||
debug: bool = False,
|
||||
) -> Envelope[TransactionData]:
|
||||
"""
|
||||
Export and parse transaction history for the selected account.
|
||||
|
||||
Args:
|
||||
account: Account identifier (ending digits like '604', name like 'Joint', or full label like 'PLA_Assets_XXX674').
|
||||
⚠️ IMPORTANT: Due to Schwab's website design, automatic account switching causes browser crashes.
|
||||
If the wrong account is selected, you'll get clear instructions to manually select the correct account first.
|
||||
start_date, end_date: Reserved for future "Custom" range support.
|
||||
time_period: One of pre-defined periods (e.g., "Current Month", "Last 6 Months"). If None, uses page default.
|
||||
"""
|
||||
# Basic input validation for optional custom date params
|
||||
def _parse_date(date_str: str) -> Optional[datetime]:
|
||||
# Accept YYYY-MM-DD or MM/DD/YYYY
|
||||
if re.fullmatch(r"\d{4}-\d{2}-\d{2}", date_str):
|
||||
try:
|
||||
return datetime.strptime(date_str, "%Y-%m-%d")
|
||||
except ValueError:
|
||||
return None
|
||||
if re.fullmatch(r"\d{2}/\d{2}/\d{4}", date_str):
|
||||
try:
|
||||
return datetime.strptime(date_str, "%m/%d/%Y")
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
|
||||
if start_date:
|
||||
start_dt = _parse_date(start_date)
|
||||
if not start_dt:
|
||||
return fail(f"Invalid start_date format: '{start_date}'. Use YYYY-MM-DD or MM/DD/YYYY.", ErrorType.VALIDATION, retryable=False)
|
||||
else:
|
||||
start_dt = None
|
||||
|
||||
if end_date:
|
||||
end_dt = _parse_date(end_date)
|
||||
if not end_dt:
|
||||
return fail(f"Invalid end_date format: '{end_date}'. Use YYYY-MM-DD or MM/DD/YYYY.", ErrorType.VALIDATION, retryable=False)
|
||||
else:
|
||||
end_dt = None
|
||||
|
||||
if start_dt and end_dt and start_dt > end_dt:
|
||||
return fail(
|
||||
"start_date must be on or before end_date",
|
||||
ErrorType.VALIDATION,
|
||||
retryable=False,
|
||||
)
|
||||
|
||||
cookies = await _ensure_cookies()
|
||||
if not cookies:
|
||||
account_label = _match_account_label_from_cache(account)
|
||||
if account_label:
|
||||
cached_bytes = read_cached_transaction_csv(account_label)
|
||||
if cached_bytes:
|
||||
records = parse_csv_content(cached_bytes)
|
||||
export_date = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
|
||||
account_info = AccountInfo(
|
||||
account_type=account_label.split('_')[0],
|
||||
account_ending=account_label[-3:],
|
||||
full_description=account_label,
|
||||
is_selected=True,
|
||||
)
|
||||
data = TransactionData(
|
||||
account_info=account_info,
|
||||
transactions=records,
|
||||
date_range=time_period or "Cache",
|
||||
export_date=export_date,
|
||||
total_transactions=len(records),
|
||||
source="cache",
|
||||
)
|
||||
return ok(data)
|
||||
return fail(
|
||||
"Unable to establish a session. Provide credentials in config.json or a valid cookies.json.",
|
||||
ErrorType.AUTHENTICATION,
|
||||
retryable=False,
|
||||
)
|
||||
|
||||
config = load_config()
|
||||
playwright_url = get_playwright_url(config)
|
||||
|
||||
p, browser = await connect(playwright_url)
|
||||
context = None
|
||||
page = None
|
||||
try:
|
||||
context = await new_context(browser, cookies=cookies)
|
||||
page = await new_page(context)
|
||||
|
||||
try:
|
||||
download = await perform_export_download(
|
||||
page,
|
||||
time_period=time_period,
|
||||
account=account,
|
||||
debug=debug,
|
||||
context=context,
|
||||
)
|
||||
csv_bytes = download["content"]
|
||||
account_label = download["label"]
|
||||
ts = download["ts"]
|
||||
|
||||
# Cache
|
||||
write_cached_transaction_csv(account_label, ts, csv_bytes)
|
||||
|
||||
# Parse
|
||||
records = parse_csv_content(csv_bytes)
|
||||
|
||||
# Build metadata
|
||||
export_date = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
|
||||
account_info = AccountInfo(
|
||||
account_type=account_label.split('_')[0],
|
||||
account_ending=account_label[-3:],
|
||||
full_description=account_label,
|
||||
is_selected=True,
|
||||
)
|
||||
data = TransactionData(
|
||||
account_info=account_info,
|
||||
transactions=records,
|
||||
date_range=time_period or "Page Default",
|
||||
export_date=export_date,
|
||||
total_transactions=len(records),
|
||||
source="live",
|
||||
)
|
||||
return ok(data)
|
||||
except Exception as e:
|
||||
# First failure: attempt one reconnect and retry, then fallback to cache
|
||||
if debug:
|
||||
try:
|
||||
print(f"DEBUG: perform_export_download failed: {type(e).__name__}: {e}")
|
||||
except Exception:
|
||||
pass
|
||||
# Attempt one reconnect if browser/context appears closed
|
||||
try:
|
||||
# Cleanup previous if possible
|
||||
try:
|
||||
if context is not None:
|
||||
await context.close()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
await browser.close()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
await p.stop()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Reconnect
|
||||
p, browser = await connect(playwright_url)
|
||||
context = await new_context(browser, cookies=cookies)
|
||||
page = await new_page(context)
|
||||
# Retry export
|
||||
if debug:
|
||||
print("DEBUG: Retrying perform_export_download after reconnect...")
|
||||
download = await perform_export_download(
|
||||
page,
|
||||
time_period=time_period,
|
||||
account=account,
|
||||
debug=debug,
|
||||
context=context,
|
||||
)
|
||||
csv_bytes = download["content"]
|
||||
account_label = download["label"]
|
||||
ts = download["ts"]
|
||||
|
||||
# Cache
|
||||
write_cached_transaction_csv(account_label, ts, csv_bytes)
|
||||
|
||||
# Parse
|
||||
records = parse_csv_content(csv_bytes)
|
||||
|
||||
export_date = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
|
||||
account_info = AccountInfo(
|
||||
account_type=account_label.split('_')[0],
|
||||
account_ending=account_label[-3:],
|
||||
full_description=account_label,
|
||||
is_selected=True,
|
||||
)
|
||||
data = TransactionData(
|
||||
account_info=account_info,
|
||||
transactions=records,
|
||||
date_range=time_period or "Page Default",
|
||||
export_date=export_date,
|
||||
total_transactions=len(records),
|
||||
source="live",
|
||||
)
|
||||
return ok(data)
|
||||
except Exception as e2:
|
||||
if debug:
|
||||
try:
|
||||
print(f"DEBUG: Retry after reconnect failed: {type(e2).__name__}: {e2}")
|
||||
except Exception:
|
||||
pass
|
||||
# Fall back to cache if available and fresh
|
||||
account_label = _match_account_label_from_cache(account)
|
||||
if account_label:
|
||||
cached_bytes = read_cached_transaction_csv(account_label)
|
||||
if cached_bytes:
|
||||
records = parse_csv_content(cached_bytes)
|
||||
export_date = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
|
||||
account_info = AccountInfo(
|
||||
account_type=account_label.split('_')[0],
|
||||
account_ending=account_label[-3:],
|
||||
full_description=account_label,
|
||||
is_selected=True,
|
||||
)
|
||||
data = TransactionData(
|
||||
account_info=account_info,
|
||||
transactions=records,
|
||||
date_range=time_period or "Cache",
|
||||
export_date=export_date,
|
||||
total_transactions=len(records),
|
||||
source="cache",
|
||||
)
|
||||
return ok(data)
|
||||
return fail("Export failed and no fresh cache available", ErrorType.UNKNOWN, retryable=True)
|
||||
|
||||
except Exception as e:
|
||||
return fail(str(e), ErrorType.UNKNOWN, retryable=True)
|
||||
|
||||
finally:
|
||||
try:
|
||||
if context is not None:
|
||||
await context.close()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
await browser.close()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
await p.stop()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _get_cache_accounts(debug: bool = False) -> List[Dict[str, Any]]:
|
||||
"""Get accounts from cache directory fallback with enhanced validation."""
|
||||
from ...storage.cache import TRANSACTION_CACHE_DIR
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
if not os.path.isdir(TRANSACTION_CACHE_DIR):
|
||||
if debug:
|
||||
print(f"DEBUG: Cache directory does not exist: {TRANSACTION_CACHE_DIR}")
|
||||
return []
|
||||
|
||||
out = []
|
||||
cache_dirs = []
|
||||
|
||||
# Collect all cache directories with metadata
|
||||
for name in os.listdir(TRANSACTION_CACHE_DIR):
|
||||
path = os.path.join(TRANSACTION_CACHE_DIR, name)
|
||||
if os.path.isdir(path):
|
||||
try:
|
||||
# Get directory modification time and file count
|
||||
stat = os.stat(path)
|
||||
csv_files = [f for f in os.listdir(path) if f.endswith('.csv')]
|
||||
cache_dirs.append({
|
||||
'name': name,
|
||||
'path': path,
|
||||
'mtime': stat.st_mtime,
|
||||
'csv_count': len(csv_files),
|
||||
'csv_files': csv_files
|
||||
})
|
||||
except Exception as e:
|
||||
if debug:
|
||||
print(f"DEBUG: Error processing cache dir {name}: {e}")
|
||||
continue
|
||||
|
||||
# Sort by modification time (most recent first) to prioritize active accounts
|
||||
cache_dirs.sort(key=lambda x: x['mtime'], reverse=True)
|
||||
|
||||
if debug:
|
||||
print(f"DEBUG: Found {len(cache_dirs)} cache directories")
|
||||
|
||||
for cache_info in cache_dirs:
|
||||
name = cache_info['name']
|
||||
csv_files = cache_info['csv_files']
|
||||
|
||||
if not csv_files:
|
||||
if debug:
|
||||
print(f"DEBUG: Skipping {name} - no CSV files")
|
||||
continue
|
||||
|
||||
try:
|
||||
# Normalize using filename parser to ensure consistent label
|
||||
normalized_label = name
|
||||
account_type = None
|
||||
account_ending = None
|
||||
|
||||
# Strategy 1: Use directory name if it matches expected pattern
|
||||
if re.match(r"^[A-Za-z_]+_XXX\d{3,4}$", name):
|
||||
normalized_label = name
|
||||
parts = name.split('_XXX')
|
||||
account_type = parts[0].replace('_', ' ')
|
||||
account_ending = parts[1] if len(parts) > 1 else name[-3:]
|
||||
else:
|
||||
# Strategy 2: Parse from most recent CSV filename
|
||||
try:
|
||||
from .scraper import parse_suggested_filename
|
||||
latest_csv = sorted(csv_files)[-1] # Get most recent file
|
||||
parsed_filename = parse_suggested_filename(latest_csv)
|
||||
normalized_label = parsed_filename["label"]
|
||||
|
||||
# Extract type and ending from parsed label
|
||||
if '_XXX' in normalized_label:
|
||||
parts = normalized_label.split('_XXX')
|
||||
account_type = parts[0].replace('_', ' ')
|
||||
account_ending = parts[1] if len(parts) > 1 else normalized_label[-3:]
|
||||
except Exception as e:
|
||||
if debug:
|
||||
print(f"DEBUG: Failed to parse filename for {name}: {e}")
|
||||
# Strategy 3: Fallback to directory name parsing
|
||||
normalized_label = name
|
||||
account_type = name
|
||||
account_ending = name[-3:] if name[-3:].isdigit() else "000"
|
||||
|
||||
# Validate the parsed data
|
||||
if not account_ending or not account_ending.isdigit() or len(account_ending) < 3:
|
||||
if debug:
|
||||
print(f"DEBUG: Invalid account ending for {name}: {account_ending}")
|
||||
continue
|
||||
|
||||
# Create account entry
|
||||
account_entry = {
|
||||
"label": normalized_label,
|
||||
"type": account_type or normalized_label.split('_')[0],
|
||||
"ending": account_ending[-3:], # Ensure 3 digits
|
||||
"cache_info": {
|
||||
"last_updated": datetime.fromtimestamp(cache_info['mtime']).isoformat(),
|
||||
"csv_count": cache_info['csv_count']
|
||||
}
|
||||
}
|
||||
|
||||
out.append(account_entry)
|
||||
|
||||
if debug:
|
||||
print(f"DEBUG: Added cache account: {normalized_label} ({account_type} ending {account_ending[-3:]}) - {cache_info['csv_count']} files")
|
||||
|
||||
except Exception as e:
|
||||
if debug:
|
||||
print(f"DEBUG: Error processing cache account {name}: {e}")
|
||||
continue
|
||||
|
||||
if debug:
|
||||
print(f"DEBUG: Successfully processed {len(out)} accounts from cache")
|
||||
if not out:
|
||||
print(f"DEBUG: Cache directory contents: {os.listdir(TRANSACTION_CACHE_DIR) if os.path.isdir(TRANSACTION_CACHE_DIR) else 'N/A'}")
|
||||
|
||||
return out
|
||||
|
||||
|
||||
async def _list_available_accounts_impl(debug: bool = False) -> List[Dict[str, Any]]:
|
||||
"""Return list of available accounts from live page when possible; fall back to cache with enhanced reliability."""
|
||||
if debug:
|
||||
print("DEBUG: Starting account listing with enhanced discovery...")
|
||||
|
||||
# Try live discovery with enhanced error handling
|
||||
cookies = await _ensure_cookies()
|
||||
if cookies:
|
||||
if debug:
|
||||
print("DEBUG: Session cookies available, attempting live account discovery...")
|
||||
|
||||
config = load_config()
|
||||
playwright_url = get_playwright_url(config)
|
||||
p, browser = await connect(playwright_url)
|
||||
context = None
|
||||
page = None
|
||||
try:
|
||||
context = await new_context(browser, cookies=cookies)
|
||||
page = await new_page(context)
|
||||
|
||||
# Use centralized auth-aware navigation with retry
|
||||
max_auth_attempts = 2
|
||||
auth_success = False
|
||||
|
||||
for auth_attempt in range(max_auth_attempts):
|
||||
if debug:
|
||||
print(f"DEBUG: Authentication attempt {auth_attempt + 1}/{max_auth_attempts}...")
|
||||
|
||||
auth_success = await goto_with_auth_check(page, context, "https://client.schwab.com/app/accounts/history/#/", debug=debug)
|
||||
if auth_success:
|
||||
break
|
||||
elif auth_attempt < max_auth_attempts - 1:
|
||||
if debug:
|
||||
print("DEBUG: Authentication failed, retrying...")
|
||||
await page.wait_for_timeout(3000)
|
||||
|
||||
if not auth_success:
|
||||
if debug:
|
||||
print("DEBUG: All authentication attempts failed")
|
||||
raise Exception("Authentication failed after multiple attempts")
|
||||
|
||||
if debug:
|
||||
print("DEBUG: Successfully authenticated, discovering accounts from live dropdown...")
|
||||
|
||||
# Enhanced account discovery with fallback strategies
|
||||
accounts = []
|
||||
|
||||
try:
|
||||
accounts = await discover_accounts_from_page(page, debug=debug)
|
||||
if debug:
|
||||
print(f"DEBUG: Live account discovery returned {len(accounts)} accounts")
|
||||
except Exception as e:
|
||||
if debug:
|
||||
print(f"DEBUG: Live account discovery failed: {e}")
|
||||
accounts = []
|
||||
|
||||
# Enhanced result processing
|
||||
if accounts:
|
||||
if debug:
|
||||
print(f"DEBUG: Successfully discovered {len(accounts)} accounts from live page:")
|
||||
for acc in accounts:
|
||||
print(f"DEBUG: - {acc['label']} ({acc['type']} ending {acc['ending']})")
|
||||
|
||||
# Always try to enrich with cache data for completeness
|
||||
cache_accounts = _get_cache_accounts(debug=debug)
|
||||
if cache_accounts:
|
||||
if debug:
|
||||
print(f"DEBUG: Found {len(cache_accounts)} accounts in cache, merging...")
|
||||
|
||||
# Merge live and cache, preferring live data but keeping unique cache entries
|
||||
combined = {acc['ending']: acc for acc in cache_accounts}
|
||||
live_endings = set()
|
||||
|
||||
for live_acc in accounts:
|
||||
combined[live_acc['ending']] = live_acc # Live data takes precedence
|
||||
live_endings.add(live_acc['ending'])
|
||||
|
||||
result = list(combined.values())
|
||||
if debug:
|
||||
print(f"DEBUG: Final merged result: {len(result)} accounts")
|
||||
for acc in result:
|
||||
source = "live" if acc['ending'] in live_endings else "cache"
|
||||
print(f"DEBUG: - {acc['label']} ({acc['type']} ending {acc['ending']}) [{source}]")
|
||||
|
||||
return result
|
||||
else:
|
||||
if debug:
|
||||
print("DEBUG: No cache data available, returning live accounts only")
|
||||
return accounts
|
||||
else:
|
||||
if debug:
|
||||
print("DEBUG: No accounts discovered from live page, falling back to cache only")
|
||||
|
||||
except Exception as e:
|
||||
if debug:
|
||||
print(f"DEBUG: Live account discovery failed with error: {e}")
|
||||
# Continue to cache fallback
|
||||
|
||||
finally:
|
||||
# Enhanced cleanup
|
||||
cleanup_tasks = []
|
||||
if context is not None:
|
||||
cleanup_tasks.append(context.close())
|
||||
if browser is not None:
|
||||
cleanup_tasks.append(browser.close())
|
||||
if p is not None:
|
||||
cleanup_tasks.append(p.stop())
|
||||
|
||||
for task in cleanup_tasks:
|
||||
try:
|
||||
await task
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
if debug:
|
||||
print("DEBUG: No session cookies available, skipping live discovery")
|
||||
|
||||
# Enhanced cache fallback
|
||||
if debug:
|
||||
print("DEBUG: Using cache-only fallback for account listing...")
|
||||
|
||||
cache_accounts = _get_cache_accounts(debug=debug)
|
||||
if cache_accounts:
|
||||
if debug:
|
||||
print(f"DEBUG: Successfully retrieved {len(cache_accounts)} accounts from cache")
|
||||
return cache_accounts
|
||||
else:
|
||||
if debug:
|
||||
print("DEBUG: No accounts found in cache either")
|
||||
return []
|
||||
|
||||
|
||||
async def list_available_accounts(debug: bool = False) -> Envelope[List[Dict[str, Any]]]:
|
||||
try:
|
||||
accounts = await _list_available_accounts_impl(debug=debug)
|
||||
return ok(accounts)
|
||||
except Exception as exc:
|
||||
return fail(str(exc), ErrorType.UNKNOWN, retryable=True)
|
||||
|
||||
|
||||
async def get_transaction_history(
|
||||
account: Optional[str] = None,
|
||||
start_date: Optional[str] = None,
|
||||
end_date: Optional[str] = None,
|
||||
time_period: Optional[str] = None,
|
||||
debug: bool = False,
|
||||
) -> Envelope[TransactionData]:
|
||||
return await _get_transaction_history_impl(
|
||||
account=account,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
time_period=time_period,
|
||||
debug=debug,
|
||||
)
|
||||
|
||||
|
||||
async def get_transaction_history_enhanced(
|
||||
account: Optional[str] = None,
|
||||
start_date: Optional[str] = None,
|
||||
end_date: Optional[str] = None,
|
||||
time_period: Optional[str] = None,
|
||||
debug: bool = False,
|
||||
) -> Envelope[TransactionData]:
|
||||
return await _get_transaction_history_enhanced_impl(
|
||||
account=account,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
time_period=time_period,
|
||||
debug=debug,
|
||||
)
|
||||
Reference in New Issue
Block a user