|
|
|
|
@@ -1,16 +1,141 @@
|
|
|
|
|
import dataclasses
|
|
|
|
|
import io
|
|
|
|
|
import json
|
|
|
|
|
import logging
|
|
|
|
|
import os
|
|
|
|
|
import sys
|
|
|
|
|
import time
|
|
|
|
|
from contextlib import contextmanager
|
|
|
|
|
from typing import Optional, Any, Tuple
|
|
|
|
|
|
|
|
|
|
from fastmcp import FastMCP
|
|
|
|
|
from starlette.applications import Starlette
|
|
|
|
|
from starlette.responses import JSONResponse
|
|
|
|
|
from starlette.responses import JSONResponse, Response
|
|
|
|
|
from starlette.routing import Route, Mount
|
|
|
|
|
import uvicorn
|
|
|
|
|
|
|
|
|
|
import schwab_scraper.unified_api as api
|
|
|
|
|
from schwab_scraper.storage.cache import read_cached_pdf
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Configure logging so it actually reaches stderr (visible in docker logs).
|
|
|
|
|
# The scraper and MCP libraries log extensively but don't set up handlers
|
|
|
|
|
# when imported as a module, so messages are silently dropped.
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
logging.basicConfig(
|
|
|
|
|
level=logging.INFO,
|
|
|
|
|
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
|
|
|
|
|
stream=sys.stderr,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Ensure the scraper logger propagates to our root handler
|
|
|
|
|
_scraper_logger = logging.getLogger("schwab_scraper")
|
|
|
|
|
_scraper_logger.setLevel(logging.DEBUG if os.getenv("SCHWAB_DEBUG", "").lower() in ("1", "true") else logging.INFO)
|
|
|
|
|
_scraper_logger.propagate = True
|
|
|
|
|
|
|
|
|
|
_startup_logger = logging.getLogger("schwab_mcp_custom")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _read_commit_file(path: str) -> str | None:
|
|
|
|
|
try:
|
|
|
|
|
with open(path) as f:
|
|
|
|
|
return f.read().strip() or None
|
|
|
|
|
except FileNotFoundError:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_scraper_commit = _read_commit_file(
|
|
|
|
|
os.path.join(os.path.dirname(__file__), "schwab-scraper-commit.txt")
|
|
|
|
|
)
|
|
|
|
|
_mcp_commit = _read_commit_file(
|
|
|
|
|
os.path.join(os.path.dirname(__file__), "mcp-server-commit.txt")
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
if _scraper_commit:
|
|
|
|
|
_startup_logger.info("schwab-scraper commit: %s", _scraper_commit)
|
|
|
|
|
else:
|
|
|
|
|
_startup_logger.info("schwab-scraper commit: (not available)")
|
|
|
|
|
|
|
|
|
|
if _mcp_commit:
|
|
|
|
|
_startup_logger.info("mcp-server commit: %s", _mcp_commit)
|
|
|
|
|
else:
|
|
|
|
|
_startup_logger.info("mcp-server commit: (not available)")
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
from importlib.metadata import version as _pkg_version
|
|
|
|
|
|
|
|
|
|
_startup_logger.info("schwab-scraper package version: %s", _pkg_version("schwab-scraper"))
|
|
|
|
|
except Exception:
|
|
|
|
|
_startup_logger.info("schwab-scraper package version: (unknown)")
|
|
|
|
|
|
|
|
|
|
_DEFAULT_BASE_URL = "https://schwab-mcp.ext.ben.io"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Log capture helper — captures scraper logs to a string buffer AND tees
|
|
|
|
|
# them to stderr so they remain visible in docker logs.
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
class _TeeHandler(logging.StreamHandler):
|
|
|
|
|
"""Handler that copies every record to a secondary (StringIO) buffer."""
|
|
|
|
|
|
|
|
|
|
def __init__(self, stream, extra_buf: io.StringIO, level=logging.NOTSET):
|
|
|
|
|
super().__init__(stream)
|
|
|
|
|
self.extra_buf = extra_buf
|
|
|
|
|
self.tee_level = level
|
|
|
|
|
|
|
|
|
|
def emit(self, record):
|
|
|
|
|
super().emit(record)
|
|
|
|
|
if record.levelno >= self.tee_level:
|
|
|
|
|
try:
|
|
|
|
|
msg = self.format(record)
|
|
|
|
|
self.extra_buf.write(msg + "\n")
|
|
|
|
|
self.extra_buf.flush()
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@contextmanager
|
|
|
|
|
def capture_logs(logger_name: str = "schwab_scraper", level: int = logging.DEBUG):
|
|
|
|
|
"""
|
|
|
|
|
Context manager that captures log output to a string buffer
|
|
|
|
|
while still writing to stderr (docker-visible).
|
|
|
|
|
|
|
|
|
|
Yields the buffer so callers can read captured logs after the block.
|
|
|
|
|
"""
|
|
|
|
|
logger = logging.getLogger(logger_name)
|
|
|
|
|
old_level = logger.level
|
|
|
|
|
if old_level > level:
|
|
|
|
|
logger.setLevel(level)
|
|
|
|
|
|
|
|
|
|
buf = io.StringIO()
|
|
|
|
|
handler = _TeeHandler(sys.stderr, buf, level=level)
|
|
|
|
|
handler.setLevel(level)
|
|
|
|
|
handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
|
|
|
|
|
logger.addHandler(handler)
|
|
|
|
|
|
|
|
|
|
# Also tee the root logger in case scraper logs through sub-loggers
|
|
|
|
|
root_old_level = logging.getLogger().level
|
|
|
|
|
if root_old_level > level:
|
|
|
|
|
logging.getLogger().setLevel(level)
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
yield buf
|
|
|
|
|
finally:
|
|
|
|
|
logger.removeHandler(handler)
|
|
|
|
|
if old_level != logger.level:
|
|
|
|
|
logger.setLevel(old_level)
|
|
|
|
|
if root_old_level != logging.getLogger().level:
|
|
|
|
|
logging.getLogger().setLevel(root_old_level)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _enrich_with_logs(result: dict, log_buffer: io.StringIO, debug: bool) -> dict:
|
|
|
|
|
"""Attach captured logs to a result dict when debug=True or on error."""
|
|
|
|
|
logs = log_buffer.getvalue()
|
|
|
|
|
if logs and (debug or not result.get("success", False)):
|
|
|
|
|
result["logs"] = logs
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Monkey-patch mcp.shared.session.RequestResponder to work around a
|
|
|
|
|
@@ -115,18 +240,25 @@ login_manager = LoginManager()
|
|
|
|
|
mcp = FastMCP("SchwabScraper")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _json_default(obj: Any) -> Any:
|
|
|
|
|
"""JSON fallback handler that converts dataclasses to dicts before str()."""
|
|
|
|
|
if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
|
|
|
|
|
return dataclasses.asdict(obj)
|
|
|
|
|
return str(obj)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def serialize(obj: Any) -> str:
|
|
|
|
|
"""Safely serialize Pydantic models or dataclasses to JSON string."""
|
|
|
|
|
if hasattr(obj, "model_dump_json"):
|
|
|
|
|
return obj.model_dump_json()
|
|
|
|
|
elif hasattr(obj, "model_dump"):
|
|
|
|
|
return json.dumps(obj.model_dump(), default=str)
|
|
|
|
|
return json.dumps(obj.model_dump(), default=_json_default)
|
|
|
|
|
elif isinstance(obj, list):
|
|
|
|
|
return json.dumps([
|
|
|
|
|
o.model_dump() if hasattr(o, "model_dump") else o
|
|
|
|
|
for o in obj
|
|
|
|
|
], default=str)
|
|
|
|
|
return json.dumps(obj, default=str)
|
|
|
|
|
], default=_json_default)
|
|
|
|
|
return json.dumps(obj, default=_json_default)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
@@ -177,9 +309,71 @@ async def login(
|
|
|
|
|
"data": None,
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
result = await api.login(username=username, password=password, debug=debug)
|
|
|
|
|
success = result.get("success", False)
|
|
|
|
|
login_manager.record_attempt(success)
|
|
|
|
|
mcp_logger = logging.getLogger("schwab_mcp_custom")
|
|
|
|
|
mcp_logger.info("=== LOGIN TOOL CALLED ===")
|
|
|
|
|
mcp_logger.info(f"debug={debug}, username_provided={bool(username)}, password_provided={bool(password)}")
|
|
|
|
|
|
|
|
|
|
# Diagnostic: if credentials not provided, show what config path would be used
|
|
|
|
|
if not username or not password:
|
|
|
|
|
from schwab_scraper.core.config import get_config_path
|
|
|
|
|
config_path = get_config_path()
|
|
|
|
|
config_exists = os.path.exists(config_path)
|
|
|
|
|
mcp_logger.info(f"Config fallback: path={config_path}, exists={config_exists}")
|
|
|
|
|
|
|
|
|
|
with capture_logs(level=logging.DEBUG if debug else logging.INFO) as log_buf:
|
|
|
|
|
mcp_logger.info("capture_logs context entered")
|
|
|
|
|
if debug:
|
|
|
|
|
mcp_logger.info("DEBUG MODE ENABLED — verbose logging active")
|
|
|
|
|
|
|
|
|
|
# api.login does not exist in unified_api; call the underlying scraper directly
|
|
|
|
|
from schwab_scraper.browser.auth import login_to_schwab
|
|
|
|
|
from schwab_scraper.core.config import get_schwab_credentials, load_config
|
|
|
|
|
|
|
|
|
|
if not username or not password:
|
|
|
|
|
config = load_config()
|
|
|
|
|
username, password = get_schwab_credentials(config)
|
|
|
|
|
|
|
|
|
|
if not username or not password:
|
|
|
|
|
result = {
|
|
|
|
|
"success": False,
|
|
|
|
|
"error": "Username and password are required (or set in config.json)",
|
|
|
|
|
"error_type": "AUTHENTICATION",
|
|
|
|
|
"retryable": False,
|
|
|
|
|
"data": None,
|
|
|
|
|
}
|
|
|
|
|
else:
|
|
|
|
|
try:
|
|
|
|
|
cookies = await login_to_schwab(username, password)
|
|
|
|
|
if cookies:
|
|
|
|
|
result = {
|
|
|
|
|
"success": True,
|
|
|
|
|
"data": {"cookies_count": len(cookies)},
|
|
|
|
|
"error": None,
|
|
|
|
|
"error_type": None,
|
|
|
|
|
"retryable": False,
|
|
|
|
|
}
|
|
|
|
|
else:
|
|
|
|
|
result = {
|
|
|
|
|
"success": False,
|
|
|
|
|
"error": "Login failed — no cookies returned. Check credentials or 2FA status.",
|
|
|
|
|
"error_type": "AUTHENTICATION",
|
|
|
|
|
"retryable": True,
|
|
|
|
|
"data": None,
|
|
|
|
|
}
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
result = {
|
|
|
|
|
"success": False,
|
|
|
|
|
"error": str(exc),
|
|
|
|
|
"error_type": "UNKNOWN",
|
|
|
|
|
"retryable": True,
|
|
|
|
|
"data": None,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
success = result.get("success", False)
|
|
|
|
|
login_manager.record_attempt(success)
|
|
|
|
|
mcp_logger.info(f"login completed — success={success}")
|
|
|
|
|
result = _enrich_with_logs(result, log_buf, debug)
|
|
|
|
|
mcp_logger.info("capture_logs context exited, returning result")
|
|
|
|
|
return serialize(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -190,7 +384,9 @@ async def refresh_session(debug: bool = False) -> str:
|
|
|
|
|
Args:
|
|
|
|
|
debug: Enable debug logging
|
|
|
|
|
"""
|
|
|
|
|
result = await api.refresh_session(debug=debug)
|
|
|
|
|
with capture_logs(level=logging.DEBUG if debug else logging.INFO) as log_buf:
|
|
|
|
|
result = await api.refresh_session(debug=debug)
|
|
|
|
|
result = _enrich_with_logs(result, log_buf, debug)
|
|
|
|
|
return serialize(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -272,6 +468,20 @@ async def get_morningstar_data(ticker: str, debug: bool = False) -> str:
|
|
|
|
|
debug: Enable debug logging
|
|
|
|
|
"""
|
|
|
|
|
result = await api.get_morningstar_data(ticker, debug=debug)
|
|
|
|
|
|
|
|
|
|
# When the scraper used blob URLs (modern Schwab web components), report_url
|
|
|
|
|
# is None even though the PDF was downloaded and parsed successfully. Point
|
|
|
|
|
# callers at the MCP server's cached-PDF endpoint instead.
|
|
|
|
|
if (
|
|
|
|
|
isinstance(result, dict)
|
|
|
|
|
and result.get("success")
|
|
|
|
|
and result.get("data") is not None
|
|
|
|
|
):
|
|
|
|
|
data = result["data"]
|
|
|
|
|
if hasattr(data, "report_url") and data.report_url is None and data.source is not None:
|
|
|
|
|
base = os.getenv("SCHWAB_MCP_BASE_URL", _DEFAULT_BASE_URL).rstrip("/")
|
|
|
|
|
data.report_url = f"{base}/reports/{ticker.upper()}/pdf"
|
|
|
|
|
|
|
|
|
|
return serialize(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -284,9 +494,31 @@ async def upload_cookies(cookies_json: str) -> str:
|
|
|
|
|
"""
|
|
|
|
|
try:
|
|
|
|
|
cookies = json.loads(cookies_json)
|
|
|
|
|
with open("cookies.json", "w") as f:
|
|
|
|
|
json.dump(cookies, f)
|
|
|
|
|
return json.dumps({"status": "success", "message": "cookies.json updated successfully"})
|
|
|
|
|
|
|
|
|
|
# Some browser extensions wrap cookies in an object (e.g. {"cookies": [...]})
|
|
|
|
|
if isinstance(cookies, dict):
|
|
|
|
|
if "cookies" in cookies:
|
|
|
|
|
cookies = cookies["cookies"]
|
|
|
|
|
else:
|
|
|
|
|
return json.dumps({
|
|
|
|
|
"status": "error",
|
|
|
|
|
"message": "Expected a list of cookies or an object with a 'cookies' key",
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
if not isinstance(cookies, list):
|
|
|
|
|
return json.dumps({
|
|
|
|
|
"status": "error",
|
|
|
|
|
"message": f"Expected a list of cookies, got {type(cookies).__name__}",
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
from schwab_scraper.core.config import get_cookies_path
|
|
|
|
|
cookies_path = get_cookies_path()
|
|
|
|
|
with open(cookies_path, "w") as f:
|
|
|
|
|
json.dump(cookies, f, indent=2)
|
|
|
|
|
return json.dumps({
|
|
|
|
|
"status": "success",
|
|
|
|
|
"message": f"{cookies_path} updated with {len(cookies)} cookies",
|
|
|
|
|
})
|
|
|
|
|
except Exception as e:
|
|
|
|
|
return json.dumps({"status": "error", "message": str(e)})
|
|
|
|
|
|
|
|
|
|
@@ -320,10 +552,27 @@ async def health(request):
|
|
|
|
|
return JSONResponse({"status": "ok"})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def serve_report_pdf(request):
|
|
|
|
|
"""Serve a cached Morningstar report PDF by ticker."""
|
|
|
|
|
ticker = request.path_params["ticker"].upper()
|
|
|
|
|
pdf_bytes = read_cached_pdf(ticker)
|
|
|
|
|
if not pdf_bytes:
|
|
|
|
|
return JSONResponse(
|
|
|
|
|
{"error": f"No cached report for {ticker}. Call get_morningstar_data first."},
|
|
|
|
|
status_code=404,
|
|
|
|
|
)
|
|
|
|
|
return Response(
|
|
|
|
|
pdf_bytes,
|
|
|
|
|
media_type="application/pdf",
|
|
|
|
|
headers={"Content-Disposition": f'inline; filename="{ticker}_morningstar.pdf"'},
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
mcp_app = mcp.http_app()
|
|
|
|
|
app = Starlette(
|
|
|
|
|
routes=[
|
|
|
|
|
Route("/health", health),
|
|
|
|
|
Route("/reports/{ticker}/pdf", serve_report_pdf),
|
|
|
|
|
Mount("/", app=mcp_app),
|
|
|
|
|
],
|
|
|
|
|
lifespan=mcp_app.lifespan,
|
|
|
|
|
|