fix: resolve report_url=None for blob-URL tickers and fix dataclass serialization
All checks were successful
Build and Push Docker Image / build (push) Successful in 1m4s
All checks were successful
Build and Push Docker Image / build (push) Successful in 1m4s
When Schwab uses modern blob URLs (increasingly common), find_report()
returns __CLICK_TO_OPEN__ and the scraper skips storing a report_url
even though the PDF downloads and parses successfully. This caused
agents to see report_url=None for tickers like PEP/BR/DPZ/MSCI/BMI.
Changes:
- Fix serialize() to use dataclasses.asdict() instead of str() for
dataclass payloads, producing proper JSON objects instead of Python
repr strings
- Add /reports/{ticker}/pdf endpoint to serve cached Morningstar PDFs
- Enrich report_url with the MCP's own PDF endpoint when blob URLs
were used and the report was successfully downloaded
- Add SCHWAB_MCP_BASE_URL env var to compose for self-referential URLs
This commit is contained in:
@@ -22,6 +22,7 @@ services:
|
||||
cpus: '0.1'
|
||||
environment:
|
||||
- SCHWAB_PLAYWRIGHT_URL=ws://browser.local.ben.io:3000/playwright/chromium?timeout=300000
|
||||
- SCHWAB_MCP_BASE_URL=https://schwab-mcp.ext.ben.io
|
||||
- PORT=8000
|
||||
volumes:
|
||||
- ./cookies.json:/app/cookies.json
|
||||
|
||||
49
server.py
49
server.py
@@ -1,3 +1,4 @@
|
||||
import dataclasses
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
@@ -9,11 +10,12 @@ from typing import Optional, Any, Tuple
|
||||
|
||||
from fastmcp import FastMCP
|
||||
from starlette.applications import Starlette
|
||||
from starlette.responses import JSONResponse
|
||||
from starlette.responses import JSONResponse, Response
|
||||
from starlette.routing import Route, Mount
|
||||
import uvicorn
|
||||
|
||||
import schwab_scraper.unified_api as api
|
||||
from schwab_scraper.storage.cache import read_cached_pdf
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -236,18 +238,25 @@ login_manager = LoginManager()
|
||||
mcp = FastMCP("SchwabScraper")
|
||||
|
||||
|
||||
def _json_default(obj: Any) -> Any:
    """Fallback for ``json.dumps(default=...)`` on values JSON cannot encode.

    Args:
        obj: A value the JSON encoder could not serialize natively.

    Returns:
        ``dataclasses.asdict(obj)`` for dataclass instances, ``obj.model_dump()``
        for instances exposing a callable ``model_dump`` (Pydantic-style
        models), and ``str(obj)`` for everything else.
    """
    # Classes themselves (dataclass types, model types) are not payloads:
    # asdict() rejects them and model_dump() would be unbound, so they fall
    # through to str().
    if not isinstance(obj, type):
        if dataclasses.is_dataclass(obj):
            return dataclasses.asdict(obj)
        # Models nested inside a dict or dataclass reach this hook too; dump
        # them to a dict so they become JSON objects rather than repr strings.
        dump = getattr(obj, "model_dump", None)
        if callable(dump):
            return dump()
    return str(obj)
|
||||
|
||||
|
||||
def serialize(obj: Any) -> str:
    """Serialize a Pydantic model, dataclass, list, or plain value to JSON.

    Args:
        obj: The value to encode. Objects exposing ``model_dump_json`` are
            encoded by that method; objects exposing only ``model_dump`` are
            dumped to a dict first; lists have each model-like item dumped
            individually; anything else goes straight to ``json.dumps``.

    Returns:
        The JSON text for ``obj``.
    """
    if hasattr(obj, "model_dump_json"):
        return obj.model_dump_json()

    if hasattr(obj, "model_dump"):
        return json.dumps(obj.model_dump(), default=_json_default)

    if isinstance(obj, list):
        items = [
            item.model_dump() if hasattr(item, "model_dump") else item
            for item in obj
        ]
        return json.dumps(items, default=_json_default)

    # Values the encoder cannot handle natively (dataclasses, datetimes, ...)
    # are delegated to _json_default instead of being blindly str()-ed.
    return json.dumps(obj, default=_json_default)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -457,6 +466,21 @@ async def get_morningstar_data(ticker: str, debug: bool = False) -> str:
|
||||
debug: Enable debug logging
|
||||
"""
|
||||
result = await api.get_morningstar_data(ticker, debug=debug)
|
||||
|
||||
# When the scraper used blob URLs (modern Schwab web components), report_url
|
||||
# is None even though the PDF was downloaded and parsed successfully. Point
|
||||
# callers at the MCP server's cached-PDF endpoint instead.
|
||||
if (
|
||||
isinstance(result, dict)
|
||||
and result.get("success")
|
||||
and result.get("data") is not None
|
||||
):
|
||||
data = result["data"]
|
||||
if hasattr(data, "report_url") and data.report_url is None and data.source is not None:
|
||||
base = os.getenv("SCHWAB_MCP_BASE_URL", "").rstrip("/")
|
||||
if base:
|
||||
data.report_url = f"{base}/reports/{ticker.upper()}/pdf"
|
||||
|
||||
return serialize(result)
|
||||
|
||||
|
||||
@@ -527,10 +551,27 @@ async def health(request):
|
||||
return JSONResponse({"status": "ok"})
|
||||
|
||||
|
||||
async def serve_report_pdf(request):
    """Serve a cached Morningstar report PDF by ticker.

    Args:
        request: The incoming request; the ticker is read from the
            ``ticker`` path parameter and upper-cased.

    Returns:
        A 400 JSON response when the ticker is not a plausible symbol, a 404
        JSON response when ``read_cached_pdf`` yields nothing for it, or the
        PDF bytes as an inline ``application/pdf`` response.
    """
    ticker = request.path_params["ticker"].upper()

    # The ticker is untrusted URL input that is handed to the cache lookup and
    # interpolated into a quoted Content-Disposition filename below. Restrict
    # it to ASCII letters/digits plus "." and "-" (e.g. BRK.B, BRK-B), starting
    # with a letter or digit, so quotes, control characters, and dot-only
    # names never reach either place.
    # NOTE(review): the 16-character cap is a generous guess — confirm it
    # covers every symbol the scraper caches.
    is_valid = (
        0 < len(ticker) <= 16
        and ticker.isascii()
        and ticker[0].isalnum()
        and all(ch.isalnum() or ch in ".-" for ch in ticker)
    )
    if not is_valid:
        return JSONResponse({"error": "Invalid ticker symbol."}, status_code=400)

    pdf_bytes = read_cached_pdf(ticker)
    if not pdf_bytes:
        return JSONResponse(
            {"error": f"No cached report for {ticker}. Call get_morningstar_data first."},
            status_code=404,
        )

    return Response(
        pdf_bytes,
        media_type="application/pdf",
        headers={"Content-Disposition": f'inline; filename="{ticker}_morningstar.pdf"'},
    )
|
||||
|
||||
|
||||
mcp_app = mcp.http_app()
|
||||
app = Starlette(
|
||||
routes=[
|
||||
Route("/health", health),
|
||||
Route("/reports/{ticker}/pdf", serve_report_pdf),
|
||||
Mount("/", app=mcp_app),
|
||||
],
|
||||
lifespan=mcp_app.lifespan,
|
||||
|
||||
Reference in New Issue
Block a user