feat: capture thinking text from MITM-intercepted API responses

The LS strips thinking/reasoning text from plannerResponse steps — only the thinkingSignature (an opaque verification blob) is preserved. The actual thinking text flows through the MITM proxy in the raw Google SSE response (parts with thought: true) and in the Anthropic SSE response (thinking_delta content blocks).

Changes:
- StreamingAccumulator now accumulates thinking text from SSE events
- ApiUsage gains thinking_text: Option<String>
- usage_from_poll returns (Usage, Option<String>), where the second element is the MITM-captured thinking text
- Thinking text priority: MITM-captured > LS-extracted (fallback)
- Reasoning output item now populated from real API data
- Removed debug dump code
2026-02-14 19:30:09 -06:00
parent 19dc920872
commit 905d55beb5
4 changed files with 60 additions and 17 deletions
--- a/src/api/responses.rs
+++ b/src/api/responses.rs
@@ -261,18 +261,21 @@ struct RequestParams {
    metadata: serde_json::Value,
 }

-/// Build Usage from the best available source:
-/// 1. MITM intercepted data (real API tokens, including cache stats)
+/// Build Usage from the best available source, and extract thinking text from MITM:
+/// 1. MITM intercepted data (real API tokens, including cache stats + thinking text)
 /// 2. LS trajectory data (real tokens, no cache info)
 /// 3. Estimation from text lengths (fallback)
+///
+/// Returns (Usage, Option<thinking_text>). The LS strips thinking text from steps,
+/// so we capture it from the raw MITM-intercepted API response.
 async fn usage_from_poll(
    mitm_store: &crate::mitm::store::MitmStore,
    cascade_id: &str,
    model_usage: &Option<super::polling::ModelUsage>,
    input_text: &str,
    output_text: &str,
-) -> Usage {
-    // Priority 1: MITM intercepted data (most accurate — includes cache tokens)
+) -> (Usage, Option<String>) {
+    // Priority 1: MITM intercepted data (most accurate — includes cache tokens + thinking text)
    // Try exact cascade_id match first, then fall back to "_latest" (unmatched)
    let mitm_usage = match mitm_store.take_usage(cascade_id).await {
        Some(u) => Some(u),
@@ -285,9 +288,11 @@ async fn usage_from_poll(
            cache_read = mitm_usage.cache_read_input_tokens,
            cache_create = mitm_usage.cache_creation_input_tokens,
            thinking = mitm_usage.thinking_output_tokens,
+            thinking_text_len = mitm_usage.thinking_text.as_ref().map_or(0, |t| t.len()),
            "Using MITM intercepted usage"
        );
-        return Usage {
+        let thinking_text = mitm_usage.thinking_text;
+        let usage = Usage {
            input_tokens: mitm_usage.input_tokens,
            input_tokens_details: InputTokensDetails {
                cached_tokens: mitm_usage.cache_read_input_tokens,
@@ -298,21 +303,22 @@ async fn usage_from_poll(
            },
            total_tokens: mitm_usage.input_tokens + mitm_usage.output_tokens,
        };
+        return (usage, thinking_text);
    }

    // Priority 2: LS trajectory data (from CHECKPOINT/metadata steps)
    if let Some(u) = model_usage {
-        return Usage {
+        return (Usage {
            input_tokens: u.input_tokens,
            input_tokens_details: InputTokensDetails { cached_tokens: 0 },
            output_tokens: u.output_tokens,
            output_tokens_details: OutputTokensDetails { reasoning_tokens: 0 },
            total_tokens: u.input_tokens + u.output_tokens,
-        };
+        }, None);
    }

    // Priority 3: Estimate from text lengths
-    Usage::estimate(input_text, output_text)
+    (Usage::estimate(input_text, output_text), None)
 }

 // ─── Sync response ───────────────────────────────────────────────────────────
@@ -333,12 +339,15 @@ async fn handle_responses_sync(
        uuid::Uuid::new_v4().to_string().replace('-', "")
    );

-    let usage = usage_from_poll(&state.mitm_store, &cascade_id, &poll_result.usage, &params.user_text, &poll_result.text).await;
+    let (usage, mitm_thinking) = usage_from_poll(&state.mitm_store, &cascade_id, &poll_result.usage, &params.user_text, &poll_result.text).await;
+
+    // Thinking text priority: MITM-captured (raw API) > LS-extracted (steps)
+    let thinking_text = mitm_thinking.or(poll_result.thinking);

    // Build output array: [reasoning (if present), message]
    let mut output_items: Vec<serde_json::Value> = Vec::new();
-    if let Some(ref thinking_text) = poll_result.thinking {
-        output_items.push(build_reasoning_output(thinking_text));
+    if let Some(ref thinking) = thinking_text {
+        output_items.push(build_reasoning_output(thinking));
    }
    output_items.push(build_message_output(&msg_id, &poll_result.text));

@@ -479,9 +488,9 @@ async fn handle_responses_stream(
                        if is_response_done(steps) && !last_text.is_empty() {
                            debug!("Response done, text length={}", last_text.len());
                            let mu = extract_model_usage(steps);
-                            let usage = usage_from_poll(&state.mitm_store, &cascade_id, &mu, &params.user_text, &last_text).await;
+                            let (usage, mitm_thinking) = usage_from_poll(&state.mitm_store, &cascade_id, &mu, &params.user_text, &last_text).await;
                            let ts = extract_thinking_signature(steps);
-                            let tc = extract_thinking_content(steps);
+                            let tc = mitm_thinking.or_else(|| extract_thinking_content(steps));
                            let td = extract_thinking_duration(steps);
                            for evt in completion_events(
                                &response_id, &model_name, &msg_id,
@@ -502,9 +511,9 @@ async fn handle_responses_stream(
                                    if run_status.contains("IDLE") && !last_text.is_empty() {
                                        debug!("Trajectory IDLE, text length={}", last_text.len());
                                        let mu = extract_model_usage(steps);
-                                        let usage = usage_from_poll(&state.mitm_store, &cascade_id, &mu, &params.user_text, &last_text).await;
+                                        let (usage, mitm_thinking) = usage_from_poll(&state.mitm_store, &cascade_id, &mu, &params.user_text, &last_text).await;
                                        let ts = extract_thinking_signature(steps);
-                                        let tc = extract_thinking_content(steps);
+                                        let tc = mitm_thinking.or_else(|| extract_thinking_content(steps));
                                        let td = extract_thinking_duration(steps);
                                        for evt in completion_events(
                                            &response_id, &model_name, &msg_id,