From 5c1f4c77d91d86aa8aa1afcc6bd7d824825cf380 Mon Sep 17 00:00:00 2001
From: Nikketryhard <louisnewmobile@gmail.com>
Date: Sat, 14 Feb 2026 19:54:37 -0600
Subject: [PATCH] fix: add retry logic for MITM thinking text merge race
 condition

The LS makes two Google API calls for thinking models. Call 2 (thinking
summary) may not have arrived by the time usage_from_poll runs after
Call 1 (response). Now we peek first, and if thinking tokens exist but
text is missing, wait up to 1s for the merge to happen.

Also adds peek_usage method to MitmStore for non-consuming reads.
---
 src/api/responses.rs | 32 +++++++++++++++++++++++++++-----
 src/mitm/store.rs    |  7 +++++++
 2 files changed, 34 insertions(+), 5 deletions(-)
diff --git a/src/api/responses.rs b/src/api/responses.rs
index 969d79e..01e4417 100644
--- a/src/api/responses.rs
+++ b/src/api/responses.rs
@@ -276,11 +276,33 @@ async fn usage_from_poll(
     output_text: &str,
 ) -> (Usage, Option<String>) {
     // Priority 1: MITM intercepted data (most accurate — includes cache tokens + thinking text)
-    // Try exact cascade_id match first, then fall back to "_latest" (unmatched)
-    let mitm_usage = match mitm_store.take_usage(cascade_id).await {
-        Some(u) => Some(u),
-        None => mitm_store.take_usage("_latest").await,
-    };
+    // Try exact cascade_id match first, then fall back to "_latest" (unmatched).
+    //
+    // Race condition: The LS makes TWO Google API calls for thinking models:
+    //   Call 1: response + thinking token count (recorded first)
+    //   Call 2: thinking summary text (merged into Call 1 by the store)
+    // We may read the usage after Call 1 but before Call 2 arrives.
+    // If we see thinking tokens but no text, wait briefly for the merge.
+    let keys_to_try: Vec<&str> = vec![cascade_id, "_latest"];
+    let mut mitm_usage = None;
+    for key in &keys_to_try {
+        if let Some(u) = mitm_store.peek_usage(key).await {
+            if u.thinking_output_tokens > 0 && u.thinking_text.is_none() {
+                // Call 2 hasn't arrived yet — wait briefly for the merge
+                tracing::debug!("MITM: thinking tokens found but no text, waiting for summary merge...");
+                for _ in 0..10 {
+                    tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+                    if let Some(u2) = mitm_store.peek_usage(key).await {
+                        if u2.thinking_text.is_some() {
+                            break;
+                        }
+                    }
+                }
+            }
+            mitm_usage = mitm_store.take_usage(key).await;
+            break;
+        }
+    }
     if let Some(mitm_usage) = mitm_usage {
         tracing::debug!(
             input = mitm_usage.input_tokens,
diff --git a/src/mitm/store.rs b/src/mitm/store.rs
index 69176ed..433c5c5 100644
--- a/src/mitm/store.rs
+++ b/src/mitm/store.rs
@@ -172,6 +172,13 @@ impl MitmStore {
 
     /// Get the latest usage for a cascade, consuming it (one-shot read).
     ///
+    /// Peek at usage data for a cascade without consuming it.
+    /// Used to check if thinking text has been merged before taking.
+    pub async fn peek_usage(&self, cascade_id: &str) -> Option<ApiUsage> {
+        let latest = self.latest_usage.read().await;
+        latest.get(cascade_id).cloned()
+    }
+
     /// Only returns exact cascade_id matches — no cross-cascade fallback.
     /// The `_latest` key is only consumed when the caller explicitly requests it
     /// (i.e., when the MITM couldn't identify the cascade).