From 5c1f4c77d91d86aa8aa1afcc6bd7d824825cf380 Mon Sep 17 00:00:00 2001 From: Nikketryhard Date: Sat, 14 Feb 2026 19:54:37 -0600 Subject: [PATCH] fix: add retry logic for MITM thinking text merge race condition The LS makes two Google API calls for thinking models. Call 2 (thinking summary) may not have arrived by the time usage_from_poll runs after Call 1 (response). Now we peek first, and if thinking tokens exist but text is missing, wait up to 1s for the merge to happen. Also adds peek_usage method to MitmStore for non-consuming reads. --- src/api/responses.rs | 32 +++++++++++++++++++++++++++----- src/mitm/store.rs | 7 +++++++ 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/src/api/responses.rs b/src/api/responses.rs index 969d79e..01e4417 100644 --- a/src/api/responses.rs +++ b/src/api/responses.rs @@ -276,11 +276,33 @@ async fn usage_from_poll( output_text: &str, ) -> (Usage, Option) { // Priority 1: MITM intercepted data (most accurate — includes cache tokens + thinking text) - // Try exact cascade_id match first, then fall back to "_latest" (unmatched) - let mitm_usage = match mitm_store.take_usage(cascade_id).await { - Some(u) => Some(u), - None => mitm_store.take_usage("_latest").await, - }; + // Try exact cascade_id match first, then fall back to "_latest" (unmatched). + // + // Race condition: The LS makes TWO Google API calls for thinking models: + // Call 1: response + thinking token count (recorded first) + // Call 2: thinking summary text (merged into Call 1 by the store) + // We may read the usage after Call 1 but before Call 2 arrives. + // If we see thinking tokens but no text, wait briefly for the merge. + let keys_to_try: Vec<&str> = vec![cascade_id, "_latest"]; + let mut mitm_usage = None; + for key in &keys_to_try { + if let Some(u) = mitm_store.peek_usage(key).await { + if u.thinking_output_tokens > 0 && u.thinking_text.is_none() { + // Call 2 hasn't arrived yet — wait briefly for the merge + tracing::debug!("MITM: thinking tokens found but no text, waiting for summary merge..."); + for _ in 0..10 { + tokio::time::sleep(tokio::time::Duration::from_millis(100)).await; + if let Some(u2) = mitm_store.peek_usage(key).await { + if u2.thinking_text.is_some() { + break; + } + } + } + } + mitm_usage = mitm_store.take_usage(key).await; + break; + } + } if let Some(mitm_usage) = mitm_usage { tracing::debug!( input = mitm_usage.input_tokens, diff --git a/src/mitm/store.rs b/src/mitm/store.rs index 69176ed..433c5c5 100644 --- a/src/mitm/store.rs +++ b/src/mitm/store.rs @@ -172,6 +172,13 @@ impl MitmStore { /// Get the latest usage for a cascade, consuming it (one-shot read). /// + /// Peek at usage data for a cascade without consuming it. + /// Used to check if thinking text has been merged before taking. + pub async fn peek_usage(&self, cascade_id: &str) -> Option { + let latest = self.latest_usage.read().await; + latest.get(cascade_id).cloned() + } + /// Only returns exact cascade_id matches — no cross-cascade fallback. /// The `_latest` key is only consumed when the caller explicitly requests it /// (i.e., when the MITM couldn't identify the cascade).