From 061b08fc8f26d0aeeefcaeaf5627fe1bfb105b27 Mon Sep 17 00:00:00 2001 From: Nikketryhard Date: Sat, 14 Feb 2026 18:10:04 -0600 Subject: [PATCH] =?UTF-8?q?fix:=20cascade=20correlation=20=E2=80=94=20fall?= =?UTF-8?q?back=20to=20=5Flatest=20MITM=20usage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the MITM can't extract a cascade ID from the intercepted request (Content-Length: 0 / chunked encoding), usage is stored under '_latest'. Now usage_from_poll and completions try the exact cascade_id first, then fall back to '_latest' so MITM-captured tokens are actually used. --- src/api/completions.rs | 8 ++++++-- src/api/responses.rs | 7 ++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/api/completions.rs b/src/api/completions.rs index 9983499..3f9dde0 100644 --- a/src/api/completions.rs +++ b/src/api/completions.rs @@ -308,8 +308,12 @@ async fn chat_completions_sync( ) -> axum::response::Response { let result = poll_for_response(&state, &cascade_id, timeout).await; - // Check MITM store first for real intercepted usage - let (prompt_tokens, completion_tokens, cached_tokens) = if let Some(mitm_usage) = state.mitm_store.take_usage(&cascade_id).await { + // Check MITM store first for real intercepted usage (fallback to _latest) + let mitm = match state.mitm_store.take_usage(&cascade_id).await { + Some(u) => Some(u), + None => state.mitm_store.take_usage("_latest").await, + }; + let (prompt_tokens, completion_tokens, cached_tokens) = if let Some(mitm_usage) = mitm { (mitm_usage.input_tokens, mitm_usage.output_tokens, mitm_usage.cache_read_input_tokens) } else if let Some(u) = &result.usage { (u.input_tokens, u.output_tokens, 0) diff --git a/src/api/responses.rs b/src/api/responses.rs index 1e3033d..206d58f 100644 --- a/src/api/responses.rs +++ b/src/api/responses.rs @@ -277,7 +277,12 @@ async fn usage_from_poll( output_text: &str, ) -> Usage { // Priority 1: MITM intercepted data (most accurate — includes cache tokens) - if let Some(mitm_usage) = mitm_store.take_usage(cascade_id).await { + // Try exact cascade_id match first, then fall back to "_latest" (unmatched) + let mitm_usage = match mitm_store.take_usage(cascade_id).await { + Some(u) => Some(u), + None => mitm_store.take_usage("_latest").await, + }; + if let Some(mitm_usage) = mitm_usage { tracing::debug!( input = mitm_usage.input_tokens, output = mitm_usage.output_tokens,