fix: cascade correlation — fall back to _latest MITM usage

When the MITM can't extract a cascade ID from the intercepted request
(Content-Length: 0 / chunked encoding), usage is stored under '_latest'.
Now usage_from_poll and completions try the exact cascade_id first,
then fall back to '_latest' so MITM-captured tokens are actually used.
This commit is contained in:
Nikketryhard
2026-02-14 18:10:04 -06:00
parent ca36ab0631
commit 061b08fc8f
2 changed files with 12 additions and 3 deletions

View File

@@ -308,8 +308,12 @@ async fn chat_completions_sync(
) -> axum::response::Response {
let result = poll_for_response(&state, &cascade_id, timeout).await;
// Check MITM store first for real intercepted usage
let (prompt_tokens, completion_tokens, cached_tokens) = if let Some(mitm_usage) = state.mitm_store.take_usage(&cascade_id).await {
// Check MITM store first for real intercepted usage (fallback to _latest)
let mitm = match state.mitm_store.take_usage(&cascade_id).await {
Some(u) => Some(u),
None => state.mitm_store.take_usage("_latest").await,
};
let (prompt_tokens, completion_tokens, cached_tokens) = if let Some(mitm_usage) = mitm {
(mitm_usage.input_tokens, mitm_usage.output_tokens, mitm_usage.cache_read_input_tokens)
} else if let Some(u) = &result.usage {
(u.input_tokens, u.output_tokens, 0)