fix: cascade correlation — fall back to _latest MITM usage
When the MITM can't extract a cascade ID from the intercepted request (Content-Length: 0 or chunked encoding), usage is stored under '_latest'. usage_from_poll and completions now try the exact cascade_id first, then fall back to '_latest', so MITM-captured tokens are actually used.
This commit is contained in:
@@ -308,8 +308,12 @@ async fn chat_completions_sync(
|
||||
) -> axum::response::Response {
|
||||
let result = poll_for_response(&state, &cascade_id, timeout).await;
|
||||
|
||||
// Check MITM store first for real intercepted usage
|
||||
let (prompt_tokens, completion_tokens, cached_tokens) = if let Some(mitm_usage) = state.mitm_store.take_usage(&cascade_id).await {
|
||||
// Check MITM store first for real intercepted usage (fallback to _latest)
|
||||
let mitm = match state.mitm_store.take_usage(&cascade_id).await {
|
||||
Some(u) => Some(u),
|
||||
None => state.mitm_store.take_usage("_latest").await,
|
||||
};
|
||||
let (prompt_tokens, completion_tokens, cached_tokens) = if let Some(mitm_usage) = mitm {
|
||||
(mitm_usage.input_tokens, mitm_usage.output_tokens, mitm_usage.cache_read_input_tokens)
|
||||
} else if let Some(u) = &result.usage {
|
||||
(u.input_tokens, u.output_tokens, 0)
|
||||
|
||||
Reference in New Issue
Block a user