fix: cascade correlation — fall back to _latest MITM usage
When the MITM can't extract a cascade ID from the intercepted request (Content-Length: 0 or chunked encoding), usage is stored under '_latest'. Now usage_from_poll and chat_completions_sync try the exact cascade_id first, then fall back to '_latest', so MITM-captured tokens are actually used.
This commit is contained in:
@@ -308,8 +308,12 @@ async fn chat_completions_sync(
|
|||||||
) -> axum::response::Response {
|
) -> axum::response::Response {
|
||||||
let result = poll_for_response(&state, &cascade_id, timeout).await;
|
let result = poll_for_response(&state, &cascade_id, timeout).await;
|
||||||
|
|
||||||
// Check MITM store first for real intercepted usage
|
// Check MITM store first for real intercepted usage (fallback to _latest)
|
||||||
let (prompt_tokens, completion_tokens, cached_tokens) = if let Some(mitm_usage) = state.mitm_store.take_usage(&cascade_id).await {
|
let mitm = match state.mitm_store.take_usage(&cascade_id).await {
|
||||||
|
Some(u) => Some(u),
|
||||||
|
None => state.mitm_store.take_usage("_latest").await,
|
||||||
|
};
|
||||||
|
let (prompt_tokens, completion_tokens, cached_tokens) = if let Some(mitm_usage) = mitm {
|
||||||
(mitm_usage.input_tokens, mitm_usage.output_tokens, mitm_usage.cache_read_input_tokens)
|
(mitm_usage.input_tokens, mitm_usage.output_tokens, mitm_usage.cache_read_input_tokens)
|
||||||
} else if let Some(u) = &result.usage {
|
} else if let Some(u) = &result.usage {
|
||||||
(u.input_tokens, u.output_tokens, 0)
|
(u.input_tokens, u.output_tokens, 0)
|
||||||
|
|||||||
@@ -277,7 +277,12 @@ async fn usage_from_poll(
|
|||||||
output_text: &str,
|
output_text: &str,
|
||||||
) -> Usage {
|
) -> Usage {
|
||||||
// Priority 1: MITM intercepted data (most accurate — includes cache tokens)
|
// Priority 1: MITM intercepted data (most accurate — includes cache tokens)
|
||||||
if let Some(mitm_usage) = mitm_store.take_usage(cascade_id).await {
|
// Try exact cascade_id match first, then fall back to "_latest" (unmatched)
|
||||||
|
let mitm_usage = match mitm_store.take_usage(cascade_id).await {
|
||||||
|
Some(u) => Some(u),
|
||||||
|
None => mitm_store.take_usage("_latest").await,
|
||||||
|
};
|
||||||
|
if let Some(mitm_usage) = mitm_usage {
|
||||||
tracing::debug!(
|
tracing::debug!(
|
||||||
input = mitm_usage.input_tokens,
|
input = mitm_usage.input_tokens,
|
||||||
output = mitm_usage.output_tokens,
|
output = mitm_usage.output_tokens,
|
||||||
|
|||||||
Reference in New Issue
Block a user