fix: gemini route, usage capture, search timeout, and trace finalization

- Add missing /v1/gemini POST route and handler
- Capture MitmEvent::Usage in gemini sync/streaming handlers
- Add retry counter (max 3) to search handler to prevent hang
- Add trace finalization at all gemini_sync channel exit points
- Fix UpstreamError trace outcome label
- Add timeout trace with error recording
- Dispatch Usage before ResponseComplete in SSE flush
This commit is contained in:
Nikketryhard
2026-02-18 01:31:18 -06:00
parent 48674f65da
commit 28d3296c87
11 changed files with 1480 additions and 221 deletions

View File

@@ -11,7 +11,7 @@ use axum::{
use rand::Rng;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;
use tracing::{debug, info};
use tracing::{debug, info, warn};
use super::models::{lookup_model, DEFAULT_MODEL, MODELS};
use super::polling::{
@@ -364,14 +364,9 @@ pub(crate) async fn handle_responses(
}
});
// Build event channel
let has_custom_tools = tools.is_some();
let (mitm_rx, event_tx) = if has_custom_tools {
let (tx, rx) = tokio::sync::mpsc::channel(64);
(Some(rx), Some(tx))
} else {
(None, None)
};
// Build event channel — always created for MITM response path
let (tx, rx) = tokio::sync::mpsc::channel(64);
let (mitm_rx, event_tx) = (Some(rx), tx);
// Build tool rounds now that cascade_id is known
let mut tool_rounds: Vec<crate::mitm::store::ToolRound> = Vec::new();
@@ -385,7 +380,23 @@ pub(crate) async fn handle_responses(
});
}
// Register all per-request state atomically
// Start debug trace
let trace = state.trace.start(&cascade_id, "POST /v1/responses", &model.name, body.stream);
if let Some(ref t) = trace {
t.set_client_request(crate::trace::ClientRequestSummary {
message_count: if is_tool_result_turn { 0 } else { 1 },
tool_count: body.tools.as_ref().map_or(0, |t| t.len()),
tool_round_count: tool_rounds.len(),
user_text_len: user_text.len(),
user_text_preview: user_text.chars().take(200).collect(),
system_prompt: body.instructions.is_some(),
has_image: image.is_some(),
}).await;
t.start_turn().await;
}
let mitm_gate = std::sync::Arc::new(tokio::sync::Notify::new());
let mitm_gate_clone = mitm_gate.clone();
state.mitm_store.register_request(crate::mitm::store::RequestContext {
cascade_id: cascade_id.clone(),
pending_user_text: user_text.clone(),
@@ -399,6 +410,9 @@ pub(crate) async fn handle_responses(
last_function_calls: Vec::new(),
call_id_to_name: std::collections::HashMap::new(),
created_at: std::time::Instant::now(),
gate: mitm_gate_clone,
trace_handle: trace.clone(),
trace_turn: 0,
}).await;
// Send REAL user text to LS
@@ -432,6 +446,29 @@ pub(crate) async fn handle_responses(
}
}
// Wait up to 5s for the MITM gate to signal a match; on timeout, return 502 if MITM is enabled, otherwise log a warning and continue
let gate_start = std::time::Instant::now();
let gate_matched = tokio::time::timeout(
std::time::Duration::from_secs(5),
mitm_gate.notified(),
).await;
let gate_wait_ms = gate_start.elapsed().as_millis() as u64;
if gate_matched.is_err() {
if state.mitm_enabled {
state.mitm_store.remove_request(&cascade_id).await;
if let Some(ref t) = trace { t.record_error("MITM gate timeout (5s)".to_string()).await; t.finish("mitm_timeout").await; }
return err_response(
StatusCode::BAD_GATEWAY,
"MITM proxy did not match request within 5s".to_string(),
"mitm_timeout",
);
}
warn!(cascade = %cascade_id, "MITM gate timeout (--no-mitm mode)");
} else {
debug!(cascade = %cascade_id, gate_wait_ms, "MITM gate signaled — request matched");
if let Some(ref t) = trace { t.record_mitm_match(0, gate_wait_ms).await; }
}
// Capture request params for response building
let req_params = RequestParams {
user_text: user_text.clone(),
@@ -462,6 +499,7 @@ pub(crate) async fn handle_responses(
body.timeout,
req_params,
mitm_rx,
trace,
)
.await
} else {
@@ -473,6 +511,7 @@ pub(crate) async fn handle_responses(
body.timeout,
req_params,
mitm_rx,
trace,
)
.await
}
@@ -595,6 +634,7 @@ async fn handle_responses_sync(
timeout: u64,
params: RequestParams,
mitm_rx: Option<tokio::sync::mpsc::Receiver<crate::mitm::store::MitmEvent>>,
trace: Option<crate::trace::TraceHandle>,
) -> axum::response::Response {
let created_at = now_unix();
@@ -642,6 +682,30 @@ async fn handle_responses_sync(
&state.mitm_store, &cascade_id, &None, &params.user_text, "",
).await;
state.mitm_store.remove_request(&cascade_id).await;
// Record trace before usage is moved
if let Some(ref t) = trace {
let fc_summaries: Vec<crate::trace::FunctionCallSummary> = calls.iter().map(|fc| {
crate::trace::FunctionCallSummary {
name: fc.name.clone(),
args_preview: serde_json::to_string(&fc.args).unwrap_or_default().chars().take(200).collect(),
}
}).collect();
t.record_response(0, crate::trace::ResponseSummary {
text_len: 0,
thinking_len: 0,
text_preview: String::new(),
finish_reason: Some("tool_calls".to_string()),
function_calls: fc_summaries,
grounding: false,
}).await;
t.set_usage(crate::trace::TrackedUsage {
input_tokens: usage.input_tokens,
output_tokens: usage.output_tokens,
thinking_tokens: usage.output_tokens_details.reasoning_tokens,
cache_read: usage.input_tokens_details.cached_tokens,
}).await;
t.finish("tool_call").await;
}
let resp = build_response_object(
ResponseData {
id: response_id,
@@ -688,6 +752,24 @@ async fn handle_responses_sync(
let msg_id = format!("msg_{}", uuid::Uuid::new_v4().to_string().replace('-', ""));
output_items.push(build_message_output(&msg_id, &acc_text));
// Record trace before usage is moved
if let Some(ref t) = trace {
t.record_response(0, crate::trace::ResponseSummary {
text_len: acc_text.len(),
thinking_len: acc_thinking.as_ref().map_or(0, |s| s.len()),
text_preview: acc_text.chars().take(200).collect(),
finish_reason: Some("stop".to_string()),
function_calls: Vec::new(),
grounding: false,
}).await;
t.set_usage(crate::trace::TrackedUsage {
input_tokens: usage.input_tokens,
output_tokens: usage.output_tokens,
thinking_tokens: usage.output_tokens_details.reasoning_tokens,
cache_read: usage.input_tokens_details.cached_tokens,
}).await;
t.finish("completed").await;
}
let resp = build_response_object(
ResponseData {
id: response_id,
@@ -705,6 +787,7 @@ async fn handle_responses_sync(
}
MitmEvent::UpstreamError(err) => {
state.mitm_store.remove_request(&cascade_id).await;
if let Some(ref t) = trace { t.record_error(format!("Upstream: {}", err.message.as_deref().unwrap_or("unknown"))).await; t.finish("upstream_error").await; }
return upstream_err_response(&err);
}
}
@@ -712,6 +795,7 @@ async fn handle_responses_sync(
// Timeout
state.mitm_store.remove_request(&cascade_id).await;
if let Some(ref t) = trace { t.record_error(format!("Timeout: {}s", timeout)).await; t.finish("timeout").await; }
return err_response(
StatusCode::GATEWAY_TIMEOUT,
format!("Timeout: no response from Google API after {timeout}s"),
@@ -772,6 +856,31 @@ async fn handle_responses_sync(
)
.await;
// Record trace before usage is moved
if let Some(ref t) = trace {
let fc_summaries: Vec<crate::trace::FunctionCallSummary> = calls.iter().map(|fc| {
crate::trace::FunctionCallSummary {
name: fc.name.clone(),
args_preview: serde_json::to_string(&fc.args).unwrap_or_default().chars().take(200).collect(),
}
}).collect();
t.record_response(0, crate::trace::ResponseSummary {
text_len: poll_result.text.len(),
thinking_len: poll_result.thinking.as_ref().map_or(0, |s| s.len()),
text_preview: String::new(),
finish_reason: Some("tool_calls".to_string()),
function_calls: fc_summaries,
grounding: false,
}).await;
t.set_usage(crate::trace::TrackedUsage {
input_tokens: usage.input_tokens,
output_tokens: usage.output_tokens,
thinking_tokens: usage.output_tokens_details.reasoning_tokens,
cache_read: usage.input_tokens_details.cached_tokens,
}).await;
t.finish("tool_call").await;
}
let resp = build_response_object(
ResponseData {
id: response_id,
@@ -809,6 +918,25 @@ async fn handle_responses_sync(
}
output_items.push(build_message_output(&msg_id, &poll_result.text));
// Record trace before usage is moved
if let Some(ref t) = trace {
t.record_response(0, crate::trace::ResponseSummary {
text_len: poll_result.text.len(),
thinking_len: thinking_text.as_ref().map_or(0, |s| s.len()),
text_preview: poll_result.text.chars().take(200).collect(),
finish_reason: Some("stop".to_string()),
function_calls: Vec::new(),
grounding: false,
}).await;
t.set_usage(crate::trace::TrackedUsage {
input_tokens: usage.input_tokens,
output_tokens: usage.output_tokens,
thinking_tokens: usage.output_tokens_details.reasoning_tokens,
cache_read: usage.input_tokens_details.cached_tokens,
}).await;
t.finish("completed").await;
}
let resp = build_response_object(
ResponseData {
id: response_id,
@@ -836,6 +964,7 @@ async fn handle_responses_stream(
timeout: u64,
params: RequestParams,
mitm_rx: Option<tokio::sync::mpsc::Receiver<crate::mitm::store::MitmEvent>>,
trace: Option<crate::trace::TraceHandle>,
) -> axum::response::Response {
let stream = async_stream::stream! {
let msg_id = format!("msg_{}", uuid::Uuid::new_v4().to_string().replace('-', ""));
@@ -1111,6 +1240,14 @@ async fn handle_responses_stream(
&params.user_text, "",
).await;
// Save trace usage before move
let trace_usage = crate::trace::TrackedUsage {
input_tokens: usage.input_tokens,
output_tokens: usage.output_tokens,
thinking_tokens: usage.output_tokens_details.reasoning_tokens,
cache_read: usage.input_tokens_details.cached_tokens,
};
let final_resp = build_response_object(
ResponseData {
id: response_id.clone(),
@@ -1132,6 +1269,19 @@ async fn handle_responses_stream(
"response": response_to_json(&final_resp),
}),
));
if let Some(ref t) = trace {
let fc_summaries: Vec<crate::trace::FunctionCallSummary> = calls.iter().map(|fc| crate::trace::FunctionCallSummary {
name: fc.name.clone(), args_preview: serde_json::to_string(&fc.args).unwrap_or_default().chars().take(200).collect(),
}).collect();
t.record_response(0, crate::trace::ResponseSummary {
text_len: 0, thinking_len: last_thinking.len(),
text_preview: String::new(),
finish_reason: Some("tool_calls".to_string()),
function_calls: fc_summaries, grounding: false,
}).await;
t.set_usage(trace_usage).await;
t.finish("tool_call").await;
}
state.mitm_store.remove_request(&cascade_id).await;
return;
}
@@ -1150,6 +1300,16 @@ async fn handle_responses_stream(
) {
yield Ok(evt);
}
if let Some(ref t) = trace {
t.record_response(0, crate::trace::ResponseSummary {
text_len: last_text.len(),
thinking_len: thinking_text.as_ref().map_or(0, |s| s.len()),
text_preview: last_text.chars().take(200).collect(),
finish_reason: Some("stop".to_string()),
function_calls: Vec::new(), grounding: false,
}).await;
t.finish("completed").await;
}
state.mitm_store.remove_request(&cascade_id).await;
return;
} else if !last_thinking.is_empty() {
@@ -1186,6 +1346,10 @@ async fn handle_responses_stream(
},
}),
));
if let Some(ref t) = trace {
t.record_error(format!("Upstream: {}", error_msg)).await;
t.finish("upstream_error").await;
}
state.mitm_store.remove_request(&cascade_id).await;
return;
}
@@ -1213,6 +1377,10 @@ async fn handle_responses_stream(
},
}),
));
if let Some(ref t) = trace {
t.record_error(format!("Timeout: {timeout}s")).await;
t.finish("timeout").await;
}
return;
}