fix: gemini route, usage capture, search timeout, and trace finalization
- Add missing /v1/gemini POST route and handler
- Capture MitmEvent::Usage in gemini sync/streaming handlers
- Add retry counter (max 3) to search handler to prevent hang
- Add trace finalization at all gemini_sync channel exit points
- Fix UpstreamError trace outcome label
- Add timeout trace with error recording
- Dispatch Usage before ResponseComplete in SSE flush
This commit is contained in:
@@ -11,7 +11,7 @@ use axum::{
|
||||
use rand::Rng;
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::Arc;
|
||||
use tracing::{debug, info};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use super::models::{lookup_model, DEFAULT_MODEL, MODELS};
|
||||
use super::polling::{
|
||||
@@ -364,14 +364,9 @@ pub(crate) async fn handle_responses(
|
||||
}
|
||||
});
|
||||
|
||||
// Build event channel
|
||||
let has_custom_tools = tools.is_some();
|
||||
let (mitm_rx, event_tx) = if has_custom_tools {
|
||||
let (tx, rx) = tokio::sync::mpsc::channel(64);
|
||||
(Some(rx), Some(tx))
|
||||
} else {
|
||||
(None, None)
|
||||
};
|
||||
// Build event channel — always created for MITM response path
|
||||
let (tx, rx) = tokio::sync::mpsc::channel(64);
|
||||
let (mitm_rx, event_tx) = (Some(rx), tx);
|
||||
|
||||
// Build tool rounds now that cascade_id is known
|
||||
let mut tool_rounds: Vec<crate::mitm::store::ToolRound> = Vec::new();
|
||||
@@ -385,7 +380,23 @@ pub(crate) async fn handle_responses(
|
||||
});
|
||||
}
|
||||
|
||||
// Register all per-request state atomically
|
||||
// Start debug trace
|
||||
let trace = state.trace.start(&cascade_id, "POST /v1/responses", &model.name, body.stream);
|
||||
if let Some(ref t) = trace {
|
||||
t.set_client_request(crate::trace::ClientRequestSummary {
|
||||
message_count: if is_tool_result_turn { 0 } else { 1 },
|
||||
tool_count: body.tools.as_ref().map_or(0, |t| t.len()),
|
||||
tool_round_count: tool_rounds.len(),
|
||||
user_text_len: user_text.len(),
|
||||
user_text_preview: user_text.chars().take(200).collect(),
|
||||
system_prompt: body.instructions.is_some(),
|
||||
has_image: image.is_some(),
|
||||
}).await;
|
||||
t.start_turn().await;
|
||||
}
|
||||
|
||||
let mitm_gate = std::sync::Arc::new(tokio::sync::Notify::new());
|
||||
let mitm_gate_clone = mitm_gate.clone();
|
||||
state.mitm_store.register_request(crate::mitm::store::RequestContext {
|
||||
cascade_id: cascade_id.clone(),
|
||||
pending_user_text: user_text.clone(),
|
||||
@@ -399,6 +410,9 @@ pub(crate) async fn handle_responses(
|
||||
last_function_calls: Vec::new(),
|
||||
call_id_to_name: std::collections::HashMap::new(),
|
||||
created_at: std::time::Instant::now(),
|
||||
gate: mitm_gate_clone,
|
||||
trace_handle: trace.clone(),
|
||||
trace_turn: 0,
|
||||
}).await;
|
||||
|
||||
// Send REAL user text to LS
|
||||
@@ -432,6 +446,29 @@ pub(crate) async fn handle_responses(
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for MITM gate: 5s → 502 if MITM enabled
|
||||
let gate_start = std::time::Instant::now();
|
||||
let gate_matched = tokio::time::timeout(
|
||||
std::time::Duration::from_secs(5),
|
||||
mitm_gate.notified(),
|
||||
).await;
|
||||
let gate_wait_ms = gate_start.elapsed().as_millis() as u64;
|
||||
if gate_matched.is_err() {
|
||||
if state.mitm_enabled {
|
||||
state.mitm_store.remove_request(&cascade_id).await;
|
||||
if let Some(ref t) = trace { t.record_error("MITM gate timeout (5s)".to_string()).await; t.finish("mitm_timeout").await; }
|
||||
return err_response(
|
||||
StatusCode::BAD_GATEWAY,
|
||||
"MITM proxy did not match request within 5s".to_string(),
|
||||
"mitm_timeout",
|
||||
);
|
||||
}
|
||||
warn!(cascade = %cascade_id, "MITM gate timeout (--no-mitm mode)");
|
||||
} else {
|
||||
debug!(cascade = %cascade_id, gate_wait_ms, "MITM gate signaled — request matched");
|
||||
if let Some(ref t) = trace { t.record_mitm_match(0, gate_wait_ms).await; }
|
||||
}
|
||||
|
||||
// Capture request params for response building
|
||||
let req_params = RequestParams {
|
||||
user_text: user_text.clone(),
|
||||
@@ -462,6 +499,7 @@ pub(crate) async fn handle_responses(
|
||||
body.timeout,
|
||||
req_params,
|
||||
mitm_rx,
|
||||
trace,
|
||||
)
|
||||
.await
|
||||
} else {
|
||||
@@ -473,6 +511,7 @@ pub(crate) async fn handle_responses(
|
||||
body.timeout,
|
||||
req_params,
|
||||
mitm_rx,
|
||||
trace,
|
||||
)
|
||||
.await
|
||||
}
|
||||
@@ -595,6 +634,7 @@ async fn handle_responses_sync(
|
||||
timeout: u64,
|
||||
params: RequestParams,
|
||||
mitm_rx: Option<tokio::sync::mpsc::Receiver<crate::mitm::store::MitmEvent>>,
|
||||
trace: Option<crate::trace::TraceHandle>,
|
||||
) -> axum::response::Response {
|
||||
let created_at = now_unix();
|
||||
|
||||
@@ -642,6 +682,30 @@ async fn handle_responses_sync(
|
||||
&state.mitm_store, &cascade_id, &None, &params.user_text, "",
|
||||
).await;
|
||||
state.mitm_store.remove_request(&cascade_id).await;
|
||||
// Record trace before usage is moved
|
||||
if let Some(ref t) = trace {
|
||||
let fc_summaries: Vec<crate::trace::FunctionCallSummary> = calls.iter().map(|fc| {
|
||||
crate::trace::FunctionCallSummary {
|
||||
name: fc.name.clone(),
|
||||
args_preview: serde_json::to_string(&fc.args).unwrap_or_default().chars().take(200).collect(),
|
||||
}
|
||||
}).collect();
|
||||
t.record_response(0, crate::trace::ResponseSummary {
|
||||
text_len: 0,
|
||||
thinking_len: 0,
|
||||
text_preview: String::new(),
|
||||
finish_reason: Some("tool_calls".to_string()),
|
||||
function_calls: fc_summaries,
|
||||
grounding: false,
|
||||
}).await;
|
||||
t.set_usage(crate::trace::TrackedUsage {
|
||||
input_tokens: usage.input_tokens,
|
||||
output_tokens: usage.output_tokens,
|
||||
thinking_tokens: usage.output_tokens_details.reasoning_tokens,
|
||||
cache_read: usage.input_tokens_details.cached_tokens,
|
||||
}).await;
|
||||
t.finish("tool_call").await;
|
||||
}
|
||||
let resp = build_response_object(
|
||||
ResponseData {
|
||||
id: response_id,
|
||||
@@ -688,6 +752,24 @@ async fn handle_responses_sync(
|
||||
let msg_id = format!("msg_{}", uuid::Uuid::new_v4().to_string().replace('-', ""));
|
||||
output_items.push(build_message_output(&msg_id, &acc_text));
|
||||
|
||||
// Record trace before usage is moved
|
||||
if let Some(ref t) = trace {
|
||||
t.record_response(0, crate::trace::ResponseSummary {
|
||||
text_len: acc_text.len(),
|
||||
thinking_len: acc_thinking.as_ref().map_or(0, |s| s.len()),
|
||||
text_preview: acc_text.chars().take(200).collect(),
|
||||
finish_reason: Some("stop".to_string()),
|
||||
function_calls: Vec::new(),
|
||||
grounding: false,
|
||||
}).await;
|
||||
t.set_usage(crate::trace::TrackedUsage {
|
||||
input_tokens: usage.input_tokens,
|
||||
output_tokens: usage.output_tokens,
|
||||
thinking_tokens: usage.output_tokens_details.reasoning_tokens,
|
||||
cache_read: usage.input_tokens_details.cached_tokens,
|
||||
}).await;
|
||||
t.finish("completed").await;
|
||||
}
|
||||
let resp = build_response_object(
|
||||
ResponseData {
|
||||
id: response_id,
|
||||
@@ -705,6 +787,7 @@ async fn handle_responses_sync(
|
||||
}
|
||||
MitmEvent::UpstreamError(err) => {
|
||||
state.mitm_store.remove_request(&cascade_id).await;
|
||||
if let Some(ref t) = trace { t.record_error(format!("Upstream: {}", err.message.as_deref().unwrap_or("unknown"))).await; t.finish("upstream_error").await; }
|
||||
return upstream_err_response(&err);
|
||||
}
|
||||
}
|
||||
@@ -712,6 +795,7 @@ async fn handle_responses_sync(
|
||||
|
||||
// Timeout
|
||||
state.mitm_store.remove_request(&cascade_id).await;
|
||||
if let Some(ref t) = trace { t.record_error(format!("Timeout: {}s", timeout)).await; t.finish("timeout").await; }
|
||||
return err_response(
|
||||
StatusCode::GATEWAY_TIMEOUT,
|
||||
format!("Timeout: no response from Google API after {timeout}s"),
|
||||
@@ -772,6 +856,31 @@ async fn handle_responses_sync(
|
||||
)
|
||||
.await;
|
||||
|
||||
// Record trace before usage is moved
|
||||
if let Some(ref t) = trace {
|
||||
let fc_summaries: Vec<crate::trace::FunctionCallSummary> = calls.iter().map(|fc| {
|
||||
crate::trace::FunctionCallSummary {
|
||||
name: fc.name.clone(),
|
||||
args_preview: serde_json::to_string(&fc.args).unwrap_or_default().chars().take(200).collect(),
|
||||
}
|
||||
}).collect();
|
||||
t.record_response(0, crate::trace::ResponseSummary {
|
||||
text_len: poll_result.text.len(),
|
||||
thinking_len: poll_result.thinking.as_ref().map_or(0, |s| s.len()),
|
||||
text_preview: String::new(),
|
||||
finish_reason: Some("tool_calls".to_string()),
|
||||
function_calls: fc_summaries,
|
||||
grounding: false,
|
||||
}).await;
|
||||
t.set_usage(crate::trace::TrackedUsage {
|
||||
input_tokens: usage.input_tokens,
|
||||
output_tokens: usage.output_tokens,
|
||||
thinking_tokens: usage.output_tokens_details.reasoning_tokens,
|
||||
cache_read: usage.input_tokens_details.cached_tokens,
|
||||
}).await;
|
||||
t.finish("tool_call").await;
|
||||
}
|
||||
|
||||
let resp = build_response_object(
|
||||
ResponseData {
|
||||
id: response_id,
|
||||
@@ -809,6 +918,25 @@ async fn handle_responses_sync(
|
||||
}
|
||||
output_items.push(build_message_output(&msg_id, &poll_result.text));
|
||||
|
||||
// Record trace before usage is moved
|
||||
if let Some(ref t) = trace {
|
||||
t.record_response(0, crate::trace::ResponseSummary {
|
||||
text_len: poll_result.text.len(),
|
||||
thinking_len: thinking_text.as_ref().map_or(0, |s| s.len()),
|
||||
text_preview: poll_result.text.chars().take(200).collect(),
|
||||
finish_reason: Some("stop".to_string()),
|
||||
function_calls: Vec::new(),
|
||||
grounding: false,
|
||||
}).await;
|
||||
t.set_usage(crate::trace::TrackedUsage {
|
||||
input_tokens: usage.input_tokens,
|
||||
output_tokens: usage.output_tokens,
|
||||
thinking_tokens: usage.output_tokens_details.reasoning_tokens,
|
||||
cache_read: usage.input_tokens_details.cached_tokens,
|
||||
}).await;
|
||||
t.finish("completed").await;
|
||||
}
|
||||
|
||||
let resp = build_response_object(
|
||||
ResponseData {
|
||||
id: response_id,
|
||||
@@ -836,6 +964,7 @@ async fn handle_responses_stream(
|
||||
timeout: u64,
|
||||
params: RequestParams,
|
||||
mitm_rx: Option<tokio::sync::mpsc::Receiver<crate::mitm::store::MitmEvent>>,
|
||||
trace: Option<crate::trace::TraceHandle>,
|
||||
) -> axum::response::Response {
|
||||
let stream = async_stream::stream! {
|
||||
let msg_id = format!("msg_{}", uuid::Uuid::new_v4().to_string().replace('-', ""));
|
||||
@@ -1111,6 +1240,14 @@ async fn handle_responses_stream(
|
||||
&params.user_text, "",
|
||||
).await;
|
||||
|
||||
// Save trace usage before move
|
||||
let trace_usage = crate::trace::TrackedUsage {
|
||||
input_tokens: usage.input_tokens,
|
||||
output_tokens: usage.output_tokens,
|
||||
thinking_tokens: usage.output_tokens_details.reasoning_tokens,
|
||||
cache_read: usage.input_tokens_details.cached_tokens,
|
||||
};
|
||||
|
||||
let final_resp = build_response_object(
|
||||
ResponseData {
|
||||
id: response_id.clone(),
|
||||
@@ -1132,6 +1269,19 @@ async fn handle_responses_stream(
|
||||
"response": response_to_json(&final_resp),
|
||||
}),
|
||||
));
|
||||
if let Some(ref t) = trace {
|
||||
let fc_summaries: Vec<crate::trace::FunctionCallSummary> = calls.iter().map(|fc| crate::trace::FunctionCallSummary {
|
||||
name: fc.name.clone(), args_preview: serde_json::to_string(&fc.args).unwrap_or_default().chars().take(200).collect(),
|
||||
}).collect();
|
||||
t.record_response(0, crate::trace::ResponseSummary {
|
||||
text_len: 0, thinking_len: last_thinking.len(),
|
||||
text_preview: String::new(),
|
||||
finish_reason: Some("tool_calls".to_string()),
|
||||
function_calls: fc_summaries, grounding: false,
|
||||
}).await;
|
||||
t.set_usage(trace_usage).await;
|
||||
t.finish("tool_call").await;
|
||||
}
|
||||
state.mitm_store.remove_request(&cascade_id).await;
|
||||
return;
|
||||
}
|
||||
@@ -1150,6 +1300,16 @@ async fn handle_responses_stream(
|
||||
) {
|
||||
yield Ok(evt);
|
||||
}
|
||||
if let Some(ref t) = trace {
|
||||
t.record_response(0, crate::trace::ResponseSummary {
|
||||
text_len: last_text.len(),
|
||||
thinking_len: thinking_text.as_ref().map_or(0, |s| s.len()),
|
||||
text_preview: last_text.chars().take(200).collect(),
|
||||
finish_reason: Some("stop".to_string()),
|
||||
function_calls: Vec::new(), grounding: false,
|
||||
}).await;
|
||||
t.finish("completed").await;
|
||||
}
|
||||
state.mitm_store.remove_request(&cascade_id).await;
|
||||
return;
|
||||
} else if !last_thinking.is_empty() {
|
||||
@@ -1186,6 +1346,10 @@ async fn handle_responses_stream(
|
||||
},
|
||||
}),
|
||||
));
|
||||
if let Some(ref t) = trace {
|
||||
t.record_error(format!("Upstream: {}", error_msg)).await;
|
||||
t.finish("upstream_error").await;
|
||||
}
|
||||
state.mitm_store.remove_request(&cascade_id).await;
|
||||
return;
|
||||
}
|
||||
@@ -1213,6 +1377,10 @@ async fn handle_responses_stream(
|
||||
},
|
||||
}),
|
||||
));
|
||||
if let Some(ref t) = trace {
|
||||
t.record_error(format!("Timeout: {timeout}s")).await;
|
||||
t.finish("timeout").await;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user