fix: gemini route, usage capture, search timeout, and trace finalization
- Add missing /v1/gemini POST route and handler
- Capture MitmEvent::Usage in gemini sync/streaming handlers
- Add retry counter (max 3) to search handler to prevent hang
- Add trace finalization at all gemini_sync channel exit points
- Fix UpstreamError trace outcome label
- Add timeout trace with error recording
- Dispatch Usage before ResponseComplete in SSE flush
This commit is contained in:
@@ -435,21 +435,33 @@ pub(crate) async fn handle_completions(
|
||||
.map(|r| r.calls.clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
// Build event channel for streaming
|
||||
let has_custom_tools = tools.is_some();
|
||||
let (mitm_rx, event_tx) = if has_custom_tools && body.stream {
|
||||
let (tx, rx) = tokio::sync::mpsc::channel(64);
|
||||
(Some(rx), Some(tx))
|
||||
} else {
|
||||
(None, None)
|
||||
};
|
||||
// Build event channel — always created for MITM response path
|
||||
let (tx, rx) = tokio::sync::mpsc::channel(64);
|
||||
let (mitm_rx, event_tx) = (Some(rx), tx);
|
||||
|
||||
// Build pending tool results from latest round
|
||||
let pending_tool_results = tool_rounds.last()
|
||||
.map(|r| r.results.clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
// Register all per-request state atomically
|
||||
// Start debug trace
|
||||
let trace = state.trace.start(&cascade_id, "POST /v1/chat/completions", model_name, body.stream);
|
||||
if let Some(ref t) = trace {
|
||||
t.set_client_request(crate::trace::ClientRequestSummary {
|
||||
message_count: body.messages.len(),
|
||||
tool_count: body.tools.as_ref().map_or(0, |t| t.len()),
|
||||
tool_round_count: tool_rounds.len(),
|
||||
user_text_len: user_text.len(),
|
||||
user_text_preview: user_text.chars().take(200).collect(),
|
||||
system_prompt: body.messages.iter().any(|m| m.role == "system"),
|
||||
has_image: image.is_some(),
|
||||
}).await;
|
||||
// Start turn 0
|
||||
t.start_turn().await;
|
||||
}
|
||||
|
||||
let mitm_gate = std::sync::Arc::new(tokio::sync::Notify::new());
|
||||
let mitm_gate_clone = mitm_gate.clone();
|
||||
state.mitm_store.register_request(crate::mitm::store::RequestContext {
|
||||
cascade_id: cascade_id.clone(),
|
||||
pending_user_text: user_text.clone(),
|
||||
@@ -463,6 +475,9 @@ pub(crate) async fn handle_completions(
|
||||
last_function_calls,
|
||||
call_id_to_name,
|
||||
created_at: std::time::Instant::now(),
|
||||
gate: mitm_gate_clone,
|
||||
trace_handle: trace.clone(),
|
||||
trace_turn: 0,
|
||||
}).await;
|
||||
|
||||
// Send REAL user text to LS
|
||||
@@ -480,6 +495,7 @@ pub(crate) async fn handle_completions(
|
||||
}
|
||||
Ok((status, _)) => {
|
||||
state.mitm_store.remove_request(&cascade_id).await;
|
||||
if let Some(ref t) = trace { t.record_error(format!("Backend returned {status}")).await; t.finish("backend_error").await; }
|
||||
return err_response(
|
||||
StatusCode::BAD_GATEWAY,
|
||||
format!("Backend returned {status}"),
|
||||
@@ -488,6 +504,7 @@ pub(crate) async fn handle_completions(
|
||||
}
|
||||
Err(e) => {
|
||||
state.mitm_store.remove_request(&cascade_id).await;
|
||||
if let Some(ref t) = trace { t.record_error(format!("Send failed: {e}")).await; t.finish("send_error").await; }
|
||||
return err_response(
|
||||
StatusCode::BAD_GATEWAY,
|
||||
format!("Send failed: {e}"),
|
||||
@@ -496,6 +513,34 @@ pub(crate) async fn handle_completions(
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for MITM gate: 5s → 502 if MITM enabled
|
||||
let gate_start = std::time::Instant::now();
|
||||
let gate_matched = tokio::time::timeout(
|
||||
std::time::Duration::from_secs(5),
|
||||
mitm_gate.notified(),
|
||||
).await;
|
||||
let gate_wait_ms = gate_start.elapsed().as_millis() as u64;
|
||||
if gate_matched.is_err() {
|
||||
if state.mitm_enabled {
|
||||
state.mitm_store.remove_request(&cascade_id).await;
|
||||
if let Some(ref t) = trace {
|
||||
t.record_error("MITM gate timeout (5s)".to_string()).await;
|
||||
t.finish("mitm_timeout").await;
|
||||
}
|
||||
return err_response(
|
||||
StatusCode::BAD_GATEWAY,
|
||||
"MITM proxy did not match request within 5s".to_string(),
|
||||
"mitm_timeout",
|
||||
);
|
||||
}
|
||||
warn!(cascade = %cascade_id, "MITM gate timeout (--no-mitm mode)");
|
||||
} else {
|
||||
debug!(cascade = %cascade_id, gate_wait_ms, "MITM gate signaled — request matched");
|
||||
if let Some(ref t) = trace {
|
||||
t.record_mitm_match(0, gate_wait_ms).await;
|
||||
}
|
||||
}
|
||||
|
||||
let completion_id = format!(
|
||||
"chatcmpl-{}",
|
||||
uuid::Uuid::new_v4().to_string().replace('-', "")
|
||||
@@ -515,6 +560,7 @@ pub(crate) async fn handle_completions(
|
||||
body.timeout,
|
||||
include_usage,
|
||||
mitm_rx,
|
||||
trace,
|
||||
)
|
||||
.await
|
||||
} else if n <= 1 {
|
||||
@@ -524,6 +570,7 @@ pub(crate) async fn handle_completions(
|
||||
model_name.to_string(),
|
||||
cascade_id,
|
||||
body.timeout,
|
||||
trace,
|
||||
)
|
||||
.await
|
||||
} else {
|
||||
@@ -653,6 +700,7 @@ async fn chat_completions_stream(
|
||||
timeout: u64,
|
||||
include_usage: bool,
|
||||
mitm_rx: Option<tokio::sync::mpsc::Receiver<crate::mitm::store::MitmEvent>>,
|
||||
trace: Option<crate::trace::TraceHandle>,
|
||||
) -> axum::response::Response {
|
||||
let stream = async_stream::stream! {
|
||||
let start = std::time::Instant::now();
|
||||
@@ -774,6 +822,21 @@ async fn chat_completions_stream(
|
||||
}
|
||||
yield Ok(Event::default().data("[DONE]"));
|
||||
state.mitm_store.remove_request(&cascade_id).await;
|
||||
if let Some(ref t) = trace {
|
||||
let (ipt, opt, crt2, tht) = if let Some(ref u) = last_usage {
|
||||
(u.input_tokens, u.output_tokens, u.cache_read_input_tokens, u.thinking_output_tokens)
|
||||
} else { (0, 0, 0, 0) };
|
||||
t.record_response(0, crate::trace::ResponseSummary {
|
||||
text_len: 0, thinking_len: 0, text_preview: String::new(),
|
||||
finish_reason: Some("tool_calls".to_string()),
|
||||
function_calls: calls.iter().map(|fc| crate::trace::FunctionCallSummary {
|
||||
name: fc.name.clone(), args_preview: serde_json::to_string(&fc.args).unwrap_or_default().chars().take(200).collect(),
|
||||
}).collect(),
|
||||
grounding: false,
|
||||
}).await;
|
||||
t.set_usage(crate::trace::TrackedUsage { input_tokens: ipt, output_tokens: opt, thinking_tokens: tht, cache_read: crt2 }).await;
|
||||
t.finish("tool_call").await;
|
||||
}
|
||||
return;
|
||||
}
|
||||
MitmEvent::ResponseComplete => {
|
||||
@@ -802,6 +865,19 @@ async fn chat_completions_stream(
|
||||
}
|
||||
yield Ok(Event::default().data("[DONE]"));
|
||||
state.mitm_store.remove_request(&cascade_id).await;
|
||||
if let Some(ref t) = trace {
|
||||
let (ipt, opt, crt2, tht) = if let Some(ref u) = mitm {
|
||||
(u.input_tokens, u.output_tokens, u.cache_read_input_tokens, u.thinking_output_tokens)
|
||||
} else { (0, 0, 0, 0) };
|
||||
t.record_response(0, crate::trace::ResponseSummary {
|
||||
text_len: acc_text.len(), thinking_len: acc_thinking.len(),
|
||||
text_preview: acc_text.chars().take(200).collect(),
|
||||
finish_reason: Some("stop".to_string()),
|
||||
function_calls: Vec::new(), grounding: false,
|
||||
}).await;
|
||||
t.set_usage(crate::trace::TrackedUsage { input_tokens: ipt, output_tokens: opt, thinking_tokens: tht, cache_read: crt2 }).await;
|
||||
t.finish("completed").await;
|
||||
}
|
||||
return;
|
||||
} else if !acc_thinking.is_empty() && !did_unblock_ls {
|
||||
// Thinking-only response — LS needs follow-up API calls.
|
||||
@@ -844,6 +920,19 @@ async fn chat_completions_stream(
|
||||
}
|
||||
yield Ok(Event::default().data("[DONE]"));
|
||||
state.mitm_store.remove_request(&cascade_id).await;
|
||||
if let Some(ref t) = trace {
|
||||
let (ipt, opt, crt2, tht) = if let Some(ref u) = mitm {
|
||||
(u.input_tokens, u.output_tokens, u.cache_read_input_tokens, u.thinking_output_tokens)
|
||||
} else { (0, 0, 0, 0) };
|
||||
t.record_response(0, crate::trace::ResponseSummary {
|
||||
text_len: 0, thinking_len: acc_thinking.len(),
|
||||
text_preview: String::new(),
|
||||
finish_reason: Some("stop".to_string()),
|
||||
function_calls: Vec::new(), grounding: false,
|
||||
}).await;
|
||||
t.set_usage(crate::trace::TrackedUsage { input_tokens: ipt, output_tokens: opt, thinking_tokens: tht, cache_read: crt2 }).await;
|
||||
t.finish("thinking_timeout").await;
|
||||
}
|
||||
return;
|
||||
}
|
||||
// Don't break — wait for more channel events
|
||||
@@ -860,6 +949,14 @@ async fn chat_completions_stream(
|
||||
)));
|
||||
yield Ok(Event::default().data("[DONE]"));
|
||||
state.mitm_store.remove_request(&cascade_id).await;
|
||||
if let Some(ref t) = trace {
|
||||
t.record_response(0, crate::trace::ResponseSummary {
|
||||
text_len: 0, thinking_len: 0, text_preview: String::new(),
|
||||
finish_reason: Some("stop".to_string()),
|
||||
function_calls: Vec::new(), grounding: false,
|
||||
}).await;
|
||||
t.finish("empty_response").await;
|
||||
}
|
||||
return;
|
||||
}
|
||||
continue 'channel_loop;
|
||||
@@ -900,6 +997,15 @@ async fn chat_completions_stream(
|
||||
)));
|
||||
}
|
||||
yield Ok(Event::default().data("[DONE]"));
|
||||
if let Some(ref t) = trace {
|
||||
t.record_response(0, crate::trace::ResponseSummary {
|
||||
text_len: last_text.len(), thinking_len: last_thinking_len,
|
||||
text_preview: last_text.chars().take(200).collect(),
|
||||
finish_reason: Some("stop".to_string()),
|
||||
function_calls: Vec::new(), grounding: false,
|
||||
}).await;
|
||||
t.finish("channel_closed").await;
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
// ── Fallback: LS steps (no MITM capture active) ──
|
||||
@@ -1046,6 +1152,7 @@ async fn chat_completions_sync(
|
||||
model_name: String,
|
||||
cascade_id: String,
|
||||
timeout: u64,
|
||||
trace: Option<crate::trace::TraceHandle>,
|
||||
) -> axum::response::Response {
|
||||
let result = poll_for_response(&state, &cascade_id, timeout).await;
|
||||
if let Some(ref err) = result.upstream_error {
|
||||
@@ -1084,6 +1191,27 @@ async fn chat_completions_sync(
|
||||
message["reasoning_content"] = serde_json::json!(thinking);
|
||||
}
|
||||
|
||||
// Record trace data
|
||||
if let Some(ref t) = trace {
|
||||
t.record_response(0, crate::trace::ResponseSummary {
|
||||
text_len: result.text.len(),
|
||||
thinking_len: result.thinking.as_ref().map_or(0, |s| s.len()),
|
||||
text_preview: result.text.chars().take(200).collect(),
|
||||
finish_reason: Some(finish_reason.to_string()),
|
||||
function_calls: Vec::new(),
|
||||
grounding: false,
|
||||
}).await;
|
||||
if prompt_tokens > 0 || completion_tokens > 0 {
|
||||
t.set_usage(crate::trace::TrackedUsage {
|
||||
input_tokens: prompt_tokens,
|
||||
output_tokens: completion_tokens,
|
||||
thinking_tokens: thinking_tokens,
|
||||
cache_read: cached_tokens,
|
||||
}).await;
|
||||
}
|
||||
t.finish("completed").await;
|
||||
}
|
||||
|
||||
Json(serde_json::json!({
|
||||
"id": completion_id,
|
||||
"object": "chat.completion",
|
||||
|
||||
Reference in New Issue
Block a user