fix: gemini route, usage capture, search timeout, and trace finalization

- Add missing /v1/gemini POST route and handler
- Capture MitmEvent::Usage in gemini sync/streaming handlers
- Add retry counter (max 3) to search handler to prevent hang
- Add trace finalization at all gemini_sync channel exit points
- Fix UpstreamError trace outcome label
- Add timeout trace with error recording
- Dispatch Usage before ResponseComplete in SSE flush
2026-02-18 01:31:18 -06:00
parent 48674f65da
commit 28d3296c87
11 changed files with 1480 additions and 221 deletions
--- a/src/api/completions.rs
+++ b/src/api/completions.rs
@@ -435,21 +435,33 @@ pub(crate) async fn handle_completions(
        .map(|r| r.calls.clone())
        .unwrap_or_default();

-    // Build event channel for streaming
-    let has_custom_tools = tools.is_some();
-    let (mitm_rx, event_tx) = if has_custom_tools && body.stream {
-        let (tx, rx) = tokio::sync::mpsc::channel(64);
-        (Some(rx), Some(tx))
-    } else {
-        (None, None)
-    };
+    // Build event channel — always created for MITM response path
+    let (tx, rx) = tokio::sync::mpsc::channel(64);
+    let (mitm_rx, event_tx) = (Some(rx), tx);

    // Build pending tool results from latest round
    let pending_tool_results = tool_rounds.last()
        .map(|r| r.results.clone())
        .unwrap_or_default();

-    // Register all per-request state atomically
+    // Start debug trace
+    let trace = state.trace.start(&cascade_id, "POST /v1/chat/completions", model_name, body.stream);
+    if let Some(ref t) = trace {
+        t.set_client_request(crate::trace::ClientRequestSummary {
+            message_count: body.messages.len(),
+            tool_count: body.tools.as_ref().map_or(0, |t| t.len()),
+            tool_round_count: tool_rounds.len(),
+            user_text_len: user_text.len(),
+            user_text_preview: user_text.chars().take(200).collect(),
+            system_prompt: body.messages.iter().any(|m| m.role == "system"),
+            has_image: image.is_some(),
+        }).await;
+        // Start turn 0
+        t.start_turn().await;
+    }
+
+    let mitm_gate = std::sync::Arc::new(tokio::sync::Notify::new());
+    let mitm_gate_clone = mitm_gate.clone();
    state.mitm_store.register_request(crate::mitm::store::RequestContext {
        cascade_id: cascade_id.clone(),
        pending_user_text: user_text.clone(),
@@ -463,6 +475,9 @@ pub(crate) async fn handle_completions(
        last_function_calls,
        call_id_to_name,
        created_at: std::time::Instant::now(),
+        gate: mitm_gate_clone,
+        trace_handle: trace.clone(),
+        trace_turn: 0,
    }).await;

    // Send REAL user text to LS
@@ -480,6 +495,7 @@ pub(crate) async fn handle_completions(
        }
        Ok((status, _)) => {
            state.mitm_store.remove_request(&cascade_id).await;
+            if let Some(ref t) = trace { t.record_error(format!("Backend returned {status}")).await; t.finish("backend_error").await; }
            return err_response(
                StatusCode::BAD_GATEWAY,
                format!("Backend returned {status}"),
@@ -488,6 +504,7 @@ pub(crate) async fn handle_completions(
        }
        Err(e) => {
            state.mitm_store.remove_request(&cascade_id).await;
+            if let Some(ref t) = trace { t.record_error(format!("Send failed: {e}")).await; t.finish("send_error").await; }
            return err_response(
                StatusCode::BAD_GATEWAY,
                format!("Send failed: {e}"),
@@ -496,6 +513,34 @@ pub(crate) async fn handle_completions(
        }
    }

+    // Wait for MITM gate: 5s → 502 if MITM enabled
+    let gate_start = std::time::Instant::now();
+    let gate_matched = tokio::time::timeout(
+        std::time::Duration::from_secs(5),
+        mitm_gate.notified(),
+    ).await;
+    let gate_wait_ms = gate_start.elapsed().as_millis() as u64;
+    if gate_matched.is_err() {
+        if state.mitm_enabled {
+            state.mitm_store.remove_request(&cascade_id).await;
+            if let Some(ref t) = trace {
+                t.record_error("MITM gate timeout (5s)".to_string()).await;
+                t.finish("mitm_timeout").await;
+            }
+            return err_response(
+                StatusCode::BAD_GATEWAY,
+                "MITM proxy did not match request within 5s".to_string(),
+                "mitm_timeout",
+            );
+        }
+        warn!(cascade = %cascade_id, "MITM gate timeout (--no-mitm mode)");
+    } else {
+        debug!(cascade = %cascade_id, gate_wait_ms, "MITM gate signaled — request matched");
+        if let Some(ref t) = trace {
+            t.record_mitm_match(0, gate_wait_ms).await;
+        }
+    }
+
    let completion_id = format!(
        "chatcmpl-{}",
        uuid::Uuid::new_v4().to_string().replace('-', "")
@@ -515,6 +560,7 @@ pub(crate) async fn handle_completions(
            body.timeout,
            include_usage,
            mitm_rx,
+            trace,
        )
        .await
    } else if n <= 1 {
@@ -524,6 +570,7 @@ pub(crate) async fn handle_completions(
            model_name.to_string(),
            cascade_id,
            body.timeout,
+            trace,
        )
        .await
    } else {
@@ -653,6 +700,7 @@ async fn chat_completions_stream(
    timeout: u64,
    include_usage: bool,
    mitm_rx: Option<tokio::sync::mpsc::Receiver<crate::mitm::store::MitmEvent>>,
+    trace: Option<crate::trace::TraceHandle>,
 ) -> axum::response::Response {
    let stream = async_stream::stream! {
        let start = std::time::Instant::now();
@@ -774,6 +822,21 @@ async fn chat_completions_stream(
                            }
                            yield Ok(Event::default().data("[DONE]"));
                            state.mitm_store.remove_request(&cascade_id).await;
+                            if let Some(ref t) = trace {
+                                let (ipt, opt, crt2, tht) = if let Some(ref u) = last_usage {
+                                    (u.input_tokens, u.output_tokens, u.cache_read_input_tokens, u.thinking_output_tokens)
+                                } else { (0, 0, 0, 0) };
+                                t.record_response(0, crate::trace::ResponseSummary {
+                                    text_len: 0, thinking_len: 0, text_preview: String::new(),
+                                    finish_reason: Some("tool_calls".to_string()),
+                                    function_calls: calls.iter().map(|fc| crate::trace::FunctionCallSummary {
+                                        name: fc.name.clone(), args_preview: serde_json::to_string(&fc.args).unwrap_or_default().chars().take(200).collect(),
+                                    }).collect(),
+                                    grounding: false,
+                                }).await;
+                                t.set_usage(crate::trace::TrackedUsage { input_tokens: ipt, output_tokens: opt, thinking_tokens: tht, cache_read: crt2 }).await;
+                                t.finish("tool_call").await;
+                            }
                            return;
                        }
                        MitmEvent::ResponseComplete => {
@@ -802,6 +865,19 @@ async fn chat_completions_stream(
                                }
                                yield Ok(Event::default().data("[DONE]"));
                                state.mitm_store.remove_request(&cascade_id).await;
+                                if let Some(ref t) = trace {
+                                    let (ipt, opt, crt2, tht) = if let Some(ref u) = mitm {
+                                        (u.input_tokens, u.output_tokens, u.cache_read_input_tokens, u.thinking_output_tokens)
+                                    } else { (0, 0, 0, 0) };
+                                    t.record_response(0, crate::trace::ResponseSummary {
+                                        text_len: acc_text.len(), thinking_len: acc_thinking.len(),
+                                        text_preview: acc_text.chars().take(200).collect(),
+                                        finish_reason: Some("stop".to_string()),
+                                        function_calls: Vec::new(), grounding: false,
+                                    }).await;
+                                    t.set_usage(crate::trace::TrackedUsage { input_tokens: ipt, output_tokens: opt, thinking_tokens: tht, cache_read: crt2 }).await;
+                                    t.finish("completed").await;
+                                }
                                return;
                            } else if !acc_thinking.is_empty() && !did_unblock_ls {
                                // Thinking-only response — LS needs follow-up API calls.
@@ -844,6 +920,19 @@ async fn chat_completions_stream(
                                    }
                                    yield Ok(Event::default().data("[DONE]"));
                                    state.mitm_store.remove_request(&cascade_id).await;
+                                    if let Some(ref t) = trace {
+                                        let (ipt, opt, crt2, tht) = if let Some(ref u) = mitm {
+                                            (u.input_tokens, u.output_tokens, u.cache_read_input_tokens, u.thinking_output_tokens)
+                                        } else { (0, 0, 0, 0) };
+                                        t.record_response(0, crate::trace::ResponseSummary {
+                                            text_len: 0, thinking_len: acc_thinking.len(),
+                                            text_preview: String::new(),
+                                            finish_reason: Some("stop".to_string()),
+                                            function_calls: Vec::new(), grounding: false,
+                                        }).await;
+                                        t.set_usage(crate::trace::TrackedUsage { input_tokens: ipt, output_tokens: opt, thinking_tokens: tht, cache_read: crt2 }).await;
+                                        t.finish("thinking_timeout").await;
+                                    }
                                    return;
                                }
                                // Don't break — wait for more channel events
@@ -860,6 +949,14 @@ async fn chat_completions_stream(
                                    )));
                                    yield Ok(Event::default().data("[DONE]"));
                                    state.mitm_store.remove_request(&cascade_id).await;
+                                    if let Some(ref t) = trace {
+                                        t.record_response(0, crate::trace::ResponseSummary {
+                                            text_len: 0, thinking_len: 0, text_preview: String::new(),
+                                            finish_reason: Some("stop".to_string()),
+                                            function_calls: Vec::new(), grounding: false,
+                                        }).await;
+                                        t.finish("empty_response").await;
+                                    }
                                    return;
                                }
                                continue 'channel_loop;
@@ -900,6 +997,15 @@ async fn chat_completions_stream(
                    )));
                }
                yield Ok(Event::default().data("[DONE]"));
+                if let Some(ref t) = trace {
+                    t.record_response(0, crate::trace::ResponseSummary {
+                        text_len: last_text.len(), thinking_len: last_thinking_len,
+                        text_preview: last_text.chars().take(200).collect(),
+                        finish_reason: Some("stop".to_string()),
+                        function_calls: Vec::new(), grounding: false,
+                    }).await;
+                    t.finish("channel_closed").await;
+                }
                return;
            } else {
                // ── Fallback: LS steps (no MITM capture active) ──
@@ -1046,6 +1152,7 @@ async fn chat_completions_sync(
    model_name: String,
    cascade_id: String,
    timeout: u64,
+    trace: Option<crate::trace::TraceHandle>,
 ) -> axum::response::Response {
    let result = poll_for_response(&state, &cascade_id, timeout).await;
    if let Some(ref err) = result.upstream_error {
@@ -1084,6 +1191,27 @@ async fn chat_completions_sync(
        message["reasoning_content"] = serde_json::json!(thinking);
    }

+    // Record trace data
+    if let Some(ref t) = trace {
+        t.record_response(0, crate::trace::ResponseSummary {
+            text_len: result.text.len(),
+            thinking_len: result.thinking.as_ref().map_or(0, |s| s.len()),
+            text_preview: result.text.chars().take(200).collect(),
+            finish_reason: Some(finish_reason.to_string()),
+            function_calls: Vec::new(),
+            grounding: false,
+        }).await;
+        if prompt_tokens > 0 || completion_tokens > 0 {
+            t.set_usage(crate::trace::TrackedUsage {
+                input_tokens: prompt_tokens,
+                output_tokens: completion_tokens,
+                thinking_tokens: thinking_tokens,
+                cache_read: cached_tokens,
+            }).await;
+        }
+        t.finish("completed").await;
+    }
+
    Json(serde_json::json!({
        "id": completion_id,
        "object": "chat.completion",