fix: gemini route, usage capture, search timeout, and trace finalization

- Add missing /v1/gemini POST route and handler
- Capture MitmEvent::Usage in gemini sync/streaming handlers
- Add retry counter (max 3) to search handler to prevent hangs
- Add trace finalization at all gemini_sync channel exit points
- Fix UpstreamError trace outcome label
- Add timeout trace with error recording
- Dispatch Usage before ResponseComplete in SSE flush
This commit is contained in:
Nikketryhard
2026-02-18 01:31:18 -06:00
parent 48674f65da
commit 28d3296c87
11 changed files with 1480 additions and 221 deletions

View File

@@ -45,6 +45,10 @@ pub struct ApiUsage {
pub grpc_method: Option<String>,
/// Timestamp when this usage was captured.
pub captured_at: u64,
/// Thinking signature from Google's response (base64 opaque blob).
/// Required for multi-turn conversations with thinking models.
#[serde(skip_serializing_if = "Option::is_none")]
pub thinking_signature: Option<String>,
}
/// A captured function call from Google's API response.
@@ -188,8 +192,7 @@ pub struct RequestContext {
/// Real user text for MITM injection (LS receives "." instead).
pub pending_user_text: String,
/// Event channel for real-time streaming from MITM → API handler.
/// Only present when custom tools are active.
pub event_channel: Option<mpsc::Sender<MitmEvent>>,
pub event_channel: mpsc::Sender<MitmEvent>,
/// Client-specified generation parameters (temperature, top_p, etc.).
pub generation_params: Option<GenerationParams>,
/// Image to inject into the Google API request.
@@ -208,6 +211,13 @@ pub struct RequestContext {
pub call_id_to_name: HashMap<String, String>,
/// When this context was created (for TTL cleanup).
pub created_at: Instant,
/// Gate: signaled when MITM takes this context.
/// API handlers wait on this with a timeout to detect match failures.
pub gate: Arc<tokio::sync::Notify>,
/// Debug trace handle (if tracing is enabled).
pub trace_handle: Option<crate::trace::TraceHandle>,
/// Current turn index in the trace (for multi-turn tracking).
pub trace_turn: usize,
}
// ─── MitmStore ───────────────────────────────────────────────────────────────
@@ -295,8 +305,9 @@ impl MitmStore {
/// Called by the MITM proxy when intercepting the LS's outbound request.
pub async fn take_request(&self, cascade_id: &str) -> Option<RequestContext> {
let ctx = self.pending_requests.write().await.remove(cascade_id);
if ctx.is_some() {
debug!(cascade = %cascade_id, "Took request context");
if let Some(ref c) = ctx {
c.gate.notify_one();
debug!(cascade = %cascade_id, "Took request context (gate signaled)");
}
ctx
}
@@ -315,8 +326,9 @@ impl MitmStore {
.map(|(k, _)| k.clone());
if let Some(key) = latest_key {
let ctx = pending.remove(&key);
if ctx.is_some() {
debug!(cascade = %key, "Took latest request context (fallback)");
if let Some(ref c) = ctx {
c.gate.notify_one();
debug!(cascade = %key, "Took latest request context (fallback, gate signaled)");
}
ctx
} else {
@@ -577,12 +589,42 @@ impl MitmStore {
// ── Compat shims for streaming tool-call loops ──────────────────────
/// Update the event channel on an existing request context.
/// Used by streaming loop handlers when re-registering for a new tool round.
/// Update the event channel on an existing request context,
/// or re-register a minimal context if it was already consumed by `take_request`.
///
/// This is critical for thinking-only intermediate responses: the MITM proxy
/// consumes the context via `take_request`, but the handler needs to re-install
/// a channel for the LS's follow-up request.
pub async fn set_channel(&self, cascade_id: &str, tx: mpsc::Sender<MitmEvent>) {
self.update_request(cascade_id, |ctx| {
ctx.event_channel = Some(tx);
let updated = self.update_request(cascade_id, |ctx| {
ctx.event_channel = tx.clone();
}).await;
if !updated {
// Context was already consumed — re-register a minimal one
// so the MITM proxy can match the follow-up request.
let gate = std::sync::Arc::new(tokio::sync::Notify::new());
self.register_request(RequestContext {
cascade_id: cascade_id.to_string(),
pending_user_text: String::new(),
event_channel: tx,
generation_params: None,
pending_image: None,
tools: None,
tool_config: None,
pending_tool_results: Vec::new(),
tool_rounds: Vec::new(),
last_function_calls: Vec::new(),
call_id_to_name: std::collections::HashMap::new(),
created_at: std::time::Instant::now(),
gate,
trace_handle: None,
trace_turn: 0,
}).await;
tracing::debug!(
cascade = cascade_id,
"set_channel: re-registered minimal context (original was consumed)"
);
}
}
/// No-op. Upstream errors are now delivered through the event channel.