fix: gemini route, usage capture, search timeout, and trace finalization
- Add missing /v1/gemini POST route and handler
- Capture MitmEvent::Usage in gemini sync/streaming handlers
- Add retry counter (max 3) to search handler to prevent hang
- Add trace finalization at all gemini_sync channel exit points
- Fix UpstreamError trace outcome label
- Add timeout trace with error recording
- Dispatch Usage before ResponseComplete in SSE flush
This commit is contained in:
@@ -45,6 +45,10 @@ pub struct ApiUsage {
|
||||
pub grpc_method: Option<String>,
|
||||
/// Timestamp when this usage was captured.
|
||||
pub captured_at: u64,
|
||||
/// Thinking signature from Google's response (base64 opaque blob).
|
||||
/// Required for multi-turn with thinking models.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub thinking_signature: Option<String>,
|
||||
}
|
||||
|
||||
/// A captured function call from Google's API response.
|
||||
@@ -188,8 +192,7 @@ pub struct RequestContext {
|
||||
/// Real user text for MITM injection (LS receives "." instead).
|
||||
pub pending_user_text: String,
|
||||
/// Event channel for real-time streaming from MITM → API handler.
|
||||
/// Only present when custom tools are active.
|
||||
pub event_channel: Option<mpsc::Sender<MitmEvent>>,
|
||||
pub event_channel: mpsc::Sender<MitmEvent>,
|
||||
/// Client-specified generation parameters (temperature, top_p, etc.).
|
||||
pub generation_params: Option<GenerationParams>,
|
||||
/// Image to inject into the Google API request.
|
||||
@@ -208,6 +211,13 @@ pub struct RequestContext {
|
||||
pub call_id_to_name: HashMap<String, String>,
|
||||
/// When this context was created (for TTL cleanup).
|
||||
pub created_at: Instant,
|
||||
/// Gate: signaled when MITM takes this context.
|
||||
/// API handlers wait on this with a timeout to detect match failures.
|
||||
pub gate: Arc<tokio::sync::Notify>,
|
||||
/// Debug trace handle (if tracing is enabled).
|
||||
pub trace_handle: Option<crate::trace::TraceHandle>,
|
||||
/// Current turn index in the trace (for multi-turn tracking).
|
||||
pub trace_turn: usize,
|
||||
}
|
||||
|
||||
// ─── MitmStore ───────────────────────────────────────────────────────────────
|
||||
@@ -295,8 +305,9 @@ impl MitmStore {
|
||||
/// Called by the MITM proxy when intercepting the LS's outbound request.
|
||||
pub async fn take_request(&self, cascade_id: &str) -> Option<RequestContext> {
|
||||
let ctx = self.pending_requests.write().await.remove(cascade_id);
|
||||
if ctx.is_some() {
|
||||
debug!(cascade = %cascade_id, "Took request context");
|
||||
if let Some(ref c) = ctx {
|
||||
c.gate.notify_one();
|
||||
debug!(cascade = %cascade_id, "Took request context (gate signaled)");
|
||||
}
|
||||
ctx
|
||||
}
|
||||
@@ -315,8 +326,9 @@ impl MitmStore {
|
||||
.map(|(k, _)| k.clone());
|
||||
if let Some(key) = latest_key {
|
||||
let ctx = pending.remove(&key);
|
||||
if ctx.is_some() {
|
||||
debug!(cascade = %key, "Took latest request context (fallback)");
|
||||
if let Some(ref c) = ctx {
|
||||
c.gate.notify_one();
|
||||
debug!(cascade = %key, "Took latest request context (fallback, gate signaled)");
|
||||
}
|
||||
ctx
|
||||
} else {
|
||||
@@ -577,12 +589,42 @@ impl MitmStore {
|
||||
|
||||
// ── Compat shims for streaming tool-call loops ──────────────────────
|
||||
|
||||
/// Update the event channel on an existing request context.
|
||||
/// Used by streaming loop handlers when re-registering for a new tool round.
|
||||
/// Update the event channel on an existing request context,
|
||||
/// or re-register a minimal context if it was already consumed by `take_request`.
|
||||
///
|
||||
/// This is critical for thinking-only intermediate responses: the MITM proxy
|
||||
/// consumes the context via `take_request`, but the handler needs to re-install
|
||||
/// a channel for the LS's follow-up request.
|
||||
pub async fn set_channel(&self, cascade_id: &str, tx: mpsc::Sender<MitmEvent>) {
|
||||
self.update_request(cascade_id, |ctx| {
|
||||
ctx.event_channel = Some(tx);
|
||||
let updated = self.update_request(cascade_id, |ctx| {
|
||||
ctx.event_channel = tx.clone();
|
||||
}).await;
|
||||
if !updated {
|
||||
// Context was already consumed — re-register a minimal one
|
||||
// so the MITM proxy can match the follow-up request.
|
||||
let gate = std::sync::Arc::new(tokio::sync::Notify::new());
|
||||
self.register_request(RequestContext {
|
||||
cascade_id: cascade_id.to_string(),
|
||||
pending_user_text: String::new(),
|
||||
event_channel: tx,
|
||||
generation_params: None,
|
||||
pending_image: None,
|
||||
tools: None,
|
||||
tool_config: None,
|
||||
pending_tool_results: Vec::new(),
|
||||
tool_rounds: Vec::new(),
|
||||
last_function_calls: Vec::new(),
|
||||
call_id_to_name: std::collections::HashMap::new(),
|
||||
created_at: std::time::Instant::now(),
|
||||
gate,
|
||||
trace_handle: None,
|
||||
trace_turn: 0,
|
||||
}).await;
|
||||
tracing::debug!(
|
||||
cascade = cascade_id,
|
||||
"set_channel: re-registered minimal context (original was consumed)"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// No-op. Upstream errors are now delivered through the event channel.
|
||||
|
||||
Reference in New Issue
Block a user