feat: propagate Google upstream errors to client

When Google returns an error (400, 429, 500, etc.), the MITM proxy now captures it and the API handlers return it immediately instead of hanging until timeout.

- UpstreamError struct stored in MitmStore
- MITM proxy parses Google error JSON (message + status)
- Polling handler checks for upstream errors each cycle
- Streaming handlers emit response.failed / SSE error events
- Error status mapped to OpenAI-style types (invalid_request_error, rate_limit_error, authentication_error, server_error, etc.)
- All handlers clear stale errors at request start
2026-02-15 18:19:38 -06:00
parent 371c57bab0
commit 2882f7cce2
8 changed files with 195 additions and 14 deletions
--- a/src/api/completions.rs
+++ b/src/api/completions.rs
@@ -12,7 +12,7 @@ use tracing::{debug, info, warn};
 use super::models::{lookup_model, DEFAULT_MODEL, MODELS};
 use super::polling::{extract_response_text, extract_thinking_content, is_response_done, poll_for_response};
 use super::types::*;
-use super::util::{err_response, now_unix};
+use super::util::{err_response, upstream_err_response, now_unix};
 use super::AppState;

 /// Extract a conversation/session ID from a flexible JSON value.
@@ -488,8 +488,9 @@ async fn chat_completions_stream(
        let mut last_text = String::new();
        let has_custom_tools = state.mitm_store.get_tools().await.is_some();

-        // Clear any stale captured response from previous requests
+        // Clear any stale captured response and upstream errors from previous requests
        state.mitm_store.clear_response_async().await;
+        state.mitm_store.clear_upstream_error().await;

        // Initial role chunk
        yield Ok::<_, std::convert::Infallible>(Event::default().data(chunk_json(
@@ -513,6 +514,27 @@ async fn chat_completions_stream(
        };

        while start.elapsed().as_secs() < timeout {
+            // Check for upstream errors from MITM (Google API errors)
+            if let Some(err) = state.mitm_store.take_upstream_error().await {
+                let error_msg = err.message.clone()
+                    .unwrap_or_else(|| format!("Google API returned HTTP {}", err.status));
+                let error_type = match err.error_status.as_deref() {
+                    Some("INVALID_ARGUMENT") => "invalid_request_error",
+                    Some("RESOURCE_EXHAUSTED") => "rate_limit_error",
+                    Some("PERMISSION_DENIED") | Some("UNAUTHENTICATED") => "authentication_error",
+                    _ => "upstream_error",
+                };
+                yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
+                    "error": {
+                        "message": error_msg,
+                        "type": error_type,
+                        "code": err.status,
+                    }
+                })).unwrap()));
+                yield Ok(Event::default().data("[DONE]".to_string()));
+                break;
+            }
+
            // ── Check for MITM-captured function calls FIRST ──
            // This runs independently of LS steps — the MITM captures tool calls
            // at the proxy layer, so we don't need to wait for LS processing.
@@ -852,6 +874,9 @@ async fn chat_completions_sync(
    timeout: u64,
 ) -> axum::response::Response {
    let result = poll_for_response(&state, &cascade_id, timeout).await;
+    if let Some(ref err) = result.upstream_error {
+        return upstream_err_response(err);
+    }

    // Check MITM store first for real intercepted usage (fallback to _latest)
    let mitm = match state.mitm_store.take_usage(&cascade_id).await {