From a47c572e480fe9d89b4e49e366890bb68093f733 Mon Sep 17 00:00:00 2001
From: Nikketryhard <louisnewmobile@gmail.com>
Date: Mon, 16 Feb 2026 19:30:32 -0600
Subject: [PATCH] fix: forward Google's exact error messages to client
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root cause: errors from Google were being swallowed, replaced with
placeholders like 'Google API returned HTTP 400' or '[Timeout waiting
for response]', or silently converted to fake 'incomplete' responses.

Changes across all endpoints (/v1/chat/completions, /v1/responses,
/v1/gemini, /v1/search):

Error message fidelity:
- UpstreamError message now includes Google's status prefix: [STATUS] msg
- Falls back to raw body if JSON parsing fails (protobuf, HTML, etc.)
- ErrorDetail gains optional code and param fields

Timeout handling:
- poll_for_response returns UpstreamError(504, DEADLINE_EXCEEDED) on timeout
  instead of '[Timeout waiting for AI response]' placeholder text
- Streaming timeouts emit proper error events, not fake content
- Sync bypass timeouts return 504 Gateway Timeout, not 200 incomplete

Missing error checks added:
- responses.rs sync bypass: added upstream_error check in polling loop
- gemini.rs sync bypass: added upstream_error check in polling loop
- gemini.rs streaming: added upstream_error check in polling loop
  (was completely missing — errors only handled in sync path)

DRY helpers:
- upstream_error_message(): shared exact message extraction
- upstream_error_type(): shared Google→OpenAI error type mapping
- All streaming handlers use these instead of inline formatting
---
 src/api/completions.rs | 37 +++++------------
 src/api/gemini.rs      | 53 +++++++++++++++++-------
 src/api/polling.rs     | 11 ++++-
 src/api/responses.rs   | 94 ++++++++++++++++++------------------------
 src/api/types.rs       |  4 ++
 src/api/util.rs        | 78 +++++++++++++++++++++++++++++++----
 6 files changed, 171 insertions(+), 106 deletions(-)

diff --git a/src/api/completions.rs b/src/api/completions.rs
index cad1a45..426a789 100644
--- a/src/api/completions.rs
+++ b/src/api/completions.rs
@@ -675,14 +675,8 @@ async fn chat_completions_stream(
         while start.elapsed().as_secs() < timeout {
             // Check for upstream errors from MITM (Google API errors)
             if let Some(err) = state.mitm_store.take_upstream_error().await {
-                let error_msg = err.message.clone()
-                    .unwrap_or_else(|| format!("Google API returned HTTP {}", err.status));
-                let error_type = match err.error_status.as_deref() {
-                    Some("INVALID_ARGUMENT") => "invalid_request_error",
-                    Some("RESOURCE_EXHAUSTED") => "rate_limit_error",
-                    Some("PERMISSION_DENIED") | Some("UNAUTHENTICATED") => "authentication_error",
-                    _ => "upstream_error",
-                };
+                let error_msg = super::util::upstream_error_message(&err);
+                let error_type = super::util::upstream_error_type(&err);
                 yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
                     "error": {
                         "message": error_msg,
@@ -997,26 +991,15 @@ async fn chat_completions_stream(
             tokio::time::sleep(tokio::time::Duration::from_millis(poll_ms)).await;
         }
 
-        // Timeout
+        // Timeout — emit error, not placeholder content
         warn!("Completions stream timeout after {}s", timeout);
-        let mitm = state.mitm_store.take_usage(&cascade_id).await
-            .or(state.mitm_store.take_usage("_latest").await);
-        let fr = google_to_openai_finish_reason(mitm.as_ref().and_then(|u| u.stop_reason.as_deref()));
-        yield Ok(Event::default().data(chunk_json(
-            &completion_id, &model_name,
-            serde_json::json!([chunk_choice(0, serde_json::json!({"content": if last_text.is_empty() { "[Timeout waiting for response]" } else { "" }}), Some(fr))]),
-            None,
-        )));
-        if include_usage {
-            let (pt, ct, crt, tt) = if let Some(ref u) = mitm {
-                (u.input_tokens, u.output_tokens, u.cache_read_input_tokens, u.thinking_output_tokens)
-            } else { (0, 0, 0, 0) };
-            yield Ok(Event::default().data(chunk_json(
-                &completion_id, &model_name,
-                serde_json::json!([]),
-                Some(build_usage(pt, ct, crt, tt)),
-            )));
-        }
+        yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
+            "error": {
+                "message": format!("Timeout: no response from Google API after {timeout}s"),
+                "type": "upstream_error",
+                "code": 504,
+            }
+        })).unwrap()));
         // Always clear in-flight flag when stream ends
         state.mitm_store.clear_response_async().await;
         yield Ok(Event::default().data("[DONE]"));
diff --git a/src/api/gemini.rs b/src/api/gemini.rs
index 119c5f0..15e861f 100644
--- a/src/api/gemini.rs
+++ b/src/api/gemini.rs
@@ -374,6 +374,11 @@ async fn gemini_sync(
     if has_custom_tools {
         let start = std::time::Instant::now();
         while start.elapsed().as_secs() < timeout {
+            // Check for upstream errors from MITM (Google API errors)
+            if let Some(err) = state.mitm_store.take_upstream_error().await {
+                return upstream_err_response(&err);
+            }
+
             // Check for function calls
             let captured = state.mitm_store.take_any_function_calls().await;
             if let Some(ref calls) = captured {
@@ -444,13 +449,17 @@ async fn gemini_sync(
             tokio::time::sleep(tokio::time::Duration::from_millis(200)).await;
         }
 
-        // Timeout
-        return Json(serde_json::json!({
-            "error": {
-                "message": "Request timed out",
-                "type": "timeout_error",
-            }
-        }))
+        // Timeout — return proper error with status code
+        return (
+            axum::http::StatusCode::GATEWAY_TIMEOUT,
+            Json(serde_json::json!({
+                "error": {
+                    "message": format!("Timeout: no response from Google API after {timeout}s"),
+                    "type": "upstream_error",
+                    "code": 504,
+                }
+            })),
+        )
         .into_response();
     }
 
@@ -535,6 +544,21 @@ async fn gemini_stream(
         state.mitm_store.clear_response_async().await;
 
         while start.elapsed().as_secs() < timeout {
+            // Upstream (Google API) error captured by the MITM: report it and end the stream.
+            if let Some(err) = state.mitm_store.take_upstream_error().await {
+                let error_msg = super::util::upstream_error_message(&err);
+                let error_type = super::util::upstream_error_type(&err);
+                yield Ok::<_, std::convert::Infallible>(Event::default().data(serde_json::to_string(&serde_json::json!({
+                    "error": {
+                        "message": error_msg,
+                        "type": error_type,
+                        "code": err.status,
+                    }
+                })).unwrap()));
+                yield Ok(Event::default().data("[DONE]".to_string()));
+                return; // not `break`: that falls through to the timeout error and a second [DONE]
+            }
+
             // ── Check for MITM-captured function calls FIRST ──
             let captured = state.mitm_store.take_any_function_calls().await;
             if let Some(ref calls) = captured {
@@ -705,16 +729,13 @@ async fn gemini_stream(
             tokio::time::sleep(tokio::time::Duration::from_millis(poll_ms)).await;
         }
 
-        // Timeout
+        // Timeout — emit proper error
         yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
-            "candidates": [{
-                "content": {
-                    "parts": [{"text": if last_text.is_empty() { "[Timeout]" } else { "" }}],
-                    "role": "model",
-                },
-                "finishReason": "STOP",
-            }],
-            "modelVersion": model_name,
+            "error": {
+                "message": format!("Timeout: no response from Google API after {timeout}s"),
+                "type": "upstream_error",
+                "code": 504,
+            }
         })).unwrap_or_default()));
         yield Ok(Event::default().data("[DONE]"));
     };
diff --git a/src/api/polling.rs b/src/api/polling.rs
index 6050ba0..406cb44 100644
--- a/src/api/polling.rs
+++ b/src/api/polling.rs
@@ -323,11 +323,18 @@ pub(crate) async fn poll_for_response(
 
     warn!("Timeout after {timeout}s on cascade {short_id}");
     PollResult {
-        text: "[Timeout waiting for AI response]".to_string(),
+        text: String::new(),
         usage: None,
         thinking_signature: None,
         thinking: None,
         thinking_duration: None,
-        upstream_error: None,
+        upstream_error: Some(crate::mitm::store::UpstreamError {
+            status: 504,
+            body: String::new(),
+            message: Some(format!(
+                "Timeout: no response from Google API after {timeout}s"
+            )),
+            error_status: Some("DEADLINE_EXCEEDED".to_string()),
+        }),
     }
 }
diff --git a/src/api/responses.rs b/src/api/responses.rs
index 8fb2b7e..48b996c 100644
--- a/src/api/responses.rs
+++ b/src/api/responses.rs
@@ -615,6 +615,11 @@ async fn handle_responses_sync(
     if has_custom_tools {
         let start = std::time::Instant::now();
         while start.elapsed().as_secs() < timeout {
+            // Check for upstream errors from MITM (Google API errors)
+            if let Some(err) = state.mitm_store.take_upstream_error().await {
+                return upstream_err_response(&err);
+            }
+
             // Check for function calls
             let captured = state.mitm_store.take_function_calls(&cascade_id).await;
             if let Some(ref raw_calls) = captured {
@@ -706,21 +711,12 @@ async fn handle_responses_sync(
             tokio::time::sleep(tokio::time::Duration::from_millis(200)).await;
         }
 
-        // Timeout
-        let resp = build_response_object(
-            ResponseData {
-                id: response_id,
-                model: model_name,
-                status: "incomplete",
-                created_at,
-                completed_at: None,
-                output: vec![],
-                usage: Some(Usage::estimate(&params.user_text, "")),
-                thinking_signature: None,
-            },
-            &params,
+        // Timeout — return proper error, not fake incomplete response
+        return err_response(
+            StatusCode::GATEWAY_TIMEOUT,
+            format!("Timeout: no response from Google API after {timeout}s"),
+            "upstream_error",
         );
-        return Json(resp).into_response();
     }
 
     // ── Normal LS path (no custom tools) ──
@@ -904,8 +900,8 @@ async fn handle_responses_stream(
             while start.elapsed().as_secs() < timeout {
                 // Check for upstream errors from MITM (Google API errors)
                 if let Some(err) = state.mitm_store.take_upstream_error().await {
-                    let error_msg = err.message.clone()
-                        .unwrap_or_else(|| format!("Google API returned HTTP {}", err.status));
+                    let error_msg = super::util::upstream_error_message(&err);
+                    let error_type = super::util::upstream_error_type(&err);
                     yield Ok(responses_sse_event(
                         "response.failed",
                         serde_json::json!({
@@ -915,7 +911,7 @@ async fn handle_responses_stream(
                                 "id": &response_id,
                                 "status": "failed",
                                 "error": {
-                                    "type": err.error_status.as_deref().unwrap_or("upstream_error"),
+                                    "type": error_type,
                                     "message": error_msg,
                                     "code": err.status,
                                 },
@@ -1202,26 +1198,21 @@ async fn handle_responses_stream(
                 tokio::time::sleep(tokio::time::Duration::from_millis(poll_ms)).await;
             }
 
-            // Timeout in bypass mode
-            let timeout_resp = build_response_object(
-                ResponseData {
-                    id: response_id.clone(),
-                    model: model_name.clone(),
-                    status: "incomplete",
-                    created_at,
-                    completed_at: None,
-                    output: vec![],
-                    usage: Some(Usage::estimate(&params.user_text, "")),
-                    thinking_signature: None,
-                },
-                &params,
-            );
+            // Timeout in bypass mode — emit error, not fake incomplete
             yield Ok(responses_sse_event(
-                "response.completed",
+                "response.failed",
                 serde_json::json!({
-                    "type": "response.completed",
+                    "type": "response.failed",
                     "sequence_number": next_seq(),
-                    "response": response_to_json(&timeout_resp),
+                    "response": {
+                        "id": &response_id,
+                        "status": "failed",
+                        "error": {
+                            "type": "upstream_error",
+                            "message": format!("Timeout: no response from Google API after {timeout}s"),
+                            "code": 504,
+                        },
+                    },
                 }),
             ));
             return;
@@ -1247,8 +1238,8 @@ async fn handle_responses_stream(
         while start.elapsed().as_secs() < timeout {
             // Check for upstream errors from MITM (Google API errors)
             if let Some(err) = state.mitm_store.take_upstream_error().await {
-                let error_msg = err.message.clone()
-                    .unwrap_or_else(|| format!("Google API returned HTTP {}", err.status));
+                let error_msg = super::util::upstream_error_message(&err);
+                let error_type = super::util::upstream_error_type(&err);
                 yield Ok(responses_sse_event(
                     "response.failed",
                     serde_json::json!({
@@ -1258,7 +1249,7 @@ async fn handle_responses_stream(
                             "id": &response_id,
                             "status": "failed",
                             "error": {
-                                "type": err.error_status.as_deref().unwrap_or("upstream_error"),
+                                "type": error_type,
                                 "message": error_msg,
                                 "code": err.status,
                             },
@@ -1507,26 +1498,21 @@ async fn handle_responses_stream(
             }
         }
 
-        // Timeout — emit incomplete response
-        let timeout_resp = build_response_object(
-            ResponseData {
-                id: response_id.clone(),
-                model: model_name.clone(),
-                status: "incomplete",
-                created_at,
-                completed_at: None,
-                output: vec![],
-                usage: Some(Usage::estimate(&params.user_text, "")),
-                thinking_signature: None,
-            },
-            &params,
-        );
+        // Timeout — emit error, not fake incomplete response
         yield Ok(responses_sse_event(
-            "response.completed",
+            "response.failed",
             serde_json::json!({
-                "type": "response.completed",
+                "type": "response.failed",
                 "sequence_number": next_seq(),
-                "response": response_to_json(&timeout_resp),
+                "response": {
+                    "id": &response_id,
+                    "status": "failed",
+                    "error": {
+                        "type": "upstream_error",
+                        "message": format!("Timeout: no response from Google API after {timeout}s"),
+                        "code": 504,
+                    },
+                },
             }),
         ));
     };
diff --git a/src/api/types.rs b/src/api/types.rs
index 9b35520..9f1445f 100644
--- a/src/api/types.rs
+++ b/src/api/types.rs
@@ -410,4 +410,8 @@ pub(crate) struct ErrorDetail {
     pub message: String,
     #[serde(rename = "type")]
     pub error_type: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub code: Option<u16>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub param: Option<String>,
 }
diff --git a/src/api/util.rs b/src/api/util.rs
index a489f63..ffff4dc 100644
--- a/src/api/util.rs
+++ b/src/api/util.rs
@@ -20,17 +20,18 @@ pub(crate) fn err_response(
         error: ErrorDetail {
             message,
             error_type: error_type.to_string(),
+            code: Some(status.as_u16()),
+            param: None,
         },
     };
     (status, Json(body)).into_response()
 }
 
 /// Convert a MITM-captured upstream error from Google into an HTTP response.
-/// Maps Google's HTTP status codes and preserves the error message.
+/// Forwards Google's error message (prefixed with `[STATUS]` when known) and HTTP status code to the client.
 pub(crate) fn upstream_err_response(
     err: &crate::mitm::store::UpstreamError,
 ) -> axum::response::Response {
-    // Map Google's status code to HTTP status
     let status = StatusCode::from_u16(err.status).unwrap_or(StatusCode::BAD_GATEWAY);
 
     // Map Google error status to OpenAI-style error type
@@ -43,12 +44,75 @@ pub(crate) fn upstream_err_response(
         _ => "upstream_error",
     };
 
-    let message = err
-        .message
-        .clone()
-        .unwrap_or_else(|| format!("Google API returned HTTP {}", err.status));
+    // Use Google's exact error message. Try parsed message first, then raw body.
+    let message = if let Some(ref msg) = err.message {
+        // Include Google's error status for context if available
+        if let Some(ref gstatus) = err.error_status {
+            format!("[{gstatus}] {msg}")
+        } else {
+            msg.clone()
+        }
+    } else if !err.body.is_empty() {
+        // No parsed message — forward the raw body as-is so the client
+        // sees exactly what Google returned (protobuf, HTML, etc.)
+        err.body.clone()
+    } else {
+        format!("Google API error: HTTP {}", err.status)
+    };
 
-    err_response(status, message, error_type)
+    // Extract param hint from Google's error details if available
+    let param = serde_json::from_str::<serde_json::Value>(&err.body)
+        .ok()
+        .and_then(|v| {
+            v["error"]["details"]
+                .as_array()
+                .and_then(|details| {
+                    details.iter().find_map(|d| {
+                        d["fieldViolations"]
+                            .as_array()
+                            .and_then(|fv| fv.first())
+                            .and_then(|v| v["field"].as_str().map(|s| s.to_string()))
+                    })
+                })
+        });
+
+    let body = ErrorResponse {
+        error: ErrorDetail {
+            message,
+            error_type: error_type.to_string(),
+            code: Some(err.status),
+            param,
+        },
+    };
+    (status, Json(body)).into_response()
+}
+
+/// Extract the exact error message from a MITM-captured upstream error.
+/// Yields `[STATUS] msg` when both are parsed, else the bare message, the raw body, or an HTTP-status fallback. Used by streaming handlers.
+pub(crate) fn upstream_error_message(err: &crate::mitm::store::UpstreamError) -> String {
+    // Preference order: parsed message, then raw body, then a status-only fallback.
+    match (err.message.as_deref(), err.error_status.as_deref()) {
+        // Parsed message plus Google's status: tag the message with it.
+        (Some(text), Some(tag)) => format!("[{tag}] {text}"),
+        // Parsed message only.
+        (Some(text), None) => text.to_string(),
+        // Nothing parsed: hand back whatever body was captured.
+        (None, _) if !err.body.is_empty() => err.body.clone(),
+        // No message and no body: report just the HTTP status.
+        (None, _) => format!("Google API error: HTTP {}", err.status),
+    }
+}
+
+/// Translate Google's error status into an OpenAI-style error type; absent or unlisted statuses give "upstream_error".
+pub(crate) fn upstream_error_type(err: &crate::mitm::store::UpstreamError) -> &'static str {
+    match err.error_status.as_deref().unwrap_or_default() {
+        "INVALID_ARGUMENT" => "invalid_request_error",
+        "RESOURCE_EXHAUSTED" => "rate_limit_error",
+        "PERMISSION_DENIED" | "UNAUTHENTICATED" => "authentication_error",
+        "NOT_FOUND" => "not_found_error",
+        "INTERNAL" | "UNAVAILABLE" => "server_error",
+        _ => "upstream_error",
+    }
+}
 
 pub(crate) fn now_unix() -> u64 {