From a47c572e480fe9d89b4e49e366890bb68093f733 Mon Sep 17 00:00:00 2001 From: Nikketryhard Date: Mon, 16 Feb 2026 19:30:32 -0600 Subject: [PATCH] fix: forward Google's exact error messages to client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: errors from Google were being swallowed, replaced with placeholders like 'Google API returned HTTP 400' or '[Timeout waiting for response]', or silently converted to fake 'incomplete' responses. Changes across all endpoints (/v1/chat/completions, /v1/responses, /v1/gemini, /v1/search): Error message fidelity: - UpstreamError message now includes Google's status prefix: [STATUS] msg - Falls back to raw body if JSON parsing fails (protobuf, HTML, etc.) - ErrorDetail gains optional code and param fields Timeout handling: - poll_for_response returns UpstreamError(504, DEADLINE_EXCEEDED) on timeout instead of '[Timeout waiting for AI response]' placeholder text - Streaming timeouts emit proper error events, not fake content - Sync bypass timeouts return 504 Gateway Timeout, not 200 incomplete Missing error checks added: - responses.rs sync bypass: added upstream_error check in polling loop - gemini.rs sync bypass: added upstream_error check in polling loop - gemini.rs streaming: added upstream_error check in polling loop (was completely missing — errors only handled in sync path) DRY helpers: - upstream_error_message(): shared exact message extraction - upstream_error_type(): shared Google→OpenAI error type mapping - All streaming handlers use these instead of inline formatting --- src/api/completions.rs | 37 +++++------------ src/api/gemini.rs | 53 +++++++++++++++++------- src/api/polling.rs | 11 ++++- src/api/responses.rs | 94 ++++++++++++++++++------------------------ src/api/types.rs | 4 ++ src/api/util.rs | 78 +++++++++++++++++++++++++++++++---- 6 files changed, 171 insertions(+), 106 deletions(-) diff --git a/src/api/completions.rs b/src/api/completions.rs index cad1a45..426a789 100644 --- a/src/api/completions.rs +++ b/src/api/completions.rs @@ -675,14 +675,8 @@ async fn chat_completions_stream( while start.elapsed().as_secs() < timeout { // Check for upstream errors from MITM (Google API errors) if let Some(err) = state.mitm_store.take_upstream_error().await { - let error_msg = err.message.clone() - .unwrap_or_else(|| format!("Google API returned HTTP {}", err.status)); - let error_type = match err.error_status.as_deref() { - Some("INVALID_ARGUMENT") => "invalid_request_error", - Some("RESOURCE_EXHAUSTED") => "rate_limit_error", - Some("PERMISSION_DENIED") | Some("UNAUTHENTICATED") => "authentication_error", - _ => "upstream_error", - }; + let error_msg = super::util::upstream_error_message(&err); + let error_type = super::util::upstream_error_type(&err); yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({ "error": { "message": error_msg, @@ -997,26 +991,15 @@ async fn chat_completions_stream( tokio::time::sleep(tokio::time::Duration::from_millis(poll_ms)).await; } - // Timeout + // Timeout — emit error, not placeholder content warn!("Completions stream timeout after {}s", timeout); - let mitm = state.mitm_store.take_usage(&cascade_id).await - .or(state.mitm_store.take_usage("_latest").await); - let fr = google_to_openai_finish_reason(mitm.as_ref().and_then(|u| u.stop_reason.as_deref())); - yield Ok(Event::default().data(chunk_json( - &completion_id, &model_name, - serde_json::json!([chunk_choice(0, serde_json::json!({"content": if last_text.is_empty() { "[Timeout waiting for response]" } else { "" }}), Some(fr))]), - None, - ))); - if include_usage { - let (pt, ct, crt, tt) = if let Some(ref u) = mitm { - (u.input_tokens, u.output_tokens, u.cache_read_input_tokens, u.thinking_output_tokens) - } else { (0, 0, 0, 0) }; - yield Ok(Event::default().data(chunk_json( - &completion_id, &model_name, - serde_json::json!([]), - Some(build_usage(pt, ct, crt, tt)), - ))); - } + yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({ + "error": { + "message": format!("Timeout: no response from Google API after {timeout}s"), + "type": "upstream_error", + "code": 504, + } + })).unwrap())); // Always clear in-flight flag when stream ends state.mitm_store.clear_response_async().await; yield Ok(Event::default().data("[DONE]")); diff --git a/src/api/gemini.rs b/src/api/gemini.rs index 119c5f0..15e861f 100644 --- a/src/api/gemini.rs +++ b/src/api/gemini.rs @@ -374,6 +374,11 @@ async fn gemini_sync( if has_custom_tools { let start = std::time::Instant::now(); while start.elapsed().as_secs() < timeout { + // Check for upstream errors from MITM (Google API errors) + if let Some(err) = state.mitm_store.take_upstream_error().await { + return upstream_err_response(&err); + } + // Check for function calls let captured = state.mitm_store.take_any_function_calls().await; if let Some(ref calls) = captured { @@ -444,13 +449,17 @@ async fn gemini_sync( tokio::time::sleep(tokio::time::Duration::from_millis(200)).await; } - // Timeout - return Json(serde_json::json!({ - "error": { - "message": "Request timed out", - "type": "timeout_error", - } - })) + // Timeout — return proper error with status code + return ( + axum::http::StatusCode::GATEWAY_TIMEOUT, + Json(serde_json::json!({ + "error": { + "message": format!("Timeout: no response from Google API after {timeout}s"), + "type": "upstream_error", + "code": 504, + } + })), + ) .into_response(); } @@ -535,6 +544,21 @@ async fn gemini_stream( state.mitm_store.clear_response_async().await; while start.elapsed().as_secs() < timeout { + // Check for upstream errors from MITM (Google API errors) + if let Some(err) = state.mitm_store.take_upstream_error().await { + let error_msg = super::util::upstream_error_message(&err); + let error_type = super::util::upstream_error_type(&err); + yield Ok::<_, std::convert::Infallible>(Event::default().data(serde_json::to_string(&serde_json::json!({ + "error": { + "message": error_msg, + "type": error_type, + "code": err.status, + } + })).unwrap())); + yield Ok(Event::default().data("[DONE]".to_string())); + break; + } + // ── Check for MITM-captured function calls FIRST ── let captured = state.mitm_store.take_any_function_calls().await; if let Some(ref calls) = captured { @@ -705,16 +729,13 @@ async fn gemini_stream( tokio::time::sleep(tokio::time::Duration::from_millis(poll_ms)).await; } - // Timeout + // Timeout — emit proper error yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({ - "candidates": [{ - "content": { - "parts": [{"text": if last_text.is_empty() { "[Timeout]" } else { "" }}], - "role": "model", - }, - "finishReason": "STOP", - }], - "modelVersion": model_name, + "error": { + "message": format!("Timeout: no response from Google API after {timeout}s"), + "type": "upstream_error", + "code": 504, + } })).unwrap_or_default())); yield Ok(Event::default().data("[DONE]")); }; diff --git a/src/api/polling.rs b/src/api/polling.rs index 6050ba0..406cb44 100644 --- a/src/api/polling.rs +++ b/src/api/polling.rs @@ -323,11 +323,18 @@ pub(crate) async fn poll_for_response( warn!("Timeout after {timeout}s on cascade {short_id}"); PollResult { - text: "[Timeout waiting for AI response]".to_string(), + text: String::new(), usage: None, thinking_signature: None, thinking: None, thinking_duration: None, - upstream_error: None, + upstream_error: Some(crate::mitm::store::UpstreamError { + status: 504, + body: String::new(), + message: Some(format!( + "Timeout: no response from Google API after {timeout}s" + )), + error_status: Some("DEADLINE_EXCEEDED".to_string()), + }), } } diff --git a/src/api/responses.rs b/src/api/responses.rs index 8fb2b7e..48b996c 100644 --- a/src/api/responses.rs +++ b/src/api/responses.rs @@ -615,6 +615,11 @@ async fn handle_responses_sync( if has_custom_tools { let start = std::time::Instant::now(); while start.elapsed().as_secs() < timeout { + // Check for upstream errors from MITM (Google API errors) + if let Some(err) = state.mitm_store.take_upstream_error().await { + return upstream_err_response(&err); + } + // Check for function calls let captured = state.mitm_store.take_function_calls(&cascade_id).await; if let Some(ref raw_calls) = captured { @@ -706,21 +711,12 @@ async fn handle_responses_sync( tokio::time::sleep(tokio::time::Duration::from_millis(200)).await; } - // Timeout - let resp = build_response_object( - ResponseData { - id: response_id, - model: model_name, - status: "incomplete", - created_at, - completed_at: None, - output: vec![], - usage: Some(Usage::estimate(¶ms.user_text, "")), - thinking_signature: None, - }, - ¶ms, + // Timeout — return proper error, not fake incomplete response + return err_response( + StatusCode::GATEWAY_TIMEOUT, + format!("Timeout: no response from Google API after {timeout}s"), + "upstream_error", ); - return Json(resp).into_response(); } // ── Normal LS path (no custom tools) ── @@ -904,8 +900,8 @@ async fn handle_responses_stream( while start.elapsed().as_secs() < timeout { // Check for upstream errors from MITM (Google API errors) if let Some(err) = state.mitm_store.take_upstream_error().await { - let error_msg = err.message.clone() - .unwrap_or_else(|| format!("Google API returned HTTP {}", err.status)); + let error_msg = super::util::upstream_error_message(&err); + let error_type = super::util::upstream_error_type(&err); yield Ok(responses_sse_event( "response.failed", serde_json::json!({ @@ -915,7 +911,7 @@ async fn handle_responses_stream( "id": &response_id, "status": "failed", "error": { - "type": err.error_status.as_deref().unwrap_or("upstream_error"), + "type": error_type, "message": error_msg, "code": err.status, }, @@ -1202,26 +1198,21 @@ async fn handle_responses_stream( tokio::time::sleep(tokio::time::Duration::from_millis(poll_ms)).await; } - // Timeout in bypass mode - let timeout_resp = build_response_object( - ResponseData { - id: response_id.clone(), - model: model_name.clone(), - status: "incomplete", - created_at, - completed_at: None, - output: vec![], - usage: Some(Usage::estimate(¶ms.user_text, "")), - thinking_signature: None, - }, - ¶ms, - ); + // Timeout in bypass mode — emit error, not fake incomplete yield Ok(responses_sse_event( - "response.completed", + "response.failed", serde_json::json!({ - "type": "response.completed", + "type": "response.failed", "sequence_number": next_seq(), - "response": response_to_json(&timeout_resp), + "response": { + "id": &response_id, + "status": "failed", + "error": { + "type": "upstream_error", + "message": format!("Timeout: no response from Google API after {timeout}s"), + "code": 504, + }, + }, }), )); return; @@ -1247,8 +1238,8 @@ async fn handle_responses_stream( while start.elapsed().as_secs() < timeout { // Check for upstream errors from MITM (Google API errors) if let Some(err) = state.mitm_store.take_upstream_error().await { - let error_msg = err.message.clone() - .unwrap_or_else(|| format!("Google API returned HTTP {}", err.status)); + let error_msg = super::util::upstream_error_message(&err); + let error_type = super::util::upstream_error_type(&err); yield Ok(responses_sse_event( "response.failed", serde_json::json!({ @@ -1258,7 +1249,7 @@ async fn handle_responses_stream( "id": &response_id, "status": "failed", "error": { - "type": err.error_status.as_deref().unwrap_or("upstream_error"), + "type": error_type, "message": error_msg, "code": err.status, }, @@ -1507,26 +1498,21 @@ async fn handle_responses_stream( } } - // Timeout — emit incomplete response - let timeout_resp = build_response_object( - ResponseData { - id: response_id.clone(), - model: model_name.clone(), - status: "incomplete", - created_at, - completed_at: None, - output: vec![], - usage: Some(Usage::estimate(¶ms.user_text, "")), - thinking_signature: None, - }, - ¶ms, - ); + // Timeout — emit error, not fake incomplete response yield Ok(responses_sse_event( - "response.completed", + "response.failed", serde_json::json!({ - "type": "response.completed", + "type": "response.failed", "sequence_number": next_seq(), - "response": response_to_json(&timeout_resp), + "response": { + "id": &response_id, + "status": "failed", + "error": { + "type": "upstream_error", + "message": format!("Timeout: no response from Google API after {timeout}s"), + "code": 504, + }, + }, }), )); }; diff --git a/src/api/types.rs b/src/api/types.rs index 9b35520..9f1445f 100644 --- a/src/api/types.rs +++ b/src/api/types.rs @@ -410,4 +410,8 @@ pub(crate) struct ErrorDetail { pub message: String, #[serde(rename = "type")] pub error_type: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub code: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub param: Option, } diff --git a/src/api/util.rs b/src/api/util.rs index a489f63..ffff4dc 100644 --- a/src/api/util.rs +++ b/src/api/util.rs @@ -20,17 +20,18 @@ pub(crate) fn err_response( error: ErrorDetail { message, error_type: error_type.to_string(), + code: Some(status.as_u16()), + param: None, }, }; (status, Json(body)).into_response() } /// Convert a MITM-captured upstream error from Google into an HTTP response. -/// Maps Google's HTTP status codes and preserves the error message. +/// Forwards Google's exact error message and HTTP status code to the client. pub(crate) fn upstream_err_response( err: &crate::mitm::store::UpstreamError, ) -> axum::response::Response { - // Map Google's status code to HTTP status let status = StatusCode::from_u16(err.status).unwrap_or(StatusCode::BAD_GATEWAY); // Map Google error status to OpenAI-style error type @@ -43,12 +44,75 @@ pub(crate) fn upstream_err_response( _ => "upstream_error", }; - let message = err - .message - .clone() - .unwrap_or_else(|| format!("Google API returned HTTP {}", err.status)); + // Use Google's exact error message. Try parsed message first, then raw body. + let message = if let Some(ref msg) = err.message { + // Include Google's error status for context if available + if let Some(ref gstatus) = err.error_status { + format!("[{gstatus}] {msg}") + } else { + msg.clone() + } + } else if !err.body.is_empty() { + // No parsed message — forward the raw body as-is so the client + // sees exactly what Google returned (protobuf, HTML, etc.) + err.body.clone() + } else { + format!("Google API error: HTTP {}", err.status) + }; - err_response(status, message, error_type) + // Extract param hint from Google's error details if available + let param = serde_json::from_str::(&err.body) + .ok() + .and_then(|v| { + v["error"]["details"] + .as_array() + .and_then(|details| { + details.iter().find_map(|d| { + d["fieldViolations"] + .as_array() + .and_then(|fv| fv.first()) + .and_then(|v| v["field"].as_str().map(|s| s.to_string())) + }) + }) + }); + + let body = ErrorResponse { + error: ErrorDetail { + message, + error_type: error_type.to_string(), + code: Some(err.status), + param, + }, + }; + (status, Json(body)).into_response() +} + +/// Extract the exact error message from a MITM-captured upstream error. +/// Preserves Google's original message verbatim. Used by streaming handlers. +pub(crate) fn upstream_error_message(err: &crate::mitm::store::UpstreamError) -> String { + if let Some(ref msg) = err.message { + if let Some(ref gstatus) = err.error_status { + format!("[{gstatus}] {msg}") + } else { + msg.clone() + } + } else if !err.body.is_empty() { + err.body.clone() + } else { + format!("Google API error: HTTP {}", err.status) + } +} + +/// Map Google's error status to OpenAI-compatible error type string. +pub(crate) fn upstream_error_type(err: &crate::mitm::store::UpstreamError) -> &'static str { + match err.error_status.as_deref() { + Some("INVALID_ARGUMENT") => "invalid_request_error", + Some("RESOURCE_EXHAUSTED") => "rate_limit_error", + Some("PERMISSION_DENIED") | Some("UNAUTHENTICATED") => "authentication_error", + Some("NOT_FOUND") => "not_found_error", + Some("INTERNAL") | Some("UNAVAILABLE") => "server_error", + _ => "upstream_error", + } } pub(crate) fn now_unix() -> u64 {