fix: forward Google's exact error messages to client
Root cause: errors from Google were being swallowed, replaced with placeholders like 'Google API returned HTTP 400' or '[Timeout waiting for response]', or silently converted to fake 'incomplete' responses. Changes across all endpoints (/v1/chat/completions, /v1/responses, /v1/gemini, /v1/search): Error message fidelity: - UpstreamError message now includes Google's status prefix: [STATUS] msg - Falls back to raw body if JSON parsing fails (protobuf, HTML, etc.) - ErrorDetail gains optional code and param fields Timeout handling: - poll_for_response returns UpstreamError(504, DEADLINE_EXCEEDED) on timeout instead of '[Timeout waiting for AI response]' placeholder text - Streaming timeouts emit proper error events, not fake content - Sync bypass timeouts return 504 Gateway Timeout, not 200 incomplete Missing error checks added: - responses.rs sync bypass: added upstream_error check in polling loop - gemini.rs sync bypass: added upstream_error check in polling loop - gemini.rs streaming: added upstream_error check in polling loop (was completely missing — errors only handled in sync path) DRY helpers: - upstream_error_message(): shared exact message extraction - upstream_error_type(): shared Google→OpenAI error type mapping - All streaming handlers use these instead of inline formatting
This commit is contained in:
@@ -675,14 +675,8 @@ async fn chat_completions_stream(
|
||||
while start.elapsed().as_secs() < timeout {
|
||||
// Check for upstream errors from MITM (Google API errors)
|
||||
if let Some(err) = state.mitm_store.take_upstream_error().await {
|
||||
let error_msg = err.message.clone()
|
||||
.unwrap_or_else(|| format!("Google API returned HTTP {}", err.status));
|
||||
let error_type = match err.error_status.as_deref() {
|
||||
Some("INVALID_ARGUMENT") => "invalid_request_error",
|
||||
Some("RESOURCE_EXHAUSTED") => "rate_limit_error",
|
||||
Some("PERMISSION_DENIED") | Some("UNAUTHENTICATED") => "authentication_error",
|
||||
_ => "upstream_error",
|
||||
};
|
||||
let error_msg = super::util::upstream_error_message(&err);
|
||||
let error_type = super::util::upstream_error_type(&err);
|
||||
yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
|
||||
"error": {
|
||||
"message": error_msg,
|
||||
@@ -997,26 +991,15 @@ async fn chat_completions_stream(
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(poll_ms)).await;
|
||||
}
|
||||
|
||||
// Timeout
|
||||
// Timeout — emit error, not placeholder content
|
||||
warn!("Completions stream timeout after {}s", timeout);
|
||||
let mitm = state.mitm_store.take_usage(&cascade_id).await
|
||||
.or(state.mitm_store.take_usage("_latest").await);
|
||||
let fr = google_to_openai_finish_reason(mitm.as_ref().and_then(|u| u.stop_reason.as_deref()));
|
||||
yield Ok(Event::default().data(chunk_json(
|
||||
&completion_id, &model_name,
|
||||
serde_json::json!([chunk_choice(0, serde_json::json!({"content": if last_text.is_empty() { "[Timeout waiting for response]" } else { "" }}), Some(fr))]),
|
||||
None,
|
||||
)));
|
||||
if include_usage {
|
||||
let (pt, ct, crt, tt) = if let Some(ref u) = mitm {
|
||||
(u.input_tokens, u.output_tokens, u.cache_read_input_tokens, u.thinking_output_tokens)
|
||||
} else { (0, 0, 0, 0) };
|
||||
yield Ok(Event::default().data(chunk_json(
|
||||
&completion_id, &model_name,
|
||||
serde_json::json!([]),
|
||||
Some(build_usage(pt, ct, crt, tt)),
|
||||
)));
|
||||
}
|
||||
yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
|
||||
"error": {
|
||||
"message": format!("Timeout: no response from Google API after {timeout}s"),
|
||||
"type": "upstream_error",
|
||||
"code": 504,
|
||||
}
|
||||
})).unwrap()));
|
||||
// Always clear in-flight flag when stream ends
|
||||
state.mitm_store.clear_response_async().await;
|
||||
yield Ok(Event::default().data("[DONE]"));
|
||||
|
||||
Reference in New Issue
Block a user