chore: clean up MITM logs and add Google SSE tests
- Demote non-LLM request logs to debug (only streamGenerateContent at info)
- Demote non-streaming response headers to debug
- Add 5 Google SSE parser tests (single event, multi-event accumulation, chunked framing, completion detection, no-thinking-tokens)
- Fix unused variable warning in proxy.rs
This commit is contained in:
@@ -291,5 +291,156 @@ mod tests {
|
|||||||
let usage = acc.into_usage();
|
let usage = acc.into_usage();
|
||||||
assert_eq!(usage.input_tokens, 200);
|
assert_eq!(usage.input_tokens, 200);
|
||||||
assert_eq!(usage.output_tokens, 75);
|
assert_eq!(usage.output_tokens, 75);
|
||||||
|
assert_eq!(usage.api_provider, Some("anthropic".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_google_sse_single_event() {
|
||||||
|
let mut acc = StreamingAccumulator::new();
|
||||||
|
|
||||||
|
let event = serde_json::json!({
|
||||||
|
"response": {
|
||||||
|
"candidates": [{"content": {"role": "model", "parts": [{"text": "4"}]}}],
|
||||||
|
"usageMetadata": {
|
||||||
|
"promptTokenCount": 1514,
|
||||||
|
"candidatesTokenCount": 25,
|
||||||
|
"totalTokenCount": 1539,
|
||||||
|
"thoughtsTokenCount": 52
|
||||||
|
},
|
||||||
|
"modelVersion": "gemini-3-flash",
|
||||||
|
"responseId": "abc123"
|
||||||
|
},
|
||||||
|
"traceId": "trace456",
|
||||||
|
"metadata": {}
|
||||||
|
});
|
||||||
|
|
||||||
|
acc.process_event(&event);
|
||||||
|
assert_eq!(acc.input_tokens, 1514);
|
||||||
|
assert_eq!(acc.output_tokens, 25);
|
||||||
|
assert_eq!(acc.thinking_tokens, 52);
|
||||||
|
assert_eq!(acc.model, Some("gemini-3-flash".to_string()));
|
||||||
|
assert!(!acc.is_complete); // no finishReason yet
|
||||||
|
assert_eq!(acc.api_provider, Some("google".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_google_sse_multi_event_accumulation() {
|
||||||
|
let mut acc = StreamingAccumulator::new();
|
||||||
|
|
||||||
|
// First event — partial response
|
||||||
|
let event1 = serde_json::json!({
|
||||||
|
"response": {
|
||||||
|
"candidates": [{"content": {"role": "model", "parts": [{"text": "Hello"}]}}],
|
||||||
|
"usageMetadata": {
|
||||||
|
"promptTokenCount": 1514,
|
||||||
|
"candidatesTokenCount": 6,
|
||||||
|
"totalTokenCount": 1520
|
||||||
|
},
|
||||||
|
"modelVersion": "gemini-2.5-flash-lite"
|
||||||
|
},
|
||||||
|
"traceId": "t1",
|
||||||
|
"metadata": {}
|
||||||
|
});
|
||||||
|
acc.process_event(&event1);
|
||||||
|
assert_eq!(acc.output_tokens, 6);
|
||||||
|
assert!(!acc.is_complete);
|
||||||
|
|
||||||
|
// Second event — more output
|
||||||
|
let event2 = serde_json::json!({
|
||||||
|
"response": {
|
||||||
|
"candidates": [{"content": {"role": "model", "parts": [{"text": " world"}]}}],
|
||||||
|
"usageMetadata": {
|
||||||
|
"promptTokenCount": 1514,
|
||||||
|
"candidatesTokenCount": 22,
|
||||||
|
"totalTokenCount": 1536
|
||||||
|
},
|
||||||
|
"modelVersion": "gemini-2.5-flash-lite"
|
||||||
|
},
|
||||||
|
"traceId": "t1",
|
||||||
|
"metadata": {}
|
||||||
|
});
|
||||||
|
acc.process_event(&event2);
|
||||||
|
assert_eq!(acc.output_tokens, 22); // cumulative, not additive
|
||||||
|
|
||||||
|
// Third event — completion
|
||||||
|
let event3 = serde_json::json!({
|
||||||
|
"response": {
|
||||||
|
"candidates": [{"content": {"role": "model", "parts": [{"text": "!"}]},
|
||||||
|
"finishReason": "STOP"}],
|
||||||
|
"usageMetadata": {
|
||||||
|
"promptTokenCount": 1514,
|
||||||
|
"candidatesTokenCount": 25,
|
||||||
|
"totalTokenCount": 1539,
|
||||||
|
"thoughtsTokenCount": 52
|
||||||
|
},
|
||||||
|
"modelVersion": "gemini-2.5-flash-lite"
|
||||||
|
},
|
||||||
|
"traceId": "t1",
|
||||||
|
"metadata": {}
|
||||||
|
});
|
||||||
|
acc.process_event(&event3);
|
||||||
|
assert!(acc.is_complete);
|
||||||
|
assert_eq!(acc.output_tokens, 25);
|
||||||
|
assert_eq!(acc.thinking_tokens, 52);
|
||||||
|
assert_eq!(acc.stop_reason, Some("STOP".to_string()));
|
||||||
|
|
||||||
|
let usage = acc.into_usage();
|
||||||
|
assert_eq!(usage.input_tokens, 1514);
|
||||||
|
assert_eq!(usage.output_tokens, 25);
|
||||||
|
assert_eq!(usage.thinking_output_tokens, 52);
|
||||||
|
assert_eq!(usage.model, Some("gemini-2.5-flash-lite".to_string()));
|
||||||
|
assert_eq!(usage.api_provider, Some("google".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_google_sse_parse_streaming_chunk() {
|
||||||
|
// Simulates real SSE data with HTTP chunked framing (hex sizes on their own lines)
|
||||||
|
let chunk = r#"150
|
||||||
|
data: {"response": {"candidates": [{"content": {"role": "model","parts": [{"text": "4"}]}}],"usageMetadata": {"promptTokenCount": 14615,"candidatesTokenCount": 1,"totalTokenCount": 14668,"thoughtsTokenCount": 52},"modelVersion": "gemini-3-flash","responseId": "agaRacPLC4WHz7IPreOl8QM"},"traceId": "8145be7112baf823","metadata": {}}
|
||||||
|
|
||||||
|
|
||||||
|
2f1
|
||||||
|
data: {"response": {"candidates": [{"content": {"role": "model","parts": [{"text": ""}]},"finishReason": "STOP"}],"usageMetadata": {"promptTokenCount": 14615,"candidatesTokenCount": 1,"totalTokenCount": 14668,"thoughtsTokenCount": 52},"modelVersion": "gemini-3-flash","responseId": "agaRacPLC4WHz7IPreOl8QM"},"traceId": "8145be7112baf823","metadata": {}}
|
||||||
|
|
||||||
|
|
||||||
|
0
|
||||||
|
"#;
|
||||||
|
|
||||||
|
let mut acc = StreamingAccumulator::new();
|
||||||
|
parse_streaming_chunk(chunk, &mut acc);
|
||||||
|
|
||||||
|
assert_eq!(acc.input_tokens, 14615);
|
||||||
|
assert_eq!(acc.output_tokens, 1);
|
||||||
|
assert_eq!(acc.thinking_tokens, 52);
|
||||||
|
assert!(acc.is_complete);
|
||||||
|
assert_eq!(acc.model, Some("gemini-3-flash".to_string()));
|
||||||
|
assert_eq!(acc.stop_reason, Some("STOP".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_google_sse_no_thinking_tokens() {
|
||||||
|
let mut acc = StreamingAccumulator::new();
|
||||||
|
|
||||||
|
let event = serde_json::json!({
|
||||||
|
"response": {
|
||||||
|
"candidates": [{"content": {"role": "model", "parts": [{"text": "hi"}]},
|
||||||
|
"finishReason": "STOP"}],
|
||||||
|
"usageMetadata": {
|
||||||
|
"promptTokenCount": 100,
|
||||||
|
"candidatesTokenCount": 5,
|
||||||
|
"totalTokenCount": 105
|
||||||
|
},
|
||||||
|
"modelVersion": "gemini-2.5-flash-lite"
|
||||||
|
},
|
||||||
|
"traceId": "t1",
|
||||||
|
"metadata": {}
|
||||||
|
});
|
||||||
|
|
||||||
|
acc.process_event(&event);
|
||||||
|
assert_eq!(acc.thinking_tokens, 0); // no thoughtsTokenCount field
|
||||||
|
assert!(acc.is_complete);
|
||||||
|
|
||||||
|
let usage = acc.into_usage();
|
||||||
|
assert_eq!(usage.thinking_output_tokens, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -512,7 +512,7 @@ async fn handle_http_over_tls(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Parse the HTTP request to find headers and body
|
// Parse the HTTP request to find headers and body
|
||||||
let (headers_end, content_length, is_streaming_request) = parse_http_request_meta(&request_buf);
|
let (headers_end, content_length, _is_streaming_request) = parse_http_request_meta(&request_buf);
|
||||||
|
|
||||||
// Try to extract cascade hint from request body
|
// Try to extract cascade hint from request body
|
||||||
let cascade_hint = if headers_end < request_buf.len() {
|
let cascade_hint = if headers_end < request_buf.len() {
|
||||||
@@ -533,14 +533,22 @@ async fn handle_http_over_tls(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Log LLM calls at info, everything else at debug
|
||||||
|
if req_path.contains("streamGenerateContent") {
|
||||||
info!(
|
info!(
|
||||||
domain,
|
domain,
|
||||||
req_path = %req_path,
|
req_path = %req_path,
|
||||||
content_length,
|
|
||||||
streaming = is_streaming_request,
|
|
||||||
cascade = ?cascade_hint,
|
cascade = ?cascade_hint,
|
||||||
|
"MITM: forwarding LLM request"
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
debug!(
|
||||||
|
domain,
|
||||||
|
req_path = %req_path,
|
||||||
|
content_length,
|
||||||
"MITM: forwarding request"
|
"MITM: forwarding request"
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// ── Ensure upstream connection is alive ──────────────────────────────
|
// ── Ensure upstream connection is alive ──────────────────────────────
|
||||||
// Lazily connect on first request, or reconnect if the previous connection died
|
// Lazily connect on first request, or reconnect if the previous connection died
|
||||||
@@ -643,10 +651,15 @@ async fn handle_http_over_tls(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
info!(domain, streaming = is_streaming_response,
|
if is_streaming_response {
|
||||||
content_length = ?response_content_length,
|
info!(domain,
|
||||||
content_type = %content_type,
|
content_type = %content_type,
|
||||||
status = resp.code, "MITM: got response headers");
|
status = resp.code, "MITM: streaming response");
|
||||||
|
} else {
|
||||||
|
debug!(domain,
|
||||||
|
content_type = %content_type,
|
||||||
|
status = resp.code, "MITM: response headers");
|
||||||
|
}
|
||||||
headers_parsed = true;
|
headers_parsed = true;
|
||||||
|
|
||||||
// Save body for usage parsing
|
// Save body for usage parsing
|
||||||
|
|||||||
Reference in New Issue
Block a user