From ca36ab0631037288ecd30eebf05d30ecae3e5286 Mon Sep 17 00:00:00 2001 From: Nikketryhard Date: Sat, 14 Feb 2026 17:55:17 -0600 Subject: [PATCH] chore: clean up MITM logs and add Google SSE tests - Demote non-LLM request logs to debug (only streamGenerateContent at info) - Demote non-streaming response headers to debug - Add 5 Google SSE parser tests (single event, multi-event accumulation, chunked framing, completion detection, no-thinking-tokens) - Fix unused variable warning in proxy.rs --- src/mitm/intercept.rs | 151 ++++++++++++++++++++++++++++++++++++++++++ src/mitm/proxy.rs | 39 +++++++---- 2 files changed, 177 insertions(+), 13 deletions(-) diff --git a/src/mitm/intercept.rs b/src/mitm/intercept.rs index 5adb4b0..6d965a2 100644 --- a/src/mitm/intercept.rs +++ b/src/mitm/intercept.rs @@ -291,5 +291,156 @@ mod tests { let usage = acc.into_usage(); assert_eq!(usage.input_tokens, 200); assert_eq!(usage.output_tokens, 75); + assert_eq!(usage.api_provider, Some("anthropic".to_string())); + } + + #[test] + fn test_google_sse_single_event() { + let mut acc = StreamingAccumulator::new(); + + let event = serde_json::json!({ + "response": { + "candidates": [{"content": {"role": "model", "parts": [{"text": "4"}]}}], + "usageMetadata": { + "promptTokenCount": 1514, + "candidatesTokenCount": 25, + "totalTokenCount": 1539, + "thoughtsTokenCount": 52 + }, + "modelVersion": "gemini-3-flash", + "responseId": "abc123" + }, + "traceId": "trace456", + "metadata": {} + }); + + acc.process_event(&event); + assert_eq!(acc.input_tokens, 1514); + assert_eq!(acc.output_tokens, 25); + assert_eq!(acc.thinking_tokens, 52); + assert_eq!(acc.model, Some("gemini-3-flash".to_string())); + assert!(!acc.is_complete); // no finishReason yet + assert_eq!(acc.api_provider, Some("google".to_string())); + } + + #[test] + fn test_google_sse_multi_event_accumulation() { + let mut acc = StreamingAccumulator::new(); + + // First event — partial response + let event1 = serde_json::json!({ + "response": { + "candidates": [{"content": {"role": "model", "parts": [{"text": "Hello"}]}}], + "usageMetadata": { + "promptTokenCount": 1514, + "candidatesTokenCount": 6, + "totalTokenCount": 1520 + }, + "modelVersion": "gemini-2.5-flash-lite" + }, + "traceId": "t1", + "metadata": {} + }); + acc.process_event(&event1); + assert_eq!(acc.output_tokens, 6); + assert!(!acc.is_complete); + + // Second event — more output + let event2 = serde_json::json!({ + "response": { + "candidates": [{"content": {"role": "model", "parts": [{"text": " world"}]}}], + "usageMetadata": { + "promptTokenCount": 1514, + "candidatesTokenCount": 22, + "totalTokenCount": 1536 + }, + "modelVersion": "gemini-2.5-flash-lite" + }, + "traceId": "t1", + "metadata": {} + }); + acc.process_event(&event2); + assert_eq!(acc.output_tokens, 22); // cumulative, not additive + + // Third event — completion + let event3 = serde_json::json!({ + "response": { + "candidates": [{"content": {"role": "model", "parts": [{"text": "!"}]}, + "finishReason": "STOP"}], + "usageMetadata": { + "promptTokenCount": 1514, + "candidatesTokenCount": 25, + "totalTokenCount": 1539, + "thoughtsTokenCount": 52 + }, + "modelVersion": "gemini-2.5-flash-lite" + }, + "traceId": "t1", + "metadata": {} + }); + acc.process_event(&event3); + assert!(acc.is_complete); + assert_eq!(acc.output_tokens, 25); + assert_eq!(acc.thinking_tokens, 52); + assert_eq!(acc.stop_reason, Some("STOP".to_string())); + + let usage = acc.into_usage(); + assert_eq!(usage.input_tokens, 1514); + assert_eq!(usage.output_tokens, 25); + assert_eq!(usage.thinking_output_tokens, 52); + assert_eq!(usage.model, Some("gemini-2.5-flash-lite".to_string())); + assert_eq!(usage.api_provider, Some("google".to_string())); + } + + #[test] + fn test_google_sse_parse_streaming_chunk() { + // Simulates real SSE data with HTTP chunked framing (hex sizes on their own lines) + let chunk = r#"150 +data: {"response": {"candidates": [{"content": {"role": "model","parts": [{"text": "4"}]}}],"usageMetadata": {"promptTokenCount": 14615,"candidatesTokenCount": 1,"totalTokenCount": 14668,"thoughtsTokenCount": 52},"modelVersion": "gemini-3-flash","responseId": "agaRacPLC4WHz7IPreOl8QM"},"traceId": "8145be7112baf823","metadata": {}} + + +2f1 +data: {"response": {"candidates": [{"content": {"role": "model","parts": [{"text": ""}]},"finishReason": "STOP"}],"usageMetadata": {"promptTokenCount": 14615,"candidatesTokenCount": 1,"totalTokenCount": 14668,"thoughtsTokenCount": 52},"modelVersion": "gemini-3-flash","responseId": "agaRacPLC4WHz7IPreOl8QM"},"traceId": "8145be7112baf823","metadata": {}} + + +0 +"#; + + let mut acc = StreamingAccumulator::new(); + parse_streaming_chunk(chunk, &mut acc); + + assert_eq!(acc.input_tokens, 14615); + assert_eq!(acc.output_tokens, 1); + assert_eq!(acc.thinking_tokens, 52); + assert!(acc.is_complete); + assert_eq!(acc.model, Some("gemini-3-flash".to_string())); + assert_eq!(acc.stop_reason, Some("STOP".to_string())); + } + + #[test] + fn test_google_sse_no_thinking_tokens() { + let mut acc = StreamingAccumulator::new(); + + let event = serde_json::json!({ + "response": { + "candidates": [{"content": {"role": "model", "parts": [{"text": "hi"}]}, + "finishReason": "STOP"}], + "usageMetadata": { + "promptTokenCount": 100, + "candidatesTokenCount": 5, + "totalTokenCount": 105 + }, + "modelVersion": "gemini-2.5-flash-lite" + }, + "traceId": "t1", + "metadata": {} + }); + + acc.process_event(&event); + assert_eq!(acc.thinking_tokens, 0); // no thoughtsTokenCount field + assert!(acc.is_complete); + + let usage = acc.into_usage(); + assert_eq!(usage.thinking_output_tokens, 0); } } diff --git a/src/mitm/proxy.rs b/src/mitm/proxy.rs index a22c4e7..4ba7277 100644 --- a/src/mitm/proxy.rs +++ b/src/mitm/proxy.rs @@ -512,7 +512,7 @@ async fn handle_http_over_tls( } // Parse the HTTP request to find headers and body - let (headers_end, content_length, is_streaming_request) = parse_http_request_meta(&request_buf); + let (headers_end, content_length, _is_streaming_request) = parse_http_request_meta(&request_buf); // Try to extract cascade hint from request body let cascade_hint = if headers_end < request_buf.len() { @@ -533,14 +533,22 @@ async fn handle_http_over_tls( } }; - info!( - domain, - req_path = %req_path, - content_length, - streaming = is_streaming_request, - cascade = ?cascade_hint, - "MITM: forwarding request" - ); + // Log LLM calls at info, everything else at debug + if req_path.contains("streamGenerateContent") { + info!( + domain, + req_path = %req_path, + cascade = ?cascade_hint, + "MITM: forwarding LLM request" + ); + } else { + debug!( + domain, + req_path = %req_path, + content_length, + "MITM: forwarding request" + ); + } // ── Ensure upstream connection is alive ────────────────────────────── // Lazily connect on first request, or reconnect if the previous connection died @@ -643,10 +651,15 @@ async fn handle_http_over_tls( } } - info!(domain, streaming = is_streaming_response, - content_length = ?response_content_length, - content_type = %content_type, - status = resp.code, "MITM: got response headers"); + if is_streaming_response { + info!(domain, + content_type = %content_type, + status = resp.code, "MITM: streaming response"); + } else { + debug!(domain, + content_type = %content_type, + status = resp.code, "MITM: response headers"); + } headers_parsed = true; // Save body for usage parsing