feat: Implement request generation counter and state management to prevent stale data and unblock Language Server for follow-up requests.

2026-02-16 16:21:52 -06:00
parent e6a339d92e
commit 38b4130c55
6 changed files with 255 additions and 100 deletions
--- a/src/mitm/intercept.rs
+++ b/src/mitm/intercept.rs
@@ -28,17 +28,34 @@ pub fn parse_non_streaming_response(body: &[u8]) -> Option<ApiUsage> {
    extract_usage_from_message(&json)
 }

-/// Parse SSE events from a streaming Anthropic response body chunk.
+/// Parse SSE events from a streaming response body chunk.
 ///
-/// Events of interest:
-/// - `message_start` — contains `message.usage.input_tokens` + cache tokens
-/// - `message_delta` — contains `usage.output_tokens`
-/// - `message_stop` — marks end (no usage data)
-///
-/// Returns accumulated usage across all events in this chunk.
+/// Handles chunked transfer encoding where JSON data may be split across
+/// TCP reads. Buffers raw data in the accumulator and only parses
+/// complete newline-terminated lines.
 pub fn parse_streaming_chunk(chunk: &str, accumulator: &mut StreamingAccumulator) {
-    for line in chunk.lines() {
-        if let Some(data) = line.strip_prefix("data: ") {
+    accumulator.pending_data.push_str(chunk);
+
+    // Extract and process all complete lines (terminated by \n).
+    // Leave any trailing partial line in the buffer for the next read.
+    loop {
+        let pos = match accumulator.pending_data.find('\n') {
+            Some(p) => p,
+            None => break,
+        };
+
+        let line = accumulator.pending_data[..pos]
+            .trim_end_matches('\r')
+            .to_string();
+        accumulator.pending_data = accumulator.pending_data[pos + 1..].to_string();
+
+        // Skip empty lines and chunked TE size lines (pure hex)
+        let t = line.trim();
+        if t.is_empty() || t.chars().all(|c| c.is_ascii_hexdigit()) {
+            continue;
+        }
+
+        if let Some(data) = t.strip_prefix("data: ") {
            if data.trim() == "[DONE]" {
                continue;
            }
@@ -69,8 +86,9 @@ pub struct StreamingAccumulator {
    /// Captured function calls from Google's response.
    pub function_calls: Vec<CapturedFunctionCall>,
    /// Captured grounding metadata from Google Search grounding.
-    /// Contains search queries, web results, and citations.
    pub grounding_metadata: Option<serde_json::Value>,
+    /// Buffer for reassembling lines split across TCP reads.
+    pub pending_data: String,
 }

 impl StreamingAccumulator {
@@ -539,4 +557,36 @@ data: {"response": {"candidates": [{"content": {"role": "model","parts": [{"text
        let usage = acc.into_usage();
        assert_eq!(usage.thinking_output_tokens, 0);
    }
+
+    /// Regression test: replays the TCP fragmentation captured in the SSE dump.
+    /// The `data:` line carrying `finishReason: STOP` is split between reads 2 and 3.
+    #[test]
+    fn test_split_tcp_reads() {
+        let mut acc = StreamingAccumulator::new();
+
+        // TCP read 1: chunk-size line followed by one complete, newline-terminated event
+        let chunk1 = "164\r\ndata: {\"response\": {\"candidates\": [{\"content\": {\"role\": \"model\",\"parts\": [{\"text\": \"yo\"}]}}],\"usageMetadata\": {\"promptTokenCount\": 100,\"candidatesTokenCount\": 1,\"totalTokenCount\": 101},\"modelVersion\": \"gemini-3-flash\"},\"traceId\": \"abc\",\"metadata\": {}}\r\n\r\n\r\n";
+        parse_streaming_chunk(chunk1, &mut acc);
+        assert_eq!(acc.response_text, "yo");
+        assert!(!acc.is_complete); // the first event carries no finishReason
+
+        // TCP read 2: size line plus a PARTIAL second event — cut inside the traceId string
+        let chunk2 = "200\r\ndata: {\"response\": {\"candidates\": [{\"content\": {\"role\": \"model\",\"parts\": [{\"text\": \"\"}]},\"finishReason\": \"STOP\"}],\"usageMetadata\": {\"promptTokenCount\": 100,\"candidatesTokenCount\": 1,\"totalTokenCount\": 101},\"modelVersion\": \"gemini-3-flash\"},\"traceId\": \"abc123";
+        parse_streaming_chunk(chunk2, &mut acc);
+        // The data line has no trailing \n yet, so it must stay buffered and unparsed
+        assert!(
+            !acc.is_complete,
+            "should NOT be complete yet — JSON line is still partial"
+        );
+
+        // TCP read 3: remainder of the JSON line, then the chunked TE terminator (0\r\n\r\n)
+        let chunk3 = "def\",\"metadata\": {}}\r\n\r\n\r\n0\r\n\r\n";
+        parse_streaming_chunk(chunk3, &mut acc);
+        // The buffered line is now newline-terminated, so it should be parsed
+        assert!(
+            acc.is_complete,
+            "finishReason: STOP should be detected after reassembly"
+        );
+        assert_eq!(acc.stop_reason, Some("STOP".to_string()));
+    }
 }