feat: Implement request generation counter and state management to prevent stale data and unblock Language Server for follow-up requests.

This commit is contained in:
Nikketryhard
2026-02-16 16:21:52 -06:00
parent e6a339d92e
commit 38b4130c55
6 changed files with 255 additions and 100 deletions

View File

@@ -28,17 +28,34 @@ pub fn parse_non_streaming_response(body: &[u8]) -> Option<ApiUsage> {
extract_usage_from_message(&json)
}
/// Parse SSE events from a streaming Anthropic response body chunk.
/// Parse SSE events from a streaming response body chunk.
///
/// Events of interest:
/// - `message_start` — contains `message.usage.input_tokens` + cache tokens
/// - `message_delta` — contains `usage.output_tokens`
/// - `message_stop` — marks end (no usage data)
///
/// Returns accumulated usage across all events in this chunk.
/// Handles chunked transfer encoding where JSON data may be split across
/// TCP reads. Buffers raw data in the accumulator and only parses
/// complete newline-terminated lines.
pub fn parse_streaming_chunk(chunk: &str, accumulator: &mut StreamingAccumulator) {
for line in chunk.lines() {
if let Some(data) = line.strip_prefix("data: ") {
accumulator.pending_data.push_str(chunk);
// Extract and process all complete lines (terminated by \n).
// Leave any trailing partial line in the buffer for the next read.
loop {
let pos = match accumulator.pending_data.find('\n') {
Some(p) => p,
None => break,
};
let line = accumulator.pending_data[..pos]
.trim_end_matches('\r')
.to_string();
accumulator.pending_data = accumulator.pending_data[pos + 1..].to_string();
// Skip empty lines and chunked TE size lines (pure hex)
let t = line.trim();
if t.is_empty() || t.chars().all(|c| c.is_ascii_hexdigit()) {
continue;
}
if let Some(data) = t.strip_prefix("data: ") {
if data.trim() == "[DONE]" {
continue;
}
@@ -69,8 +86,9 @@ pub struct StreamingAccumulator {
/// Captured function calls from Google's response.
pub function_calls: Vec<CapturedFunctionCall>,
/// Captured grounding metadata from Google Search grounding.
/// Contains search queries, web results, and citations.
pub grounding_metadata: Option<serde_json::Value>,
/// Buffer for reassembling lines split across TCP reads.
pub pending_data: String,
}
impl StreamingAccumulator {
@@ -539,4 +557,36 @@ data: {"response": {"candidates": [{"content": {"role": "model","parts": [{"text
let usage = acc.into_usage();
assert_eq!(usage.thinking_output_tokens, 0);
}
/// Regression test for an SSE payload fragmented across TCP reads.
///
/// Three successive reads are fed into a single accumulator. The `data:` line
/// carrying `finishReason: STOP` is split between the second and third read,
/// so completion must only be reported once that line's terminating newline
/// has arrived.
#[test]
fn test_split_tcp_reads() {
    // Read 1: one whole event, preceded by a chunked-TE size line.
    let first_read = "164\r\ndata: {\"response\": {\"candidates\": [{\"content\": {\"role\": \"model\",\"parts\": [{\"text\": \"yo\"}]}}],\"usageMetadata\": {\"promptTokenCount\": 100,\"candidatesTokenCount\": 1,\"totalTokenCount\": 101},\"modelVersion\": \"gemini-3-flash\"},\"traceId\": \"abc\",\"metadata\": {}}\r\n\r\n\r\n";
    // Read 2: the closing event, truncated in the middle of the traceId value.
    let second_read = "200\r\ndata: {\"response\": {\"candidates\": [{\"content\": {\"role\": \"model\",\"parts\": [{\"text\": \"\"}]},\"finishReason\": \"STOP\"}],\"usageMetadata\": {\"promptTokenCount\": 100,\"candidatesTokenCount\": 1,\"totalTokenCount\": 101},\"modelVersion\": \"gemini-3-flash\"},\"traceId\": \"abc123";
    // Read 3: the remainder of the JSON line plus the chunked-TE terminator.
    let third_read = "def\",\"metadata\": {}}\r\n\r\n\r\n0\r\n\r\n";

    let mut state = StreamingAccumulator::new();

    parse_streaming_chunk(first_read, &mut state);
    assert_eq!(state.response_text, "yo");
    // The first event carries no finishReason.
    assert!(!state.is_complete);

    parse_streaming_chunk(second_read, &mut state);
    // The second data line has not been newline-terminated yet.
    assert!(
        !state.is_complete,
        "should NOT be complete yet — JSON line is still partial"
    );

    parse_streaming_chunk(third_read, &mut state);
    // The buffered fragment and this read together form a complete line.
    assert!(
        state.is_complete,
        "finishReason: STOP should be detected after reassembly"
    );
    assert_eq!(state.stop_reason, Some(String::from("STOP")));
}
}