feat: Implement request generation counter and state management to prevent stale data and unblock Language Server for follow-up requests.
This commit is contained in:
@@ -28,17 +28,34 @@ pub fn parse_non_streaming_response(body: &[u8]) -> Option<ApiUsage> {
|
||||
extract_usage_from_message(&json)
|
||||
}
|
||||
|
||||
/// Parse SSE events from a streaming Anthropic response body chunk.
|
||||
/// Parse SSE events from a streaming response body chunk.
|
||||
///
|
||||
/// Events of interest:
|
||||
/// - `message_start` — contains `message.usage.input_tokens` + cache tokens
|
||||
/// - `message_delta` — contains `usage.output_tokens`
|
||||
/// - `message_stop` — marks end (no usage data)
|
||||
///
|
||||
/// Accumulates usage from the events in this chunk into `accumulator`; nothing is returned.
|
||||
/// Handles chunked transfer encoding where JSON data may be split across
|
||||
/// TCP reads. Buffers raw data in the accumulator and only parses
|
||||
/// complete newline-terminated lines.
|
||||
pub fn parse_streaming_chunk(chunk: &str, accumulator: &mut StreamingAccumulator) {
|
||||
for line in chunk.lines() {
|
||||
if let Some(data) = line.strip_prefix("data: ") {
|
||||
accumulator.pending_data.push_str(chunk);
|
||||
|
||||
// Extract and process all complete lines (terminated by \n).
|
||||
// Leave any trailing partial line in the buffer for the next read.
|
||||
loop {
|
||||
let pos = match accumulator.pending_data.find('\n') {
|
||||
Some(p) => p,
|
||||
None => break,
|
||||
};
|
||||
|
||||
let line = accumulator.pending_data[..pos]
|
||||
.trim_end_matches('\r')
|
||||
.to_string();
|
||||
accumulator.pending_data = accumulator.pending_data[pos + 1..].to_string();
|
||||
|
||||
// Skip empty lines and chunked TE size lines (pure hex)
|
||||
let t = line.trim();
|
||||
if t.is_empty() || t.chars().all(|c| c.is_ascii_hexdigit()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(data) = t.strip_prefix("data: ") {
|
||||
if data.trim() == "[DONE]" {
|
||||
continue;
|
||||
}
|
||||
@@ -69,8 +86,9 @@ pub struct StreamingAccumulator {
|
||||
/// Captured function calls from Google's response.
|
||||
pub function_calls: Vec<CapturedFunctionCall>,
|
||||
/// Captured grounding metadata from Google Search grounding.
|
||||
/// Contains search queries, web results, and citations.
|
||||
pub grounding_metadata: Option<serde_json::Value>,
|
||||
/// Buffer for reassembling lines split across TCP reads.
|
||||
pub pending_data: String,
|
||||
}
|
||||
|
||||
impl StreamingAccumulator {
|
||||
@@ -539,4 +557,36 @@ data: {"response": {"candidates": [{"content": {"role": "model","parts": [{"text
|
||||
let usage = acc.into_usage();
|
||||
assert_eq!(usage.thinking_output_tokens, 0);
|
||||
}
|
||||
|
||||
/// Regression test for a `data:` line that arrives fragmented across reads.
///
/// Three successive reads are fed to `parse_streaming_chunk`: a complete
/// event, an event whose JSON is cut off inside the `traceId` value, and the
/// remainder of that JSON followed by the chunked-encoding terminator. The
/// `finishReason: STOP` marker must only be observed once the third read has
/// completed the line.
#[test]
fn test_split_tcp_reads() {
    // Read 1: a whole event, preceded by a chunk-size line.
    let first_read = "164\r\ndata: {\"response\": {\"candidates\": [{\"content\": {\"role\": \"model\",\"parts\": [{\"text\": \"yo\"}]}}],\"usageMetadata\": {\"promptTokenCount\": 100,\"candidatesTokenCount\": 1,\"totalTokenCount\": 101},\"modelVersion\": \"gemini-3-flash\"},\"traceId\": \"abc\",\"metadata\": {}}\r\n\r\n\r\n";
    // Read 2: the second event, truncated in the middle of the traceId string
    // (note: no trailing newline).
    let second_read = "200\r\ndata: {\"response\": {\"candidates\": [{\"content\": {\"role\": \"model\",\"parts\": [{\"text\": \"\"}]},\"finishReason\": \"STOP\"}],\"usageMetadata\": {\"promptTokenCount\": 100,\"candidatesTokenCount\": 1,\"totalTokenCount\": 101},\"modelVersion\": \"gemini-3-flash\"},\"traceId\": \"abc123";
    // Read 3: the tail of the JSON plus the chunked transfer-encoding terminator.
    let third_read = "def\",\"metadata\": {}}\r\n\r\n\r\n0\r\n\r\n";

    let mut accumulator = StreamingAccumulator::new();

    parse_streaming_chunk(first_read, &mut accumulator);
    assert_eq!(accumulator.response_text, "yo");
    // The first event carries no finishReason.
    assert!(!accumulator.is_complete);

    parse_streaming_chunk(second_read, &mut accumulator);
    // The line has not been newline-terminated yet, so it must not be parsed.
    assert!(
        !accumulator.is_complete,
        "should NOT be complete yet — JSON line is still partial"
    );

    parse_streaming_chunk(third_read, &mut accumulator);
    // The line is now whole and the stop marker should have been picked up.
    assert!(
        accumulator.is_complete,
        "finishReason: STOP should be detected after reassembly"
    );
    assert_eq!(accumulator.stop_reason.as_deref(), Some("STOP"));
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user