feat: Implement request generation counter and state management to prevent stale data and unblock Language Server for follow-up requests.

This commit is contained in:
Nikketryhard
2026-02-16 16:21:52 -06:00
parent e6a339d92e
commit 38b4130c55
6 changed files with 255 additions and 100 deletions

View File

@@ -538,6 +538,10 @@ async fn handle_http_over_tls(
}
};
// Generation tracking for store write guards
let mut won_gate = false;
let mut conn_generation = store.current_generation();
// Log LLM calls at info, everything else at debug
if req_path.contains("streamGenerateContent") {
let body_len = request_buf.len() - headers_end;
@@ -549,26 +553,35 @@ async fn handle_http_over_tls(
"MITM: forwarding LLM request"
);
// ── Block ALL requests when one is already in-flight ─────────
// ── Atomic in-flight gate ─────────────────────────────────
// The Language Server (LS) opens multiple connections and sends parallel requests.
// When custom tools are active, only the FIRST request should reach
// Google. Block everything else with a fake response.
if store.is_request_in_flight() {
info!("MITM: blocking LS request — another request already in-flight");
let fake_response = "HTTP/1.1 200 OK\r\n\
Content-Type: text/event-stream\r\n\
Transfer-Encoding: chunked\r\n\
\r\n";
let fake_sse = "data: {\"response\":{\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"Request handled.\"}],\"role\":\"model\"},\"finishReason\":\"STOP\"}],\"usageMetadata\":{\"promptTokenCount\":0,\"candidatesTokenCount\":1,\"totalTokenCount\":1}}}\n\ndata: [DONE]\n\n";
let chunked_body = super::modify::rechunk(fake_sse.as_bytes());
let mut response = fake_response.as_bytes().to_vec();
response.extend_from_slice(&chunked_body);
if let Err(e) = client.write_all(&response).await {
warn!(error = %e, "MITM: failed to write fake response");
// When custom tools are active, only the FIRST request wins the
// atomic compare_exchange. All others get fake STOP responses.
let has_tools = store.get_tools().await.is_some();
won_gate = if has_tools {
if !store.try_mark_request_in_flight() {
info!("MITM: blocking LS request — another request already in-flight");
let fake_response = "HTTP/1.1 200 OK\r\n\
Content-Type: text/event-stream\r\n\
Transfer-Encoding: chunked\r\n\
\r\n";
let fake_sse = "data: {\"response\":{\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"Request handled.\"}],\"role\":\"model\"},\"finishReason\":\"STOP\"}],\"usageMetadata\":{\"promptTokenCount\":0,\"candidatesTokenCount\":1,\"totalTokenCount\":1}}}\n\ndata: [DONE]\n\n";
let chunked_body = super::modify::rechunk(fake_sse.as_bytes());
let mut response = fake_response.as_bytes().to_vec();
response.extend_from_slice(&chunked_body);
if let Err(e) = client.write_all(&response).await {
warn!(error = %e, "MITM: failed to write fake response");
}
let _ = client.flush().await;
continue;
}
let _ = client.flush().await;
continue;
}
true
} else {
false
};
// Snapshot the generation at gate-win time. If it changes later,
// another completions turn started and our data is stale.
conn_generation = store.current_generation();
// ── Request modification ─────────────────────────────────────
// Dechunk body → check if agent request → modify → rechunk
@@ -620,8 +633,7 @@ async fn handle_http_over_tls(
new_buf.extend_from_slice(&new_chunked);
request_buf = new_buf;
// Mark in-flight IMMEDIATELY — blocks all subsequent requests
store.mark_request_in_flight();
// In-flight already marked atomically above
}
}
}
@@ -797,33 +809,46 @@ async fn handle_http_over_tls(
let body = String::from_utf8_lossy(&header_buf[hdr_end..]);
parse_streaming_chunk(&body, &mut streaming_acc);
// Store captured function calls (drain to avoid re-storing on next chunk)
if !streaming_acc.function_calls.is_empty() {
let calls: Vec<_> = streaming_acc.function_calls.drain(..).collect();
for fc in &calls {
store
.record_function_call(cascade_hint.as_deref(), fc.clone())
.await;
// Only write to store if our generation is still current.
// If another completions turn started, our data is stale.
let gen_valid = !won_gate || store.current_generation() == conn_generation;
if gen_valid {
// Store captured function calls (drain to avoid re-storing on next chunk)
if !streaming_acc.function_calls.is_empty() {
let calls: Vec<_> =
streaming_acc.function_calls.drain(..).collect();
for fc in &calls {
store
.record_function_call(cascade_hint.as_deref(), fc.clone())
.await;
}
store.set_last_function_calls(calls.clone()).await;
info!(
"MITM: stored {} function call(s) from initial body",
calls.len()
);
}
store.set_last_function_calls(calls.clone()).await;
info!(
"MITM: stored {} function call(s) from initial body",
calls.len()
);
}
// Capture response + thinking text + grounding into MitmStore
if !streaming_acc.response_text.is_empty() {
store.set_response_text(&streaming_acc.response_text).await;
}
if !streaming_acc.thinking_text.is_empty() {
store.set_thinking_text(&streaming_acc.thinking_text).await;
}
if let Some(ref gm) = streaming_acc.grounding_metadata {
store.set_grounding(gm.clone()).await;
}
if streaming_acc.is_complete {
store.mark_response_complete();
// Capture response + thinking text + grounding into MitmStore
if !streaming_acc.response_text.is_empty() {
store.set_response_text(&streaming_acc.response_text).await;
}
if !streaming_acc.thinking_text.is_empty() {
store.set_thinking_text(&streaming_acc.thinking_text).await;
}
if let Some(ref gm) = streaming_acc.grounding_metadata {
store.set_grounding(gm.clone()).await;
}
if streaming_acc.is_complete {
info!(
response_text_len = streaming_acc.response_text.len(),
thinking_text_len = streaming_acc.thinking_text.len(),
"MITM: response complete (initial body) — marking store"
);
store.mark_response_complete();
}
} else if streaming_acc.is_complete {
debug!("MITM: skipping store write — generation stale (initial body)");
}
}
@@ -862,33 +887,45 @@ async fn handle_http_over_tls(
let s = String::from_utf8_lossy(chunk);
parse_streaming_chunk(&s, &mut streaming_acc);
// Store captured function calls (drain to avoid re-storing on next chunk)
if !streaming_acc.function_calls.is_empty() {
let calls: Vec<_> = streaming_acc.function_calls.drain(..).collect();
for fc in &calls {
store
.record_function_call(cascade_hint.as_deref(), fc.clone())
.await;
// Only write to store if our generation is still current.
let gen_valid = !won_gate || store.current_generation() == conn_generation;
if gen_valid {
// Store captured function calls (drain to avoid re-storing on next chunk)
if !streaming_acc.function_calls.is_empty() {
let calls: Vec<_> = streaming_acc.function_calls.drain(..).collect();
for fc in &calls {
store
.record_function_call(cascade_hint.as_deref(), fc.clone())
.await;
}
store.set_last_function_calls(calls.clone()).await;
info!(
"MITM: stored {} function call(s) from body chunk",
calls.len()
);
}
store.set_last_function_calls(calls.clone()).await;
info!(
"MITM: stored {} function call(s) from body chunk",
calls.len()
);
}
// Capture response + thinking text + grounding into MitmStore
if !streaming_acc.response_text.is_empty() {
store.set_response_text(&streaming_acc.response_text).await;
}
if !streaming_acc.thinking_text.is_empty() {
store.set_thinking_text(&streaming_acc.thinking_text).await;
}
if let Some(ref gm) = streaming_acc.grounding_metadata {
store.set_grounding(gm.clone()).await;
}
if streaming_acc.is_complete {
store.mark_response_complete();
// Capture response + thinking text + grounding into MitmStore
if !streaming_acc.response_text.is_empty() {
store.set_response_text(&streaming_acc.response_text).await;
}
if !streaming_acc.thinking_text.is_empty() {
store.set_thinking_text(&streaming_acc.thinking_text).await;
}
if let Some(ref gm) = streaming_acc.grounding_metadata {
store.set_grounding(gm.clone()).await;
}
if streaming_acc.is_complete {
info!(
response_text_len = streaming_acc.response_text.len(),
thinking_text_len = streaming_acc.thinking_text.len(),
function_calls = streaming_acc.function_calls.len(),
"MITM: response complete — marking store"
);
store.mark_response_complete();
}
} else if streaming_acc.is_complete {
debug!("MITM: skipping store write — generation stale (body chunk)");
}
}