feat: Implement request generation counter and state management to prevent stale data and unblock Language Server for follow-up requests.
This commit is contained in:
@@ -538,6 +538,10 @@ async fn handle_http_over_tls(
|
||||
}
|
||||
};
|
||||
|
||||
// Generation tracking for store write guards
|
||||
let mut won_gate = false;
|
||||
let mut conn_generation = store.current_generation();
|
||||
|
||||
// Log LLM calls at info, everything else at debug
|
||||
if req_path.contains("streamGenerateContent") {
|
||||
let body_len = request_buf.len() - headers_end;
|
||||
@@ -549,26 +553,35 @@ async fn handle_http_over_tls(
|
||||
"MITM: forwarding LLM request"
|
||||
);
|
||||
|
||||
// ── Block ALL requests when one is already in-flight ─────────
|
||||
// ── Atomic in-flight gate ─────────────────────────────────
|
||||
// The LS opens multiple connections and sends parallel requests.
|
||||
// When custom tools are active, only the FIRST request should reach
|
||||
// Google. Block everything else with a fake response.
|
||||
if store.is_request_in_flight() {
|
||||
info!("MITM: blocking LS request — another request already in-flight");
|
||||
let fake_response = "HTTP/1.1 200 OK\r\n\
|
||||
Content-Type: text/event-stream\r\n\
|
||||
Transfer-Encoding: chunked\r\n\
|
||||
\r\n";
|
||||
let fake_sse = "data: {\"response\":{\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"Request handled.\"}],\"role\":\"model\"},\"finishReason\":\"STOP\"}],\"usageMetadata\":{\"promptTokenCount\":0,\"candidatesTokenCount\":1,\"totalTokenCount\":1}}}\n\ndata: [DONE]\n\n";
|
||||
let chunked_body = super::modify::rechunk(fake_sse.as_bytes());
|
||||
let mut response = fake_response.as_bytes().to_vec();
|
||||
response.extend_from_slice(&chunked_body);
|
||||
if let Err(e) = client.write_all(&response).await {
|
||||
warn!(error = %e, "MITM: failed to write fake response");
|
||||
// When custom tools are active, only the FIRST request wins the
|
||||
// atomic compare_exchange. All others get fake STOP responses.
|
||||
let has_tools = store.get_tools().await.is_some();
|
||||
won_gate = if has_tools {
|
||||
if !store.try_mark_request_in_flight() {
|
||||
info!("MITM: blocking LS request — another request already in-flight");
|
||||
let fake_response = "HTTP/1.1 200 OK\r\n\
|
||||
Content-Type: text/event-stream\r\n\
|
||||
Transfer-Encoding: chunked\r\n\
|
||||
\r\n";
|
||||
let fake_sse = "data: {\"response\":{\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"Request handled.\"}],\"role\":\"model\"},\"finishReason\":\"STOP\"}],\"usageMetadata\":{\"promptTokenCount\":0,\"candidatesTokenCount\":1,\"totalTokenCount\":1}}}\n\ndata: [DONE]\n\n";
|
||||
let chunked_body = super::modify::rechunk(fake_sse.as_bytes());
|
||||
let mut response = fake_response.as_bytes().to_vec();
|
||||
response.extend_from_slice(&chunked_body);
|
||||
if let Err(e) = client.write_all(&response).await {
|
||||
warn!(error = %e, "MITM: failed to write fake response");
|
||||
}
|
||||
let _ = client.flush().await;
|
||||
continue;
|
||||
}
|
||||
let _ = client.flush().await;
|
||||
continue;
|
||||
}
|
||||
true
|
||||
} else {
|
||||
false
|
||||
};
|
||||
// Snapshot the generation at gate-win time. If it changes later,
|
||||
// another completions turn started and our data is stale.
|
||||
conn_generation = store.current_generation();
|
||||
|
||||
// ── Request modification ─────────────────────────────────────
|
||||
// Dechunk body → check if agent request → modify → rechunk
|
||||
@@ -620,8 +633,7 @@ async fn handle_http_over_tls(
|
||||
new_buf.extend_from_slice(&new_chunked);
|
||||
request_buf = new_buf;
|
||||
|
||||
// Mark in-flight IMMEDIATELY — blocks all subsequent requests
|
||||
store.mark_request_in_flight();
|
||||
// In-flight already marked atomically above
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -797,33 +809,46 @@ async fn handle_http_over_tls(
|
||||
let body = String::from_utf8_lossy(&header_buf[hdr_end..]);
|
||||
parse_streaming_chunk(&body, &mut streaming_acc);
|
||||
|
||||
// Store captured function calls (drain to avoid re-storing on next chunk)
|
||||
if !streaming_acc.function_calls.is_empty() {
|
||||
let calls: Vec<_> = streaming_acc.function_calls.drain(..).collect();
|
||||
for fc in &calls {
|
||||
store
|
||||
.record_function_call(cascade_hint.as_deref(), fc.clone())
|
||||
.await;
|
||||
// Only write to store if our generation is still current.
|
||||
// If another completions turn started, our data is stale.
|
||||
let gen_valid = !won_gate || store.current_generation() == conn_generation;
|
||||
if gen_valid {
|
||||
// Store captured function calls (drain to avoid re-storing on next chunk)
|
||||
if !streaming_acc.function_calls.is_empty() {
|
||||
let calls: Vec<_> =
|
||||
streaming_acc.function_calls.drain(..).collect();
|
||||
for fc in &calls {
|
||||
store
|
||||
.record_function_call(cascade_hint.as_deref(), fc.clone())
|
||||
.await;
|
||||
}
|
||||
store.set_last_function_calls(calls.clone()).await;
|
||||
info!(
|
||||
"MITM: stored {} function call(s) from initial body",
|
||||
calls.len()
|
||||
);
|
||||
}
|
||||
store.set_last_function_calls(calls.clone()).await;
|
||||
info!(
|
||||
"MITM: stored {} function call(s) from initial body",
|
||||
calls.len()
|
||||
);
|
||||
}
|
||||
|
||||
// Capture response + thinking text + grounding into MitmStore
|
||||
if !streaming_acc.response_text.is_empty() {
|
||||
store.set_response_text(&streaming_acc.response_text).await;
|
||||
}
|
||||
if !streaming_acc.thinking_text.is_empty() {
|
||||
store.set_thinking_text(&streaming_acc.thinking_text).await;
|
||||
}
|
||||
if let Some(ref gm) = streaming_acc.grounding_metadata {
|
||||
store.set_grounding(gm.clone()).await;
|
||||
}
|
||||
if streaming_acc.is_complete {
|
||||
store.mark_response_complete();
|
||||
// Capture response + thinking text + grounding into MitmStore
|
||||
if !streaming_acc.response_text.is_empty() {
|
||||
store.set_response_text(&streaming_acc.response_text).await;
|
||||
}
|
||||
if !streaming_acc.thinking_text.is_empty() {
|
||||
store.set_thinking_text(&streaming_acc.thinking_text).await;
|
||||
}
|
||||
if let Some(ref gm) = streaming_acc.grounding_metadata {
|
||||
store.set_grounding(gm.clone()).await;
|
||||
}
|
||||
if streaming_acc.is_complete {
|
||||
info!(
|
||||
response_text_len = streaming_acc.response_text.len(),
|
||||
thinking_text_len = streaming_acc.thinking_text.len(),
|
||||
"MITM: response complete (initial body) — marking store"
|
||||
);
|
||||
store.mark_response_complete();
|
||||
}
|
||||
} else if streaming_acc.is_complete {
|
||||
debug!("MITM: skipping store write — generation stale (initial body)");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -862,33 +887,45 @@ async fn handle_http_over_tls(
|
||||
let s = String::from_utf8_lossy(chunk);
|
||||
parse_streaming_chunk(&s, &mut streaming_acc);
|
||||
|
||||
// Store captured function calls (drain to avoid re-storing on next chunk)
|
||||
if !streaming_acc.function_calls.is_empty() {
|
||||
let calls: Vec<_> = streaming_acc.function_calls.drain(..).collect();
|
||||
for fc in &calls {
|
||||
store
|
||||
.record_function_call(cascade_hint.as_deref(), fc.clone())
|
||||
.await;
|
||||
// Only write to store if our generation is still current.
|
||||
let gen_valid = !won_gate || store.current_generation() == conn_generation;
|
||||
if gen_valid {
|
||||
// Store captured function calls (drain to avoid re-storing on next chunk)
|
||||
if !streaming_acc.function_calls.is_empty() {
|
||||
let calls: Vec<_> = streaming_acc.function_calls.drain(..).collect();
|
||||
for fc in &calls {
|
||||
store
|
||||
.record_function_call(cascade_hint.as_deref(), fc.clone())
|
||||
.await;
|
||||
}
|
||||
store.set_last_function_calls(calls.clone()).await;
|
||||
info!(
|
||||
"MITM: stored {} function call(s) from body chunk",
|
||||
calls.len()
|
||||
);
|
||||
}
|
||||
store.set_last_function_calls(calls.clone()).await;
|
||||
info!(
|
||||
"MITM: stored {} function call(s) from body chunk",
|
||||
calls.len()
|
||||
);
|
||||
}
|
||||
|
||||
// Capture response + thinking text + grounding into MitmStore
|
||||
if !streaming_acc.response_text.is_empty() {
|
||||
store.set_response_text(&streaming_acc.response_text).await;
|
||||
}
|
||||
if !streaming_acc.thinking_text.is_empty() {
|
||||
store.set_thinking_text(&streaming_acc.thinking_text).await;
|
||||
}
|
||||
if let Some(ref gm) = streaming_acc.grounding_metadata {
|
||||
store.set_grounding(gm.clone()).await;
|
||||
}
|
||||
if streaming_acc.is_complete {
|
||||
store.mark_response_complete();
|
||||
// Capture response + thinking text + grounding into MitmStore
|
||||
if !streaming_acc.response_text.is_empty() {
|
||||
store.set_response_text(&streaming_acc.response_text).await;
|
||||
}
|
||||
if !streaming_acc.thinking_text.is_empty() {
|
||||
store.set_thinking_text(&streaming_acc.thinking_text).await;
|
||||
}
|
||||
if let Some(ref gm) = streaming_acc.grounding_metadata {
|
||||
store.set_grounding(gm.clone()).await;
|
||||
}
|
||||
if streaming_acc.is_complete {
|
||||
info!(
|
||||
response_text_len = streaming_acc.response_text.len(),
|
||||
thinking_text_len = streaming_acc.thinking_text.len(),
|
||||
function_calls = streaming_acc.function_calls.len(),
|
||||
"MITM: response complete — marking store"
|
||||
);
|
||||
store.mark_response_complete();
|
||||
}
|
||||
} else if streaming_acc.is_complete {
|
||||
debug!("MITM: skipping store write — generation stale (body chunk)");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user