feat: Implement request generation counter and state management to prevent stale data and unblock Language Server for follow-up requests.

2026-02-16 16:21:52 -06:00
parent e6a339d92e
commit 38b4130c55
6 changed files with 255 additions and 100 deletions
--- a/src/mitm/proxy.rs
+++ b/src/mitm/proxy.rs
@@ -538,6 +538,10 @@ async fn handle_http_over_tls(
            }
        };

+        // Generation tracking for store write guards
+        let mut won_gate = false;
+        let mut conn_generation = store.current_generation();
+
        // Log LLM calls at info, everything else at debug
        if req_path.contains("streamGenerateContent") {
            let body_len = request_buf.len() - headers_end;
@@ -549,26 +553,35 @@ async fn handle_http_over_tls(
                "MITM: forwarding LLM request"
            );

-            // ── Block ALL requests when one is already in-flight ─────────
+            // ── Atomic in-flight gate ─────────────────────────────────
            // The LS opens multiple connections and sends parallel requests.
-            // When custom tools are active, only the FIRST request should reach
-            // Google. Block everything else with a fake response.
-            if store.is_request_in_flight() {
-                info!("MITM: blocking LS request — another request already in-flight");
-                let fake_response = "HTTP/1.1 200 OK\r\n\
-                     Content-Type: text/event-stream\r\n\
-                     Transfer-Encoding: chunked\r\n\
-                     \r\n";
-                let fake_sse = "data: {\"response\":{\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"Request handled.\"}],\"role\":\"model\"},\"finishReason\":\"STOP\"}],\"usageMetadata\":{\"promptTokenCount\":0,\"candidatesTokenCount\":1,\"totalTokenCount\":1}}}\n\ndata: [DONE]\n\n";
-                let chunked_body = super::modify::rechunk(fake_sse.as_bytes());
-                let mut response = fake_response.as_bytes().to_vec();
-                response.extend_from_slice(&chunked_body);
-                if let Err(e) = client.write_all(&response).await {
-                    warn!(error = %e, "MITM: failed to write fake response");
+            // When custom tools are active, only the FIRST request wins the
+            // atomic compare_exchange. All others get fake STOP responses.
+            let has_tools = store.get_tools().await.is_some();
+            won_gate = if has_tools {
+                if !store.try_mark_request_in_flight() {
+                    info!("MITM: blocking LS request — another request already in-flight");
+                    let fake_response = "HTTP/1.1 200 OK\r\n\
+                         Content-Type: text/event-stream\r\n\
+                         Transfer-Encoding: chunked\r\n\
+                         \r\n";
+                    let fake_sse = "data: {\"response\":{\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"Request handled.\"}],\"role\":\"model\"},\"finishReason\":\"STOP\"}],\"usageMetadata\":{\"promptTokenCount\":0,\"candidatesTokenCount\":1,\"totalTokenCount\":1}}}\n\ndata: [DONE]\n\n";
+                    let chunked_body = super::modify::rechunk(fake_sse.as_bytes());
+                    let mut response = fake_response.as_bytes().to_vec();
+                    response.extend_from_slice(&chunked_body);
+                    if let Err(e) = client.write_all(&response).await {
+                        warn!(error = %e, "MITM: failed to write fake response");
+                    }
+                    let _ = client.flush().await;
+                    continue;
                }
-                let _ = client.flush().await;
-                continue;
-            }
+                true
+            } else {
+                false
+            };
+            // Snapshot the generation just after the gate check (with or without tools).
+            // If it changes later, another completions turn started and our data is stale.
+            conn_generation = store.current_generation();

            // ── Request modification ─────────────────────────────────────
            // Dechunk body → check if agent request → modify → rechunk
@@ -620,8 +633,7 @@ async fn handle_http_over_tls(
                        new_buf.extend_from_slice(&new_chunked);
                        request_buf = new_buf;

-                        // Mark in-flight IMMEDIATELY — blocks all subsequent requests
-                        store.mark_request_in_flight();
+                        // In-flight already marked atomically above
                    }
                }
            }
@@ -797,33 +809,46 @@ async fn handle_http_over_tls(
                        let body = String::from_utf8_lossy(&header_buf[hdr_end..]);
                        parse_streaming_chunk(&body, &mut streaming_acc);

-                        // Store captured function calls (drain to avoid re-storing on next chunk)
-                        if !streaming_acc.function_calls.is_empty() {
-                            let calls: Vec<_> = streaming_acc.function_calls.drain(..).collect();
-                            for fc in &calls {
-                                store
-                                    .record_function_call(cascade_hint.as_deref(), fc.clone())
-                                    .await;
+                        // Write to the store unless we won the gate and the generation has since
+                        // changed: if another completions turn started, our data is stale.
+                        let gen_valid = !won_gate || store.current_generation() == conn_generation;
+                        if gen_valid {
+                            // Store captured function calls (drain to avoid re-storing on next chunk)
+                            if !streaming_acc.function_calls.is_empty() {
+                                let calls: Vec<_> =
+                                    streaming_acc.function_calls.drain(..).collect();
+                                for fc in &calls {
+                                    store
+                                        .record_function_call(cascade_hint.as_deref(), fc.clone())
+                                        .await;
+                                }
+                                store.set_last_function_calls(calls.clone()).await;
+                                info!(
+                                    "MITM: stored {} function call(s) from initial body",
+                                    calls.len()
+                                );
                            }
-                            store.set_last_function_calls(calls.clone()).await;
-                            info!(
-                                "MITM: stored {} function call(s) from initial body",
-                                calls.len()
-                            );
-                        }

-                        // Capture response + thinking text + grounding into MitmStore
-                        if !streaming_acc.response_text.is_empty() {
-                            store.set_response_text(&streaming_acc.response_text).await;
-                        }
-                        if !streaming_acc.thinking_text.is_empty() {
-                            store.set_thinking_text(&streaming_acc.thinking_text).await;
-                        }
-                        if let Some(ref gm) = streaming_acc.grounding_metadata {
-                            store.set_grounding(gm.clone()).await;
-                        }
-                        if streaming_acc.is_complete {
-                            store.mark_response_complete();
+                            // Capture response + thinking text + grounding into MitmStore
+                            if !streaming_acc.response_text.is_empty() {
+                                store.set_response_text(&streaming_acc.response_text).await;
+                            }
+                            if !streaming_acc.thinking_text.is_empty() {
+                                store.set_thinking_text(&streaming_acc.thinking_text).await;
+                            }
+                            if let Some(ref gm) = streaming_acc.grounding_metadata {
+                                store.set_grounding(gm.clone()).await;
+                            }
+                            if streaming_acc.is_complete {
+                                info!(
+                                    response_text_len = streaming_acc.response_text.len(),
+                                    thinking_text_len = streaming_acc.thinking_text.len(),
+                                    "MITM: response complete (initial body) — marking store"
+                                );
+                                store.mark_response_complete();
+                            }
+                        } else if streaming_acc.is_complete {
+                            debug!("MITM: skipping store write — generation stale (initial body)");
                        }
                    }

@@ -862,33 +887,45 @@ async fn handle_http_over_tls(
                let s = String::from_utf8_lossy(chunk);
                parse_streaming_chunk(&s, &mut streaming_acc);

-                // Store captured function calls (drain to avoid re-storing on next chunk)
-                if !streaming_acc.function_calls.is_empty() {
-                    let calls: Vec<_> = streaming_acc.function_calls.drain(..).collect();
-                    for fc in &calls {
-                        store
-                            .record_function_call(cascade_hint.as_deref(), fc.clone())
-                            .await;
+                // Write to the store unless we won the gate and the generation has since changed.
+                let gen_valid = !won_gate || store.current_generation() == conn_generation;
+                if gen_valid {
+                    // Store captured function calls (drain to avoid re-storing on next chunk)
+                    if !streaming_acc.function_calls.is_empty() {
+                        let calls: Vec<_> = streaming_acc.function_calls.drain(..).collect();
+                        for fc in &calls {
+                            store
+                                .record_function_call(cascade_hint.as_deref(), fc.clone())
+                                .await;
+                        }
+                        store.set_last_function_calls(calls.clone()).await;
+                        info!(
+                            "MITM: stored {} function call(s) from body chunk",
+                            calls.len()
+                        );
                    }
-                    store.set_last_function_calls(calls.clone()).await;
-                    info!(
-                        "MITM: stored {} function call(s) from body chunk",
-                        calls.len()
-                    );
-                }

-                // Capture response + thinking text + grounding into MitmStore
-                if !streaming_acc.response_text.is_empty() {
-                    store.set_response_text(&streaming_acc.response_text).await;
-                }
-                if !streaming_acc.thinking_text.is_empty() {
-                    store.set_thinking_text(&streaming_acc.thinking_text).await;
-                }
-                if let Some(ref gm) = streaming_acc.grounding_metadata {
-                    store.set_grounding(gm.clone()).await;
-                }
-                if streaming_acc.is_complete {
-                    store.mark_response_complete();
+                    // Capture response + thinking text + grounding into MitmStore
+                    if !streaming_acc.response_text.is_empty() {
+                        store.set_response_text(&streaming_acc.response_text).await;
+                    }
+                    if !streaming_acc.thinking_text.is_empty() {
+                        store.set_thinking_text(&streaming_acc.thinking_text).await;
+                    }
+                    if let Some(ref gm) = streaming_acc.grounding_metadata {
+                        store.set_grounding(gm.clone()).await;
+                    }
+                    if streaming_acc.is_complete {
+                        info!(
+                            response_text_len = streaming_acc.response_text.len(),
+                            thinking_text_len = streaming_acc.thinking_text.len(),
+                            function_calls = streaming_acc.function_calls.len(),
+                            "MITM: response complete — marking store"
+                        );
+                        store.mark_response_complete();
+                    }
+                } else if streaming_acc.is_complete {
+                    debug!("MITM: skipping store write — generation stale (body chunk)");
                }
            }