feat: Implement request generation counter and state management to prevent stale data and unblock Language Server for follow-up requests.

2026-02-16 16:21:52 -06:00
parent e6a339d92e
commit 38b4130c55
6 changed files with 255 additions and 100 deletions
--- a/src/api/completions.rs
+++ b/src/api/completions.rs
@@ -186,6 +186,11 @@ pub(crate) async fn handle_completions(
        model_name, body.stream
    );

+    // Diagnostic: dump OpenCode's raw request
+    if let Ok(pretty) = serde_json::to_string_pretty(&body) {
+        let _ = std::fs::write("/tmp/opencode-request.json", &pretty);
+    }
+
    let model = match lookup_model(model_name) {
        Some(m) => m,
        None => {
@@ -533,6 +538,8 @@ async fn chat_completions_stream(
        let mut keepalive_counter: u64 = 0;
        let mut last_thinking_len: usize = 0;
        let mut complete_polls: u32 = 0;
+        let mut did_unblock_ls = false; // Prevents infinite unblock loops
+        let mut my_generation = state.mitm_store.current_generation();

        // Helper: build usage JSON from MITM tokens
        let build_usage = |pt: u64, ct: u64, crt: u64, tt: u64| -> serde_json::Value {
@@ -567,6 +574,13 @@ async fn chat_completions_stream(
                break;
            }

+            // Bail if another completions handler has superseded us
+            if state.mitm_store.current_generation() != my_generation {
+                debug!("Completions: generation changed (superseded), ending stream");
+                yield Ok(Event::default().data("[DONE]"));
+                return;
+            }
+
            // ── Check for MITM-captured function calls FIRST ──
            // This runs independently of LS steps — the MITM captures tool calls
            // at the proxy layer, so we don't need to wait for LS processing.
@@ -661,9 +675,6 @@ async fn chat_completions_stream(
                }

                // Check if MITM response is complete
-                // Must have ACTUAL content (response text or function calls) — not just thinking.
-                // The LS makes multiple API calls and response_complete flips on each one,
-                // so we wait for it to be stable across 2+ polls with real content.
                if state.mitm_store.is_response_complete() {
                    if !last_text.is_empty() {
                        // Have actual response text — done
@@ -691,13 +702,28 @@ async fn chat_completions_stream(
                            yield Ok(Event::default().data("[DONE]"));
                            return;
                        }
-                    } else if last_thinking_len > 0 {
-                        // Only thinking so far — wait for actual text/tools to arrive
-                        // The LS may still be processing and will make follow-up API calls
+                    } else if last_thinking_len > 0 && !did_unblock_ls {
+                        // Thinking-only response. The LS needs follow-up API calls
+                        // to get actual function calls or text. Unblock once.
+                        did_unblock_ls = true;
+                        complete_polls = 0;
+                        // Bump generation FIRST — invalidates old MITM connection's store writes
+                        my_generation = state.mitm_store.bump_generation();
+                        state.mitm_store.clear_request_in_flight();
+                        state.mitm_store.clear_response_complete();
+                        // Drain store so leaked connections can't produce stale content
+                        state.mitm_store.set_response_text("").await;
+                        state.mitm_store.set_thinking_text("").await;
+                        let _ = state.mitm_store.take_any_function_calls().await;
+                        debug!(
+                            "Completions: thinking-only — unblocking LS for follow-up, thinking_len={}, new_gen={}",
+                            last_thinking_len, my_generation
+                        );
+                    } else if last_thinking_len > 0 && did_unblock_ls {
+                        // Already unblocked once. Still only thinking after follow-up.
                        complete_polls += 1;
-                        if complete_polls >= 6 {
-                            // Waited ~2s with no text/tools after complete — emit what we have
-                            debug!("Completions: MITM thinking-only timeout, thinking_len={}", last_thinking_len);
+                        if complete_polls >= 25 {
+                            info!("Completions: thinking-only timeout after ~10s, thinking_len={}", last_thinking_len);
                            let mitm = state.mitm_store.take_usage(&cascade_id).await
                                .or(state.mitm_store.take_usage("_latest").await);
                            let fr = google_to_openai_finish_reason(mitm.as_ref().and_then(|u| u.stop_reason.as_deref()));