feat: Implement request generation counter and state management to prevent stale data and unblock Language Server for follow-up requests.

This commit is contained in:
Nikketryhard
2026-02-16 16:21:52 -06:00
parent e6a339d92e
commit 38b4130c55
6 changed files with 255 additions and 100 deletions

View File

@@ -186,6 +186,11 @@ pub(crate) async fn handle_completions(
model_name, body.stream
);
// Diagnostic: dump OpenCode's raw request to /tmp/opencode-request.json (best-effort: serialization and write errors are ignored)
if let Ok(pretty) = serde_json::to_string_pretty(&body) {
let _ = std::fs::write("/tmp/opencode-request.json", &pretty);
}
let model = match lookup_model(model_name) {
Some(m) => m,
None => {
@@ -533,6 +538,8 @@ async fn chat_completions_stream(
let mut keepalive_counter: u64 = 0;
let mut last_thinking_len: usize = 0;
let mut complete_polls: u32 = 0;
let mut did_unblock_ls = false; // Prevents infinite unblock loops
let mut my_generation = state.mitm_store.current_generation();
// Helper: build usage JSON from MITM tokens
let build_usage = |pt: u64, ct: u64, crt: u64, tt: u64| -> serde_json::Value {
@@ -567,6 +574,13 @@ async fn chat_completions_stream(
break;
}
// Bail if another completions handler has superseded us
if state.mitm_store.current_generation() != my_generation {
debug!("Completions: generation changed (superseded), ending stream");
yield Ok(Event::default().data("[DONE]"));
return;
}
// ── Check for MITM-captured function calls FIRST ──
// This runs independently of LS steps — the MITM captures tool calls
// at the proxy layer, so we don't need to wait for LS processing.
@@ -661,9 +675,6 @@ async fn chat_completions_stream(
}
// Check if MITM response is complete
// Must have ACTUAL content (response text or function calls) — not just thinking.
// The LS makes multiple API calls and response_complete flips on each one,
// so we wait for it to be stable across 2+ polls with real content.
if state.mitm_store.is_response_complete() {
if !last_text.is_empty() {
// Have actual response text — done
@@ -691,13 +702,28 @@ async fn chat_completions_stream(
yield Ok(Event::default().data("[DONE]"));
return;
}
} else if last_thinking_len > 0 {
// Only thinking so far — wait for actual text/tools to arrive
// The LS may still be processing and will make follow-up API calls
} else if last_thinking_len > 0 && !did_unblock_ls {
// Thinking-only response. The LS needs follow-up API calls
// to get actual function calls or text. Unblock once.
did_unblock_ls = true;
complete_polls = 0;
// Bump generation FIRST — invalidates old MITM connection's store writes
my_generation = state.mitm_store.bump_generation();
state.mitm_store.clear_request_in_flight();
state.mitm_store.clear_response_complete();
// Drain store so leaked connections can't produce stale content
state.mitm_store.set_response_text("").await;
state.mitm_store.set_thinking_text("").await;
let _ = state.mitm_store.take_any_function_calls().await;
debug!(
"Completions: thinking-only — unblocking LS for follow-up, thinking_len={}, new_gen={}",
last_thinking_len, my_generation
);
} else if last_thinking_len > 0 && did_unblock_ls {
// Already unblocked once. Still only thinking after follow-up.
complete_polls += 1;
if complete_polls >= 6 {
// Waited ~2s with no text/tools after complete — emit what we have
debug!("Completions: MITM thinking-only timeout, thinking_len={}", last_thinking_len);
if complete_polls >= 25 {
info!("Completions: thinking-only timeout after ~10s, thinking_len={}", last_thinking_len);
let mitm = state.mitm_store.take_usage(&cascade_id).await
.or(state.mitm_store.take_usage("_latest").await);
let fr = google_to_openai_finish_reason(mitm.as_ref().and_then(|u| u.stop_reason.as_deref()));