From 5d4125fa0d375ea37529c51575003e85e5453913 Mon Sep 17 00:00:00 2001 From: Nikketryhard Date: Sun, 15 Feb 2026 00:37:39 -0600 Subject: [PATCH] fix: suppress dummy text from tool call responses Check for MITM-captured function calls BEFORE emitting text in the streaming handler. This prevents the dummy 'Tool call completed' placeholder (sent to the LS) from leaking to OpenCode, which was confusing it into infinite loops. Also removes duplicate function call storage at end of response loop since they're now stored immediately when detected. --- src/api/completions.rs | 56 ++++++++++++++++++++++-------------------- src/mitm/proxy.rs | 10 ++------ 2 files changed, 31 insertions(+), 35 deletions(-) diff --git a/src/api/completions.rs b/src/api/completions.rs index 9208e66..0e09b47 100644 --- a/src/api/completions.rs +++ b/src/api/completions.rs @@ -212,36 +212,12 @@ async fn chat_completions_stream( if let Ok((status, data)) = state.backend.get_steps(&cascade_id).await { if status == 200 { if let Some(steps) = data["steps"].as_array() { - let text = extract_response_text(steps); - - if !text.is_empty() && text != last_text { - let delta = if text.len() > last_text.len() && text.starts_with(&*last_text) { - &text[last_text.len()..] - } else { - &text - }; - - if !delta.is_empty() { - yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({ - "id": completion_id, - "object": "chat.completion.chunk", - "created": now_unix(), - "model": model_name, - "choices": [{ - "index": 0, - "delta": {"content": delta}, - "finish_reason": serde_json::Value::Null, - }], - })).unwrap_or_default())); - last_text = text.to_string(); - } - } - - // Check for MITM-captured function calls (tool use) + // Check for MITM-captured function calls FIRST (before text) + // This prevents dummy placeholder text from leaking to client let captured = state.mitm_store.take_any_function_calls().await; if let Some(ref calls) = captured { if !calls.is_empty() { - // Emit tool_calls in OpenAI streaming format + // Emit tool_calls in OpenAI streaming format — NO text let mut tool_calls = Vec::new(); for (i, fc) in calls.iter().enumerate() { let call_id = format!( @@ -288,6 +264,32 @@ async fn chat_completions_stream( } } + // Normal text streaming (only when no function calls) + let text = extract_response_text(steps); + + if !text.is_empty() && text != last_text { + let delta = if text.len() > last_text.len() && text.starts_with(&*last_text) { + &text[last_text.len()..] + } else { + &text + }; + + if !delta.is_empty() { + yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({ + "id": completion_id, + "object": "chat.completion.chunk", + "created": now_unix(), + "model": model_name, + "choices": [{ + "index": 0, + "delta": {"content": delta}, + "finish_reason": serde_json::Value::Null, + }], + })).unwrap_or_default())); + last_text = text.to_string(); + } + } + // Done check: need DONE status AND non-empty text if is_response_done(steps) && !last_text.is_empty() { debug!("Completions stream done, text length={}", last_text.len()); diff --git a/src/mitm/proxy.rs b/src/mitm/proxy.rs index 519c1ff..ae57e7b 100644 --- a/src/mitm/proxy.rs +++ b/src/mitm/proxy.rs @@ -850,14 +850,8 @@ async fn handle_http_over_tls( // Capture usage data if is_streaming_response { if streaming_acc.is_complete || streaming_acc.output_tokens > 0 { - // Save any captured function calls before consuming the accumulator - for fc in &streaming_acc.function_calls { - store.record_function_call(cascade_hint.as_deref(), fc.clone()).await; - } - // Also save for history rewriting on tool result turns - if !streaming_acc.function_calls.is_empty() { - store.set_last_function_calls(streaming_acc.function_calls.clone()).await; - } + // Function calls are stored immediately when detected (above), + // so no need to store them again here. let usage = streaming_acc.into_usage(); store.record_usage(cascade_hint.as_deref(), usage).await; }