fix: suppress dummy text from tool call responses
Check for MITM-captured function calls BEFORE emitting text in the streaming handler. This prevents the dummy 'Tool call completed' placeholder (sent to the LS) from leaking to OpenCode, which confused OpenCode and sent it into infinite loops. Also removes the duplicate function-call storage at the end of the response loop, since function calls are now stored immediately when detected.
This commit is contained in:
@@ -212,36 +212,12 @@ async fn chat_completions_stream(
|
||||
if let Ok((status, data)) = state.backend.get_steps(&cascade_id).await {
|
||||
if status == 200 {
|
||||
if let Some(steps) = data["steps"].as_array() {
|
||||
let text = extract_response_text(steps);
|
||||
|
||||
if !text.is_empty() && text != last_text {
|
||||
let delta = if text.len() > last_text.len() && text.starts_with(&*last_text) {
|
||||
&text[last_text.len()..]
|
||||
} else {
|
||||
&text
|
||||
};
|
||||
|
||||
if !delta.is_empty() {
|
||||
yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
|
||||
"id": completion_id,
|
||||
"object": "chat.completion.chunk",
|
||||
"created": now_unix(),
|
||||
"model": model_name,
|
||||
"choices": [{
|
||||
"index": 0,
|
||||
"delta": {"content": delta},
|
||||
"finish_reason": serde_json::Value::Null,
|
||||
}],
|
||||
})).unwrap_or_default()));
|
||||
last_text = text.to_string();
|
||||
}
|
||||
}
|
||||
|
||||
// Check for MITM-captured function calls (tool use)
|
||||
// Check for MITM-captured function calls FIRST (before text)
|
||||
// This prevents dummy placeholder text from leaking to client
|
||||
let captured = state.mitm_store.take_any_function_calls().await;
|
||||
if let Some(ref calls) = captured {
|
||||
if !calls.is_empty() {
|
||||
// Emit tool_calls in OpenAI streaming format
|
||||
// Emit tool_calls in OpenAI streaming format — NO text
|
||||
let mut tool_calls = Vec::new();
|
||||
for (i, fc) in calls.iter().enumerate() {
|
||||
let call_id = format!(
|
||||
@@ -288,6 +264,32 @@ async fn chat_completions_stream(
|
||||
}
|
||||
}
|
||||
|
||||
// Normal text streaming (only when no function calls)
|
||||
let text = extract_response_text(steps);
|
||||
|
||||
if !text.is_empty() && text != last_text {
|
||||
let delta = if text.len() > last_text.len() && text.starts_with(&*last_text) {
|
||||
&text[last_text.len()..]
|
||||
} else {
|
||||
&text
|
||||
};
|
||||
|
||||
if !delta.is_empty() {
|
||||
yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
|
||||
"id": completion_id,
|
||||
"object": "chat.completion.chunk",
|
||||
"created": now_unix(),
|
||||
"model": model_name,
|
||||
"choices": [{
|
||||
"index": 0,
|
||||
"delta": {"content": delta},
|
||||
"finish_reason": serde_json::Value::Null,
|
||||
}],
|
||||
})).unwrap_or_default()));
|
||||
last_text = text.to_string();
|
||||
}
|
||||
}
|
||||
|
||||
// Done check: need DONE status AND non-empty text
|
||||
if is_response_done(steps) && !last_text.is_empty() {
|
||||
debug!("Completions stream done, text length={}", last_text.len());
|
||||
|
||||
@@ -850,14 +850,8 @@ async fn handle_http_over_tls(
|
||||
// Capture usage data
|
||||
if is_streaming_response {
|
||||
if streaming_acc.is_complete || streaming_acc.output_tokens > 0 {
|
||||
// Save any captured function calls before consuming the accumulator
|
||||
for fc in &streaming_acc.function_calls {
|
||||
store.record_function_call(cascade_hint.as_deref(), fc.clone()).await;
|
||||
}
|
||||
// Also save for history rewriting on tool result turns
|
||||
if !streaming_acc.function_calls.is_empty() {
|
||||
store.set_last_function_calls(streaming_acc.function_calls.clone()).await;
|
||||
}
|
||||
// Function calls are stored immediately when detected (above),
|
||||
// so no need to store them again here.
|
||||
let usage = streaming_acc.into_usage();
|
||||
store.record_usage(cascade_hint.as_deref(), usage).await;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user