diff --git a/src/api/completions.rs b/src/api/completions.rs index 9208e66..0e09b47 100644 --- a/src/api/completions.rs +++ b/src/api/completions.rs @@ -212,36 +212,12 @@ async fn chat_completions_stream( if let Ok((status, data)) = state.backend.get_steps(&cascade_id).await { if status == 200 { if let Some(steps) = data["steps"].as_array() { - let text = extract_response_text(steps); - - if !text.is_empty() && text != last_text { - let delta = if text.len() > last_text.len() && text.starts_with(&*last_text) { - &text[last_text.len()..] - } else { - &text - }; - - if !delta.is_empty() { - yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({ - "id": completion_id, - "object": "chat.completion.chunk", - "created": now_unix(), - "model": model_name, - "choices": [{ - "index": 0, - "delta": {"content": delta}, - "finish_reason": serde_json::Value::Null, - }], - })).unwrap_or_default())); - last_text = text.to_string(); - } - } - - // Check for MITM-captured function calls (tool use) + // Check for MITM-captured function calls FIRST (before text) + // This prevents dummy placeholder text from leaking to client let captured = state.mitm_store.take_any_function_calls().await; if let Some(ref calls) = captured { if !calls.is_empty() { - // Emit tool_calls in OpenAI streaming format + // Emit tool_calls in OpenAI streaming format — NO text let mut tool_calls = Vec::new(); for (i, fc) in calls.iter().enumerate() { let call_id = format!( @@ -288,6 +264,32 @@ async fn chat_completions_stream( } } + // Normal text streaming (only when no function calls) + let text = extract_response_text(steps); + + if !text.is_empty() && text != last_text { + let delta = if text.len() > last_text.len() && text.starts_with(&*last_text) { + &text[last_text.len()..] + } else { + &text + }; + + if !delta.is_empty() { + yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({ + "id": completion_id, + "object": "chat.completion.chunk", + "created": now_unix(), + "model": model_name, + "choices": [{ + "index": 0, + "delta": {"content": delta}, + "finish_reason": serde_json::Value::Null, + }], + })).unwrap_or_default())); + last_text = text.to_string(); + } + } + // Done check: need DONE status AND non-empty text if is_response_done(steps) && !last_text.is_empty() { debug!("Completions stream done, text length={}", last_text.len()); diff --git a/src/mitm/proxy.rs b/src/mitm/proxy.rs index 519c1ff..ae57e7b 100644 --- a/src/mitm/proxy.rs +++ b/src/mitm/proxy.rs @@ -850,14 +850,8 @@ async fn handle_http_over_tls( // Capture usage data if is_streaming_response { if streaming_acc.is_complete || streaming_acc.output_tokens > 0 { - // Save any captured function calls before consuming the accumulator - for fc in &streaming_acc.function_calls { - store.record_function_call(cascade_hint.as_deref(), fc.clone()).await; - } - // Also save for history rewriting on tool result turns - if !streaming_acc.function_calls.is_empty() { - store.set_last_function_calls(streaming_acc.function_calls.clone()).await; - } + // Function calls are stored immediately when detected (above), + // so no need to store them again here. let usage = streaming_acc.into_usage(); store.record_usage(cascade_hint.as_deref(), usage).await; }