fix: bypass LS entirely when custom tools are active

When custom tools are set, do not forward any response from Google
to the LS. Instead, capture text and function calls directly into
MitmStore; the completions handler then reads them from MitmStore
instead of from the LS.

This eliminates the LS multi-turn loop (5 requests, 30+ seconds)
that occurred because the LS kept processing responses internally.
Tool calls now return in ~1.3s instead of timing out.
This commit is contained in:
Nikketryhard
2026-02-15 00:54:40 -06:00
parent ec1c0c700d
commit 50b53097bc
3 changed files with 229 additions and 36 deletions

View File

@@ -274,6 +274,10 @@ async fn chat_completions_stream(
let stream = async_stream::stream! {
let start = std::time::Instant::now();
let mut last_text = String::new();
let has_custom_tools = state.mitm_store.get_tools().await.is_some();
// Clear any stale captured response from previous requests
state.mitm_store.clear_response_async().await;
// Initial role chunk
yield Ok::<_, std::convert::Infallible>(Event::default().data(serde_json::to_string(&serde_json::json!({
@@ -342,6 +346,112 @@ async fn chat_completions_stream(
}
}
// ── Check for MITM-captured response text (bypass LS) ──
if has_custom_tools {
if let Some(text) = state.mitm_store.peek_response_text().await {
if !text.is_empty() && text != last_text {
let delta = if text.len() > last_text.len() && text.starts_with(&*last_text) {
text[last_text.len()..].to_string()
} else {
text.clone()
};
if !delta.is_empty() {
yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
"id": completion_id,
"object": "chat.completion.chunk",
"created": now_unix(),
"model": model_name,
"choices": [{
"index": 0,
"delta": {"content": delta},
"finish_reason": serde_json::Value::Null,
}],
})).unwrap_or_default()));
last_text = text;
}
}
// Check if MITM response is complete
if state.mitm_store.is_response_complete() && !last_text.is_empty() {
debug!("Completions: MITM response complete (bypass), text length={}", last_text.len());
yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
"id": completion_id,
"object": "chat.completion.chunk",
"created": now_unix(),
"model": model_name,
"choices": [{
"index": 0,
"delta": {},
"finish_reason": "stop",
}],
})).unwrap_or_default()));
yield Ok(Event::default().data("[DONE]"));
return;
}
} else if state.mitm_store.is_response_complete() {
// Response complete but no text — might be a tool call we already handled
// or an empty response. Give it a moment then bail.
tokio::time::sleep(tokio::time::Duration::from_millis(200)).await;
// Re-check function calls one more time
let final_check = state.mitm_store.take_any_function_calls().await;
if let Some(ref calls) = final_check {
if !calls.is_empty() {
let mut tool_calls = Vec::new();
for (i, fc) in calls.iter().enumerate() {
let call_id = format!(
"call_{}",
uuid::Uuid::new_v4().to_string().replace('-', "")[..24].to_string()
);
let arguments = serde_json::to_string(&fc.args).unwrap_or_default();
tool_calls.push(serde_json::json!({
"index": i,
"id": call_id,
"type": "function",
"function": {
"name": fc.name,
"arguments": arguments,
},
}));
}
yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
"id": completion_id,
"object": "chat.completion.chunk",
"created": now_unix(),
"model": model_name,
"choices": [{
"index": 0,
"delta": {"tool_calls": tool_calls},
"finish_reason": serde_json::Value::Null,
}],
})).unwrap_or_default()));
yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
"id": completion_id,
"object": "chat.completion.chunk",
"created": now_unix(),
"model": model_name,
"choices": [{
"index": 0,
"delta": {},
"finish_reason": "tool_calls",
}],
})).unwrap_or_default()));
yield Ok(Event::default().data("[DONE]"));
return;
}
}
}
// When using bypass mode, skip LS step polling
keepalive_counter += 1;
if keepalive_counter % 10 == 0 {
yield Ok(Event::default().comment("keepalive"));
}
let poll_ms: u64 = rand::thread_rng().gen_range(200..350);
tokio::time::sleep(tokio::time::Duration::from_millis(poll_ms)).await;
continue;
}
// ── Check LS steps for text streaming ──
if let Ok((status, data)) = state.backend.get_steps(&cascade_id).await {
if status == 200 {