fix: bypass LS entirely when custom tools are active
When custom tools are set, don't forward ANY response from Google to the LS. Instead, capture text and function calls directly into MitmStore; the completions handler then reads the captured text and function calls from MitmStore. This eliminates the LS multi-turn loop (5 requests, 30+ seconds) that occurred because the LS kept processing responses internally. Tool calls now return in ~1.3s instead of timing out.
This commit is contained in:
@@ -274,6 +274,10 @@ async fn chat_completions_stream(
|
||||
let stream = async_stream::stream! {
|
||||
let start = std::time::Instant::now();
|
||||
let mut last_text = String::new();
|
||||
let has_custom_tools = state.mitm_store.get_tools().await.is_some();
|
||||
|
||||
// Clear any stale captured response from previous requests
|
||||
state.mitm_store.clear_response_async().await;
|
||||
|
||||
// Initial role chunk
|
||||
yield Ok::<_, std::convert::Infallible>(Event::default().data(serde_json::to_string(&serde_json::json!({
|
||||
@@ -342,6 +346,112 @@ async fn chat_completions_stream(
|
||||
}
|
||||
}
|
||||
|
||||
// ── Check for MITM-captured response text (bypass LS) ──
|
||||
if has_custom_tools {
|
||||
if let Some(text) = state.mitm_store.peek_response_text().await {
|
||||
if !text.is_empty() && text != last_text {
|
||||
let delta = if text.len() > last_text.len() && text.starts_with(&*last_text) {
|
||||
text[last_text.len()..].to_string()
|
||||
} else {
|
||||
text.clone()
|
||||
};
|
||||
|
||||
if !delta.is_empty() {
|
||||
yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
|
||||
"id": completion_id,
|
||||
"object": "chat.completion.chunk",
|
||||
"created": now_unix(),
|
||||
"model": model_name,
|
||||
"choices": [{
|
||||
"index": 0,
|
||||
"delta": {"content": delta},
|
||||
"finish_reason": serde_json::Value::Null,
|
||||
}],
|
||||
})).unwrap_or_default()));
|
||||
last_text = text;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if MITM response is complete
|
||||
if state.mitm_store.is_response_complete() && !last_text.is_empty() {
|
||||
debug!("Completions: MITM response complete (bypass), text length={}", last_text.len());
|
||||
yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
|
||||
"id": completion_id,
|
||||
"object": "chat.completion.chunk",
|
||||
"created": now_unix(),
|
||||
"model": model_name,
|
||||
"choices": [{
|
||||
"index": 0,
|
||||
"delta": {},
|
||||
"finish_reason": "stop",
|
||||
}],
|
||||
})).unwrap_or_default()));
|
||||
yield Ok(Event::default().data("[DONE]"));
|
||||
return;
|
||||
}
|
||||
} else if state.mitm_store.is_response_complete() {
|
||||
// Response complete but no text — might be a tool call we already handled
|
||||
// or an empty response. Give it a moment then bail.
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(200)).await;
|
||||
// Re-check function calls one more time
|
||||
let final_check = state.mitm_store.take_any_function_calls().await;
|
||||
if let Some(ref calls) = final_check {
|
||||
if !calls.is_empty() {
|
||||
let mut tool_calls = Vec::new();
|
||||
for (i, fc) in calls.iter().enumerate() {
|
||||
let call_id = format!(
|
||||
"call_{}",
|
||||
uuid::Uuid::new_v4().to_string().replace('-', "")[..24].to_string()
|
||||
);
|
||||
let arguments = serde_json::to_string(&fc.args).unwrap_or_default();
|
||||
tool_calls.push(serde_json::json!({
|
||||
"index": i,
|
||||
"id": call_id,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": fc.name,
|
||||
"arguments": arguments,
|
||||
},
|
||||
}));
|
||||
}
|
||||
yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
|
||||
"id": completion_id,
|
||||
"object": "chat.completion.chunk",
|
||||
"created": now_unix(),
|
||||
"model": model_name,
|
||||
"choices": [{
|
||||
"index": 0,
|
||||
"delta": {"tool_calls": tool_calls},
|
||||
"finish_reason": serde_json::Value::Null,
|
||||
}],
|
||||
})).unwrap_or_default()));
|
||||
yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
|
||||
"id": completion_id,
|
||||
"object": "chat.completion.chunk",
|
||||
"created": now_unix(),
|
||||
"model": model_name,
|
||||
"choices": [{
|
||||
"index": 0,
|
||||
"delta": {},
|
||||
"finish_reason": "tool_calls",
|
||||
}],
|
||||
})).unwrap_or_default()));
|
||||
yield Ok(Event::default().data("[DONE]"));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// When using bypass mode, skip LS step polling
|
||||
keepalive_counter += 1;
|
||||
if keepalive_counter % 10 == 0 {
|
||||
yield Ok(Event::default().comment("keepalive"));
|
||||
}
|
||||
let poll_ms: u64 = rand::thread_rng().gen_range(200..350);
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(poll_ms)).await;
|
||||
continue;
|
||||
}
|
||||
|
||||
// ── Check LS steps for text streaming ──
|
||||
if let Ok((status, data)) = state.backend.get_steps(&cascade_id).await {
|
||||
if status == 200 {
|
||||
|
||||
Reference in New Issue
Block a user