fix: block ALL LS follow-up requests, deduplicate function calls
- Add request_in_flight flag to MitmStore, set immediately when the first LLM request is forwarded with custom tools active
- Block ALL subsequent LS requests (agentic loop + internal flash-lite) with fake SSE responses instead of waiting for response_complete
- Fix function call deduplication: drain() the accumulator after storing to prevent 3x duplicate tool calls across SSE chunks
- Clear all stale state (response, thinking, function calls, errors) at the start of each streaming request
- Handle response_complete with no content (thoughtSignature-only) gracefully with a timeout instead of an infinite hang
This commit is contained in:
@@ -458,6 +458,7 @@ async fn handle_http_over_tls(
|
||||
loop {
|
||||
// ── Read the HTTP request from the client ─────────────────────────
|
||||
let mut request_buf = Vec::with_capacity(1024 * 64);
|
||||
let mut is_our_request = false;
|
||||
|
||||
// 60s timeout on initial read (LS may open connection without sending immediately)
|
||||
const IDLE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60);
|
||||
@@ -587,23 +588,30 @@ async fn handle_http_over_tls(
|
||||
let mut new_buf = updated_headers.into_bytes();
|
||||
new_buf.extend_from_slice(&new_chunked);
|
||||
request_buf = new_buf;
|
||||
|
||||
// Mark this as our modified request and set in-flight flag
|
||||
is_our_request = true;
|
||||
store.mark_request_in_flight();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── Block follow-up requests when we already have a captured functionCall ──
|
||||
// The LS doesn't know what to do with the functionCall, so it tries more
|
||||
// Google API calls. Block those to save quota.
|
||||
if store.has_active_function_call() {
|
||||
// ── Block ALL LS follow-up requests once first is in-flight ──
|
||||
// When custom tools are active, we only need ONE request to Google.
|
||||
// The LS tries to send multiple requests (its own agentic loop +
|
||||
// internal requests on gemini-2.5-flash-lite). Block them ALL
|
||||
// immediately — don't wait for response_complete.
|
||||
let has_tools = store.get_tools().await.is_some();
|
||||
if has_tools && store.is_request_in_flight() && !is_our_request {
|
||||
info!(
|
||||
"MITM: blocking follow-up request — functionCall already captured"
|
||||
"MITM: blocking LS follow-up — request already in-flight"
|
||||
);
|
||||
// Return a fake SSE response that makes the LS stop
|
||||
let fake_response = "HTTP/1.1 200 OK\r\n\
|
||||
Content-Type: text/event-stream\r\n\
|
||||
Transfer-Encoding: chunked\r\n\
|
||||
\r\n";
|
||||
let fake_sse = "data: {\"response\":{\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"Tool call completed. Awaiting external tool result.\"}],\"role\":\"model\"},\"finishReason\":\"STOP\"}],\"usageMetadata\":{\"promptTokenCount\":0,\"candidatesTokenCount\":1,\"totalTokenCount\":1}}}\n\ndata: [DONE]\n\n";
|
||||
let fake_sse = "data: {\"response\":{\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"Request handled.\"}],\"role\":\"model\"},\"finishReason\":\"STOP\"}],\"usageMetadata\":{\"promptTokenCount\":0,\"candidatesTokenCount\":1,\"totalTokenCount\":1}}}\n\ndata: [DONE]\n\n";
|
||||
let chunked_body = super::modify::rechunk(fake_sse.as_bytes());
|
||||
let mut response = fake_response.as_bytes().to_vec();
|
||||
response.extend_from_slice(&chunked_body);
|
||||
@@ -763,63 +771,46 @@ async fn handle_http_over_tls(
|
||||
response_body_buf.extend_from_slice(&header_buf[hdr_end..]);
|
||||
|
||||
// Parse ORIGINAL initial body for MITM interception
|
||||
let mut has_function_call = false;
|
||||
let bypass_ls = modify_requests && store.get_tools().await.is_some();
|
||||
|
||||
if is_streaming_response && hdr_end < header_buf.len() {
|
||||
let body = String::from_utf8_lossy(&header_buf[hdr_end..]);
|
||||
parse_streaming_chunk(&body, &mut streaming_acc);
|
||||
has_function_call = !streaming_acc.function_calls.is_empty();
|
||||
|
||||
// Immediately store captured function calls
|
||||
if has_function_call {
|
||||
for fc in &streaming_acc.function_calls {
|
||||
// Store captured function calls (drain to avoid re-storing on next chunk)
|
||||
if !streaming_acc.function_calls.is_empty() {
|
||||
let calls: Vec<_> = streaming_acc.function_calls.drain(..).collect();
|
||||
for fc in &calls {
|
||||
store.record_function_call(cascade_hint.as_deref(), fc.clone()).await;
|
||||
}
|
||||
store.set_last_function_calls(streaming_acc.function_calls.clone()).await;
|
||||
info!("MITM: stored {} function call(s) from initial body", streaming_acc.function_calls.len());
|
||||
store.set_last_function_calls(calls.clone()).await;
|
||||
info!("MITM: stored {} function call(s) from initial body", calls.len());
|
||||
}
|
||||
|
||||
// Capture response + thinking text + grounding directly into MitmStore
|
||||
if bypass_ls {
|
||||
if !streaming_acc.response_text.is_empty() {
|
||||
store.set_response_text(&streaming_acc.response_text).await;
|
||||
}
|
||||
if !streaming_acc.thinking_text.is_empty() {
|
||||
store.set_thinking_text(&streaming_acc.thinking_text).await;
|
||||
}
|
||||
if let Some(ref gm) = streaming_acc.grounding_metadata {
|
||||
store.set_grounding(gm.clone()).await;
|
||||
}
|
||||
if streaming_acc.is_complete {
|
||||
store.mark_response_complete();
|
||||
}
|
||||
// Capture response + thinking text + grounding into MitmStore
|
||||
if !streaming_acc.response_text.is_empty() {
|
||||
store.set_response_text(&streaming_acc.response_text).await;
|
||||
}
|
||||
if !streaming_acc.thinking_text.is_empty() {
|
||||
store.set_thinking_text(&streaming_acc.thinking_text).await;
|
||||
}
|
||||
if let Some(ref gm) = streaming_acc.grounding_metadata {
|
||||
store.set_grounding(gm.clone()).await;
|
||||
}
|
||||
if streaming_acc.is_complete {
|
||||
store.mark_response_complete();
|
||||
}
|
||||
}
|
||||
|
||||
if bypass_ls {
|
||||
if has_function_call {
|
||||
info!("MITM: functionCall captured → NOT forwarding to LS (bypass mode)");
|
||||
store.mark_response_complete();
|
||||
break;
|
||||
// Forward to client — rewrite function calls if custom tools are injected
|
||||
let forward_buf = if modify_requests {
|
||||
if let Some(modified) = super::modify::modify_response_chunk(&header_buf) {
|
||||
modified
|
||||
} else {
|
||||
header_buf.clone()
|
||||
}
|
||||
// Don't forward to LS — just continue reading chunks
|
||||
// Send headers only so upstream doesn't close
|
||||
if let Some(cl) = response_content_length {
|
||||
if response_body_buf.len() >= cl {
|
||||
store.mark_response_complete();
|
||||
break;
|
||||
}
|
||||
}
|
||||
if is_chunked && has_chunked_terminator(&response_body_buf) {
|
||||
store.mark_response_complete();
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Normal path (no custom tools): forward headers+body as-is
|
||||
if let Err(e) = client.write_all(&header_buf).await {
|
||||
} else {
|
||||
header_buf.clone()
|
||||
};
|
||||
if let Err(e) = client.write_all(&forward_buf).await {
|
||||
warn!(error = %e, "MITM: write to client failed");
|
||||
break;
|
||||
}
|
||||
@@ -838,63 +829,46 @@ async fn handle_http_over_tls(
|
||||
}
|
||||
|
||||
// ── Response body interception ────────────────────────────────
|
||||
let mut chunk_has_fc = false;
|
||||
let bypass_ls = modify_requests && store.get_tools().await.is_some();
|
||||
|
||||
if is_streaming_response {
|
||||
let s = String::from_utf8_lossy(chunk);
|
||||
parse_streaming_chunk(&s, &mut streaming_acc);
|
||||
chunk_has_fc = !streaming_acc.function_calls.is_empty();
|
||||
|
||||
// Immediately store captured function calls
|
||||
if chunk_has_fc {
|
||||
for fc in &streaming_acc.function_calls {
|
||||
// Store captured function calls (drain to avoid re-storing on next chunk)
|
||||
if !streaming_acc.function_calls.is_empty() {
|
||||
let calls: Vec<_> = streaming_acc.function_calls.drain(..).collect();
|
||||
for fc in &calls {
|
||||
store.record_function_call(cascade_hint.as_deref(), fc.clone()).await;
|
||||
}
|
||||
store.set_last_function_calls(streaming_acc.function_calls.clone()).await;
|
||||
info!("MITM: stored {} function call(s) from body chunk", streaming_acc.function_calls.len());
|
||||
store.set_last_function_calls(calls.clone()).await;
|
||||
info!("MITM: stored {} function call(s) from body chunk", calls.len());
|
||||
}
|
||||
|
||||
// Capture response + thinking text + grounding directly into MitmStore
|
||||
if bypass_ls {
|
||||
if !streaming_acc.response_text.is_empty() {
|
||||
store.set_response_text(&streaming_acc.response_text).await;
|
||||
}
|
||||
if !streaming_acc.thinking_text.is_empty() {
|
||||
store.set_thinking_text(&streaming_acc.thinking_text).await;
|
||||
}
|
||||
if let Some(ref gm) = streaming_acc.grounding_metadata {
|
||||
store.set_grounding(gm.clone()).await;
|
||||
}
|
||||
if streaming_acc.is_complete {
|
||||
store.mark_response_complete();
|
||||
}
|
||||
// Capture response + thinking text + grounding into MitmStore
|
||||
if !streaming_acc.response_text.is_empty() {
|
||||
store.set_response_text(&streaming_acc.response_text).await;
|
||||
}
|
||||
if !streaming_acc.thinking_text.is_empty() {
|
||||
store.set_thinking_text(&streaming_acc.thinking_text).await;
|
||||
}
|
||||
if let Some(ref gm) = streaming_acc.grounding_metadata {
|
||||
store.set_grounding(gm.clone()).await;
|
||||
}
|
||||
if streaming_acc.is_complete {
|
||||
store.mark_response_complete();
|
||||
}
|
||||
}
|
||||
|
||||
if bypass_ls {
|
||||
if chunk_has_fc || streaming_acc.is_complete {
|
||||
info!("MITM: response captured → NOT forwarding to LS (bypass mode)");
|
||||
store.mark_response_complete();
|
||||
break;
|
||||
// Forward chunk to client (LS) — rewrite function calls if custom tools
|
||||
let forward_chunk = if modify_requests {
|
||||
if let Some(modified) = super::modify::modify_response_chunk(chunk) {
|
||||
modified
|
||||
} else {
|
||||
chunk.to_vec()
|
||||
}
|
||||
// Keep reading chunks without forwarding to LS
|
||||
response_body_buf.extend_from_slice(chunk);
|
||||
if let Some(cl) = response_content_length {
|
||||
if response_body_buf.len() >= cl {
|
||||
store.mark_response_complete();
|
||||
break;
|
||||
}
|
||||
}
|
||||
if is_chunked && has_chunked_terminator(&response_body_buf) {
|
||||
store.mark_response_complete();
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Normal path: forward chunk to client (LS)
|
||||
if let Err(e) = client.write_all(chunk).await {
|
||||
} else {
|
||||
chunk.to_vec()
|
||||
};
|
||||
if let Err(e) = client.write_all(&forward_chunk).await {
|
||||
warn!(error = %e, "MITM: write to client failed");
|
||||
break;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user