fix: block ALL LS follow-up requests, deduplicate function calls

- Add request_in_flight flag to MitmStore, set immediately when first
  LLM request is forwarded with custom tools active
- Block ALL subsequent LS requests (agentic loop + internal flash-lite)
  immediately by answering them with fake SSE responses, instead of
  waiting for response_complete before blocking
- Fix function call deduplication: drain() the accumulator when storing
  captured calls, so later SSE chunks do not re-store them (this
  previously produced 3x duplicate tool calls)
- Clear all stale state (response, thinking, function calls, errors)
  at the start of each streaming request
- Handle response_complete with no content (thoughtSignature-only)
  gracefully with timeout instead of infinite hang
This commit is contained in:
Nikketryhard
2026-02-16 00:51:56 -06:00
parent 5f40385c8d
commit a8f3c8915f
6 changed files with 419 additions and 326 deletions

View File

@@ -458,6 +458,7 @@ async fn handle_http_over_tls(
loop {
// ── Read the HTTP request from the client ─────────────────────────
let mut request_buf = Vec::with_capacity(1024 * 64);
let mut is_our_request = false;
// 60s timeout on initial read (LS may open connection without sending immediately)
const IDLE_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(60);
@@ -587,23 +588,30 @@ async fn handle_http_over_tls(
let mut new_buf = updated_headers.into_bytes();
new_buf.extend_from_slice(&new_chunked);
request_buf = new_buf;
// Mark this as our modified request and set in-flight flag
is_our_request = true;
store.mark_request_in_flight();
}
}
}
// ── Block follow-up requests when we already have a captured functionCall ──
// The LS doesn't know what to do with the functionCall, so it tries more
// Google API calls. Block those to save quota.
if store.has_active_function_call() {
// ── Block ALL LS follow-up requests once first is in-flight ──
// When custom tools are active, we only need ONE request to Google.
// The LS tries to send multiple requests (its own agentic loop +
// internal requests on gemini-2.5-flash-lite). Block them ALL
// immediately — don't wait for response_complete.
let has_tools = store.get_tools().await.is_some();
if has_tools && store.is_request_in_flight() && !is_our_request {
info!(
"MITM: blocking follow-up request — functionCall already captured"
"MITM: blocking LS follow-up request already in-flight"
);
// Return a fake SSE response that makes the LS stop
let fake_response = "HTTP/1.1 200 OK\r\n\
Content-Type: text/event-stream\r\n\
Transfer-Encoding: chunked\r\n\
\r\n";
let fake_sse = "data: {\"response\":{\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"Tool call completed. Awaiting external tool result.\"}],\"role\":\"model\"},\"finishReason\":\"STOP\"}],\"usageMetadata\":{\"promptTokenCount\":0,\"candidatesTokenCount\":1,\"totalTokenCount\":1}}}\n\ndata: [DONE]\n\n";
let fake_sse = "data: {\"response\":{\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"Request handled.\"}],\"role\":\"model\"},\"finishReason\":\"STOP\"}],\"usageMetadata\":{\"promptTokenCount\":0,\"candidatesTokenCount\":1,\"totalTokenCount\":1}}}\n\ndata: [DONE]\n\n";
let chunked_body = super::modify::rechunk(fake_sse.as_bytes());
let mut response = fake_response.as_bytes().to_vec();
response.extend_from_slice(&chunked_body);
@@ -763,63 +771,46 @@ async fn handle_http_over_tls(
response_body_buf.extend_from_slice(&header_buf[hdr_end..]);
// Parse ORIGINAL initial body for MITM interception
let mut has_function_call = false;
let bypass_ls = modify_requests && store.get_tools().await.is_some();
if is_streaming_response && hdr_end < header_buf.len() {
let body = String::from_utf8_lossy(&header_buf[hdr_end..]);
parse_streaming_chunk(&body, &mut streaming_acc);
has_function_call = !streaming_acc.function_calls.is_empty();
// Immediately store captured function calls
if has_function_call {
for fc in &streaming_acc.function_calls {
// Store captured function calls (drain to avoid re-storing on next chunk)
if !streaming_acc.function_calls.is_empty() {
let calls: Vec<_> = streaming_acc.function_calls.drain(..).collect();
for fc in &calls {
store.record_function_call(cascade_hint.as_deref(), fc.clone()).await;
}
store.set_last_function_calls(streaming_acc.function_calls.clone()).await;
info!("MITM: stored {} function call(s) from initial body", streaming_acc.function_calls.len());
store.set_last_function_calls(calls.clone()).await;
info!("MITM: stored {} function call(s) from initial body", calls.len());
}
// Capture response + thinking text + grounding directly into MitmStore
if bypass_ls {
if !streaming_acc.response_text.is_empty() {
store.set_response_text(&streaming_acc.response_text).await;
}
if !streaming_acc.thinking_text.is_empty() {
store.set_thinking_text(&streaming_acc.thinking_text).await;
}
if let Some(ref gm) = streaming_acc.grounding_metadata {
store.set_grounding(gm.clone()).await;
}
if streaming_acc.is_complete {
store.mark_response_complete();
}
// Capture response + thinking text + grounding into MitmStore
if !streaming_acc.response_text.is_empty() {
store.set_response_text(&streaming_acc.response_text).await;
}
if !streaming_acc.thinking_text.is_empty() {
store.set_thinking_text(&streaming_acc.thinking_text).await;
}
if let Some(ref gm) = streaming_acc.grounding_metadata {
store.set_grounding(gm.clone()).await;
}
if streaming_acc.is_complete {
store.mark_response_complete();
}
}
if bypass_ls {
if has_function_call {
info!("MITM: functionCall captured → NOT forwarding to LS (bypass mode)");
store.mark_response_complete();
break;
// Forward to client — rewrite function calls if custom tools are injected
let forward_buf = if modify_requests {
if let Some(modified) = super::modify::modify_response_chunk(&header_buf) {
modified
} else {
header_buf.clone()
}
// Don't forward to LS — just continue reading chunks
// Send headers only so upstream doesn't close
if let Some(cl) = response_content_length {
if response_body_buf.len() >= cl {
store.mark_response_complete();
break;
}
}
if is_chunked && has_chunked_terminator(&response_body_buf) {
store.mark_response_complete();
break;
}
continue;
}
// Normal path (no custom tools): forward headers+body as-is
if let Err(e) = client.write_all(&header_buf).await {
} else {
header_buf.clone()
};
if let Err(e) = client.write_all(&forward_buf).await {
warn!(error = %e, "MITM: write to client failed");
break;
}
@@ -838,63 +829,46 @@ async fn handle_http_over_tls(
}
// ── Response body interception ────────────────────────────────
let mut chunk_has_fc = false;
let bypass_ls = modify_requests && store.get_tools().await.is_some();
if is_streaming_response {
let s = String::from_utf8_lossy(chunk);
parse_streaming_chunk(&s, &mut streaming_acc);
chunk_has_fc = !streaming_acc.function_calls.is_empty();
// Immediately store captured function calls
if chunk_has_fc {
for fc in &streaming_acc.function_calls {
// Store captured function calls (drain to avoid re-storing on next chunk)
if !streaming_acc.function_calls.is_empty() {
let calls: Vec<_> = streaming_acc.function_calls.drain(..).collect();
for fc in &calls {
store.record_function_call(cascade_hint.as_deref(), fc.clone()).await;
}
store.set_last_function_calls(streaming_acc.function_calls.clone()).await;
info!("MITM: stored {} function call(s) from body chunk", streaming_acc.function_calls.len());
store.set_last_function_calls(calls.clone()).await;
info!("MITM: stored {} function call(s) from body chunk", calls.len());
}
// Capture response + thinking text + grounding directly into MitmStore
if bypass_ls {
if !streaming_acc.response_text.is_empty() {
store.set_response_text(&streaming_acc.response_text).await;
}
if !streaming_acc.thinking_text.is_empty() {
store.set_thinking_text(&streaming_acc.thinking_text).await;
}
if let Some(ref gm) = streaming_acc.grounding_metadata {
store.set_grounding(gm.clone()).await;
}
if streaming_acc.is_complete {
store.mark_response_complete();
}
// Capture response + thinking text + grounding into MitmStore
if !streaming_acc.response_text.is_empty() {
store.set_response_text(&streaming_acc.response_text).await;
}
if !streaming_acc.thinking_text.is_empty() {
store.set_thinking_text(&streaming_acc.thinking_text).await;
}
if let Some(ref gm) = streaming_acc.grounding_metadata {
store.set_grounding(gm.clone()).await;
}
if streaming_acc.is_complete {
store.mark_response_complete();
}
}
if bypass_ls {
if chunk_has_fc || streaming_acc.is_complete {
info!("MITM: response captured → NOT forwarding to LS (bypass mode)");
store.mark_response_complete();
break;
// Forward chunk to client (LS) — rewrite function calls if custom tools
let forward_chunk = if modify_requests {
if let Some(modified) = super::modify::modify_response_chunk(chunk) {
modified
} else {
chunk.to_vec()
}
// Keep reading chunks without forwarding to LS
response_body_buf.extend_from_slice(chunk);
if let Some(cl) = response_content_length {
if response_body_buf.len() >= cl {
store.mark_response_complete();
break;
}
}
if is_chunked && has_chunked_terminator(&response_body_buf) {
store.mark_response_complete();
break;
}
continue;
}
// Normal path: forward chunk to client (LS)
if let Err(e) = client.write_all(chunk).await {
} else {
chunk.to_vec()
};
if let Err(e) = client.write_all(&forward_chunk).await {
warn!(error = %e, "MITM: write to client failed");
break;
}