feat: capture function calls from Google + block follow-up quota waste

When MITM strips LS tools and injects custom tools:
- Google returns functionCall → captured in MitmStore
- Follow-up LS requests are blocked with fake SSE response
- Proxy consumes captured calls and clears the flag
- Result: 1 real Google API call instead of 5+ per tool call

Flow: Client → Proxy → LS → MITM(inject tool) → Google
      Google returns functionCall → MITM captures it
      LS tries follow-up → MITM blocks (fake response)
      Proxy reads captured functionCall → returns to client
This commit is contained in:
Nikketryhard
2026-02-14 22:37:28 -06:00
parent 146be139a2
commit 8455aa674f
5 changed files with 161 additions and 5 deletions

View File

@@ -565,6 +565,29 @@ async fn handle_http_over_tls(
}
}
}
// ── Block follow-up requests when we already have a captured functionCall ──
// The LS doesn't know what to do with the functionCall, so it tries more
// Google API calls. Block those to save quota.
if store.has_active_function_call() {
info!(
"MITM: blocking follow-up request — functionCall already captured"
);
// Return a fake SSE response that makes the LS stop
let fake_response = "HTTP/1.1 200 OK\r\n\
Content-Type: text/event-stream\r\n\
Transfer-Encoding: chunked\r\n\
\r\n";
let fake_sse = "data: {\"response\":{\"candidates\":[{\"content\":{\"parts\":[{\"text\":\"Tool call completed. Awaiting external tool result.\"}],\"role\":\"model\"},\"finishReason\":\"STOP\"}],\"usageMetadata\":{\"promptTokenCount\":0,\"candidatesTokenCount\":1,\"totalTokenCount\":1}}}\n\ndata: [DONE]\n\n";
let chunked_body = super::modify::rechunk(fake_sse.as_bytes());
let mut response = fake_response.as_bytes().to_vec();
response.extend_from_slice(&chunked_body);
if let Err(e) = client.write_all(&response).await {
warn!(error = %e, "MITM: failed to write fake response");
}
let _ = client.flush().await;
continue; // Skip the real upstream call
}
} else {
debug!(
domain,
@@ -739,6 +762,10 @@ async fn handle_http_over_tls(
// Capture usage data
if is_streaming_response {
if streaming_acc.is_complete || streaming_acc.output_tokens > 0 {
// Save any captured function calls before consuming the accumulator
for fc in &streaming_acc.function_calls {
store.record_function_call(cascade_hint.as_deref(), fc.clone()).await;
}
let usage = streaming_acc.into_usage();
store.record_usage(cascade_hint.as_deref(), usage).await;
}