feat: capture thinking text via MITM dual-call merge

The LS makes TWO separate Google API calls for thinking models:
  Call 1: response + thinking token count (no thinking text)
  Call 2: thinking summary text (no thinking tokens)

Each hits a different StreamingAccumulator, so we:
1. Capture response_text in StreamingAccumulator (non-thinking parts)
2. In MitmStore::record_usage, detect when Call 2 arrives for a
   cascade that already has thinking tokens from Call 1
3. Merge Call 2's response_text as thinking_text on Call 1's usage

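Also injects includeThoughts into Google API requests in the MITM
modify_request step (setting generationConfig.thinkingConfig.includeThoughts
to true when it is not already present) to ensure thinking text is
available in SSE responses.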
This commit is contained in:
Nikketryhard
2026-02-14 19:49:15 -06:00
parent 905d55beb5
commit 34b9553484
4 changed files with 92 additions and 3 deletions

View File

@@ -152,6 +152,47 @@ pub fn modify_request(body: &[u8]) -> Option<Vec<u8>> {
}
}
// ── 4. Inject includeThoughts to capture thinking text ───────────────
// Without this flag, Google only reports thinking token counts
// but doesn't send the thinking text in SSE parts.
{
// Ensure request.generationConfig.thinkingConfig.includeThoughts = true
let request = json.get_mut("request").and_then(|v| v.as_object_mut());
if let Some(req) = request {
let gen_config = req
.entry("generationConfig")
.or_insert_with(|| serde_json::json!({}));
if let Some(gc) = gen_config.as_object_mut() {
let thinking_config = gc
.entry("thinkingConfig")
.or_insert_with(|| serde_json::json!({}));
if let Some(tc) = thinking_config.as_object_mut() {
if !tc.contains_key("includeThoughts") {
tc.insert("includeThoughts".to_string(), Value::Bool(true));
changes.push("inject includeThoughts".to_string());
}
}
}
} else {
// Not wrapped in request — try top-level (public API format)
let gen_config = json.as_object_mut().and_then(|o| {
Some(o.entry("generationConfig")
.or_insert_with(|| serde_json::json!({})))
});
if let Some(gc) = gen_config.and_then(|v| v.as_object_mut()) {
let thinking_config = gc
.entry("thinkingConfig")
.or_insert_with(|| serde_json::json!({}));
if let Some(tc) = thinking_config.as_object_mut() {
if !tc.contains_key("includeThoughts") {
tc.insert("includeThoughts".to_string(), Value::Bool(true));
changes.push("inject includeThoughts (top-level)".to_string());
}
}
}
}
}
if changes.is_empty() {
return None; // Nothing modified
}