feat: capture thinking text from MITM-intercepted API responses

The LS strips thinking/reasoning text from plannerResponse steps —
only the thinkingSignature (opaque verification blob) is preserved.
The actual thinking text flows through the MITM proxy in the raw
Google SSE response (parts with thought: true) and in the Anthropic
SSE stream (content_block_delta events of type thinking_delta).

Changes:
- StreamingAccumulator now accumulates thinking text from SSE events
- ApiUsage gains thinking_text: Option<String>
- usage_from_poll returns a tuple: (Usage, optional thinking text)
- Thinking text priority: MITM-captured > LS-extracted (fallback)
- Reasoning output item now populated from real API data
- Removed debug dump code
This commit is contained in:
Nikketryhard
2026-02-14 19:30:09 -06:00
parent 19dc920872
commit 905d55beb5
4 changed files with 60 additions and 17 deletions

View File

@@ -57,6 +57,8 @@ pub struct StreamingAccumulator {
pub cache_creation_input_tokens: u64,
pub cache_read_input_tokens: u64,
pub thinking_tokens: u64,
/// Accumulated thinking/reasoning text from the model.
pub thinking_text: String,
pub model: Option<String>,
pub stop_reason: Option<String>,
pub is_complete: bool,
@@ -81,9 +83,19 @@ impl StreamingAccumulator {
if let Some(model) = response["modelVersion"].as_str() {
self.model = Some(model.to_string());
}
// Check for completion in candidates
// Extract thinking text from parts with thought: true
if let Some(candidates) = response.get("candidates").and_then(|c| c.as_array()) {
for candidate in candidates {
if let Some(parts) = candidate["content"]["parts"].as_array() {
for part in parts {
if part["thought"].as_bool() == Some(true) {
if let Some(text) = part["text"].as_str() {
self.thinking_text.push_str(text);
}
}
}
}
// Check for completion
if let Some(reason) = candidate["finishReason"].as_str() {
self.stop_reason = Some(reason.to_string());
if reason == "STOP" {
@@ -97,6 +109,7 @@ impl StreamingAccumulator {
input = self.input_tokens,
output = self.output_tokens,
thinking = self.thinking_tokens,
thinking_text_len = self.thinking_text.len(),
complete = self.is_complete,
"SSE Google: usage update"
);
@@ -136,7 +149,16 @@ impl StreamingAccumulator {
"SSE Anthropic: stream complete"
);
}
"content_block_start" | "content_block_delta" | "content_block_stop" | "ping" => {}
// Anthropic thinking content blocks
"content_block_delta" => {
// type: "thinking" delta contains thinking text
if event["delta"]["type"].as_str() == Some("thinking_delta") {
if let Some(text) = event["delta"]["thinking"].as_str() {
self.thinking_text.push_str(text);
}
}
}
"content_block_start" | "content_block_stop" | "ping" => {}
_ => {
trace!(event_type, "SSE: unknown event type");
}
@@ -145,12 +167,18 @@ impl StreamingAccumulator {
/// Convert accumulated data to an ApiUsage.
pub fn into_usage(self) -> ApiUsage {
let thinking_text = if self.thinking_text.is_empty() {
None
} else {
Some(self.thinking_text)
};
ApiUsage {
input_tokens: self.input_tokens,
output_tokens: self.output_tokens,
cache_creation_input_tokens: self.cache_creation_input_tokens,
cache_read_input_tokens: self.cache_read_input_tokens,
thinking_output_tokens: self.thinking_tokens,
thinking_text,
response_output_tokens: 0,
model: self.model,
stop_reason: self.stop_reason,
@@ -174,6 +202,7 @@ fn extract_usage_from_message(msg: &Value) -> Option<ApiUsage> {
cache_creation_input_tokens: usage["cache_creation_input_tokens"].as_u64().unwrap_or(0),
cache_read_input_tokens: usage["cache_read_input_tokens"].as_u64().unwrap_or(0),
thinking_output_tokens: 0,
thinking_text: None,
response_output_tokens: 0,
model: msg["model"].as_str().map(|s| s.to_string()),
stop_reason: msg["stop_reason"].as_str().map(|s| s.to_string()),