fix: multi-round tool history rewrite and finishReason handling

- Add ToolRound struct to pair function calls with results per-round
- Replace single-match history rewrite (broke after first round) with multi-round loop that rewrites ALL placeholder model turns
- Fix tool result name fallback: use positional index instead of always picking the first call
- Set is_complete for any finishReason (FUNCTION_CALL, MAX_TOKENS, etc.), not just STOP — prevents response_complete flag from never being set
- Legacy fallback: responses.rs path (single-round via last_calls + pending_results) still works when tool_rounds is empty
- Add tests: multi-round rewrite, single-round legacy, no-op, and FUNCTION_CALL/MAX_TOKENS finishReason handling
2026-02-16 19:05:37 -06:00
parent 6bda2ecafa
commit 39381a4dfe
5 changed files with 410 additions and 88 deletions
--- a/src/mitm/intercept.rs
+++ b/src/mitm/intercept.rs
@@ -153,12 +153,10 @@ impl StreamingAccumulator {
                            }
                        }
                    }
-                    // Check for completion
+                    // Check for completion — any finishReason means response is done
                    if let Some(reason) = candidate["finishReason"].as_str() {
                        self.stop_reason = Some(reason.to_string());
-                        if reason == "STOP" {
-                            self.is_complete = true;
-                        }
+                        self.is_complete = true;
                        // Log non-STOP finish reasons
                        if reason != "STOP" {
                            info!(finish_reason = reason, "MITM: non-STOP finish reason");
@@ -589,4 +587,30 @@ data: {"response": {"candidates": [{"content": {"role": "model","parts": [{"text
        );
        assert_eq!(acc.stop_reason, Some("STOP".to_string()));
    }
+
+    #[test]
+    fn test_function_call_finish_reason_sets_complete() {
+        let mut acc = StreamingAccumulator::new();
+
+        let event = "data: {\"response\": {\"candidates\": [{\"content\": {\"role\": \"model\", \"parts\": [{\"functionCall\": {\"name\": \"read_file\", \"args\": {\"path\": \"/foo\"}}}]}, \"finishReason\": \"FUNCTION_CALL\"}], \"usageMetadata\": {\"promptTokenCount\": 50, \"candidatesTokenCount\": 5, \"totalTokenCount\": 55}, \"modelVersion\": \"gemini-3-flash\"}}\n";
+        parse_streaming_chunk(event, &mut acc);
+
+        assert!(acc.is_complete, "FUNCTION_CALL finishReason should set is_complete");
+        assert_eq!(acc.stop_reason, Some("FUNCTION_CALL".to_string()));
+        assert_eq!(acc.function_calls.len(), 1);
+        assert_eq!(acc.function_calls[0].name, "read_file");
+        assert_eq!(acc.output_tokens, 5);
+    }
+
+    #[test]
+    fn test_max_tokens_finish_reason_sets_complete() {
+        let mut acc = StreamingAccumulator::new();
+
+        let event = "data: {\"response\": {\"candidates\": [{\"content\": {\"role\": \"model\", \"parts\": [{\"text\": \"truncated...\"}]}, \"finishReason\": \"MAX_TOKENS\"}], \"usageMetadata\": {\"promptTokenCount\": 50, \"candidatesTokenCount\": 100, \"totalTokenCount\": 150}}}\n";
+        parse_streaming_chunk(event, &mut acc);
+
+        assert!(acc.is_complete, "MAX_TOKENS finishReason should set is_complete");
+        assert_eq!(acc.stop_reason, Some("MAX_TOKENS".to_string()));
+        assert_eq!(acc.response_text, "truncated...");
+    }
 }