feat: full tool call support (OpenAI + Gemini endpoints)

- store.rs: Add tool context storage (active tools, tool config, pending tool results, call_id mapping, last function calls for history rewrite)
- types.rs: Add tools/tool_choice fields to ResponsesRequest, add build_function_call_output helper for OpenAI function_call output items
- modify.rs: Replace hardcoded get_weather with dynamic ToolContext injection. Add openai_tools_to_gemini and openai_tool_choice_to_gemini converters. Add conversation history rewriting for tool result turns (replaces fake 'Tool call completed' model turn with real functionCall, injects functionResponse before last user turn)
- proxy.rs: Build ToolContext from MitmStore before calling modify_request. Save last_function_calls for history rewriting on subsequent turns
- responses.rs: Store client tools in MitmStore before LS call. Detect function_call_output in input array for tool result submission. Return captured functionCalls as OpenAI function_call output items with generated call_ids and stringified arguments
- gemini.rs: New Gemini-native endpoint (POST /v1/gemini) with zero format translation. Accepts functionDeclarations directly, returns functionCall in Gemini format directly
- mod.rs: Wire /v1/gemini route, bump version to 3.3.0
2026-02-14 22:56:44 -06:00
parent 8455aa674f
commit 786987116b
8 changed files with 989 additions and 51 deletions
--- a/src/api/responses.rs
+++ b/src/api/responses.rs
@@ -18,42 +18,91 @@ use super::polling::{extract_response_text, is_response_done, poll_for_response,
 use super::types::*;
 use super::util::{err_response, now_unix, responses_sse_event};
 use super::AppState;
+use crate::mitm::store::PendingToolResult;
+use crate::mitm::modify::{openai_tools_to_gemini, openai_tool_choice_to_gemini};

 // ─── Input extraction ────────────────────────────────────────────────────────

+/// Parsed tool result from function_call_output items in input.
+struct ToolResultInput {
+    call_id: String,
+    output: String,
+}
+
 /// Extract user text from Responses API `input` field.
-fn extract_responses_input(input: &serde_json::Value, instructions: Option<&str>) -> String {
+/// Also extracts any function_call_output items for tool result handling.
+fn extract_responses_input(input: &serde_json::Value, instructions: Option<&str>) -> (String, Vec<ToolResultInput>) {
+    let mut tool_results: Vec<ToolResultInput> = Vec::new();
+
    let user_text = match input {
        serde_json::Value::String(s) => s.clone(),
        serde_json::Value::Array(items) => {
-            items
-                .iter()
-                .rev()
-                .find(|item| item["role"].as_str() == Some("user"))
-                .and_then(|item| match &item["content"] {
-                    serde_json::Value::String(s) => Some(s.clone()),
-                    serde_json::Value::Array(parts) => Some(
-                        parts
-                            .iter()
-                            .filter(|p| {
-                                let t = p["type"].as_str().unwrap_or("");
-                                t == "input_text" || t == "text"
-                            })
-                            .filter_map(|p| p["text"].as_str())
-                            .collect::<Vec<_>>()
-                            .join(" "),
-                    ),
-                    _ => None,
-                })
-                .unwrap_or_default()
+            // Check for function_call_output items
+            for item in items {
+                if item["type"].as_str() == Some("function_call_output") {
+                    if let (Some(call_id), Some(output)) = (
+                        item["call_id"].as_str(),
+                        item["output"].as_str(),
+                    ) {
+                        tool_results.push(ToolResultInput {
+                            call_id: call_id.to_string(),
+                            output: output.to_string(),
+                        });
+                    }
+                }
+            }
+
+            // If we have tool results but no text, generate a follow-up prompt
+            if !tool_results.is_empty() {
+                // Look for any text items alongside the tool results
+                let text_items: String = items
+                    .iter()
+                    .filter(|item| {
+                        let t = item["type"].as_str().unwrap_or("");
+                        t == "input_text" || t == "text"
+                    })
+                    .filter_map(|p| p["text"].as_str())
+                    .collect::<Vec<_>>()
+                    .join(" ");
+
+                if text_items.is_empty() {
+                    "Use the tool results to answer the original question.".to_string()
+                } else {
+                    text_items
+                }
+            } else {
+                // Normal input extraction (existing logic)
+                items
+                    .iter()
+                    .rev()
+                    .find(|item| item["role"].as_str() == Some("user"))
+                    .and_then(|item| match &item["content"] {
+                        serde_json::Value::String(s) => Some(s.clone()),
+                        serde_json::Value::Array(parts) => Some(
+                            parts
+                                .iter()
+                                .filter(|p| {
+                                    let t = p["type"].as_str().unwrap_or("");
+                                    t == "input_text" || t == "text"
+                                })
+                                .filter_map(|p| p["text"].as_str())
+                                .collect::<Vec<_>>()
+                                .join(" "),
+                        ),
+                        _ => None,
+                    })
+                    .unwrap_or_default()
+            }
        }
        _ => String::new(),
    };

-    match instructions {
+    let final_text = match instructions {
        Some(inst) if !inst.is_empty() => format!("{inst}\n\n{user_text}"),
        _ => user_text,
-    }
+    };
+
+    (final_text, tool_results)
 }

 /// Extract conversation/session ID from Responses API `conversation` field.
@@ -147,8 +196,32 @@ pub(crate) async fn handle_responses(
        );
    }

-    let user_text = extract_responses_input(&body.input, body.instructions.as_deref());
-    if user_text.is_empty() {
+    let (user_text, tool_results) = extract_responses_input(&body.input, body.instructions.as_deref());
+
+    // Handle tool result submission (function_call_output in input)
+    let is_tool_result_turn = !tool_results.is_empty();
+    if is_tool_result_turn {
+        for tr in &tool_results {
+            // Look up function name from call_id
+            let name = state.mitm_store.lookup_call_id(&tr.call_id).await
+                .unwrap_or_else(|| "unknown_function".to_string());
+
+            // Parse the output as JSON, fall back to string wrapper
+            let result_value = serde_json::from_str::<serde_json::Value>(&tr.output)
+                .unwrap_or_else(|_| serde_json::json!({"result": tr.output}));
+
+            state.mitm_store.add_tool_result(PendingToolResult {
+                name,
+                result: result_value,
+            }).await;
+        }
+        info!(
+            count = tool_results.len(),
+            "Stored tool results for MITM injection"
+        );
+    }
+
+    if user_text.is_empty() && !is_tool_result_turn {
        return err_response(
            StatusCode::BAD_REQUEST,
            "No user input found".to_string(),
@@ -156,6 +229,19 @@ pub(crate) async fn handle_responses(
        );
    }

+    // Store client tools in MitmStore for MITM injection
+    if let Some(ref tools) = body.tools {
+        let gemini_tools = openai_tools_to_gemini(tools);
+        if !gemini_tools.is_empty() {
+            state.mitm_store.set_tools(gemini_tools).await;
+            info!(count = tools.len(), "Stored client tools for MITM injection");
+        }
+    }
+    if let Some(ref choice) = body.tool_choice {
+        let gemini_config = openai_tool_choice_to_gemini(choice);
+        state.mitm_store.set_tool_config(gemini_config).await;
+    }
+
    let response_id = format!(
        "resp_{}",
        uuid::Uuid::new_v4().to_string().replace('-', "")
@@ -363,14 +449,52 @@ async fn handle_responses_sync(

    // Check for captured function calls from MITM (clears the active flag)
    let captured_tool_calls = state.mitm_store.take_any_function_calls().await;
+
+    // If we have captured tool calls, return them as function_call output items
    if let Some(ref calls) = captured_tool_calls {
        info!(
            count = calls.len(),
            tools = ?calls.iter().map(|c| &c.name).collect::<Vec<_>>(),
-            "Consumed captured function calls from MITM"
+            "Returning captured function calls to client"
        );
+
+        let mut output_items: Vec<serde_json::Value> = Vec::new();
+        for fc in calls {
+            let call_id = format!(
+                "call_{}",
+                uuid::Uuid::new_v4().to_string().replace('-', "")[..24].to_string()
+            );
+            // Register call_id → name mapping for tool result routing
+            state.mitm_store.register_call_id(call_id.clone(), fc.name.clone()).await;
+
+            // Stringify args (OpenAI sends arguments as JSON string)
+            let arguments = serde_json::to_string(&fc.args).unwrap_or_default();
+            output_items.push(build_function_call_output(&call_id, &fc.name, &arguments));
+        }
+
+        let (usage, _) = usage_from_poll(
+            &state.mitm_store, &cascade_id, &poll_result.usage,
+            &params.user_text, &poll_result.text,
+        ).await;
+
+        let resp = build_response_object(
+            ResponseData {
+                id: response_id,
+                model: model_name,
+                status: "completed",
+                created_at,
+                completed_at: Some(completed_at),
+                output: output_items,
+                usage: Some(usage),
+                thinking_signature: poll_result.thinking_signature,
+            },
+            &params,
+        );
+
+        return Json(resp).into_response();
    }

+    // Normal text response (no tool calls)
    let (usage, mitm_thinking) = usage_from_poll(&state.mitm_store, &cascade_id, &poll_result.usage, &params.user_text, &poll_result.text).await;

    // Thinking text priority: MITM-captured (raw API) > LS-extracted (steps)