chore: remove broken googleSearch grounding and /v1/search endpoint

This commit is contained in:
Nikketryhard
2026-02-15 17:08:46 -06:00
parent cc5f48967a
commit afa96b88a5
5 changed files with 80 additions and 0 deletions

View File

@@ -5,6 +5,7 @@ mod gemini;
mod models; mod models;
mod polling; mod polling;
mod responses; mod responses;
mod types; mod types;
mod util; mod util;
@@ -43,6 +44,7 @@ pub fn router(state: Arc<AppState>) -> Router {
post(completions::handle_completions), post(completions::handle_completions),
) )
.route("/v1/gemini", post(gemini::handle_gemini)) .route("/v1/gemini", post(gemini::handle_gemini))
.route("/v1/models", get(handle_models)) .route("/v1/models", get(handle_models))
.route("/v1/sessions", get(handle_list_sessions)) .route("/v1/sessions", get(handle_list_sessions))
.route("/v1/sessions/{id}", delete(handle_delete_session)) .route("/v1/sessions/{id}", delete(handle_delete_session))
@@ -67,6 +69,7 @@ async fn handle_root() -> Json<serde_json::Value> {
"/v1/chat/completions", "/v1/chat/completions",
"/v1/responses", "/v1/responses",
"/v1/gemini", "/v1/gemini",
"/v1/models", "/v1/models",
"/v1/sessions", "/v1/sessions",
"/v1/token", "/v1/token",

View File

@@ -356,6 +356,7 @@ fn print_banner(port: u16, pid: &str, https_port: &str, csrf: &str, token: &str,
println!(" \x1b[1mroutes\x1b[0m"); println!(" \x1b[1mroutes\x1b[0m");
println!(" \x1b[33m POST\x1b[0m /v1/responses"); println!(" \x1b[33m POST\x1b[0m /v1/responses");
println!(" \x1b[33m POST\x1b[0m /v1/chat/completions"); println!(" \x1b[33m POST\x1b[0m /v1/chat/completions");
println!(" \x1b[33m POST\x1b[0m /v1/gemini");
println!(" \x1b[32m GET \x1b[0m /v1/models"); println!(" \x1b[32m GET \x1b[0m /v1/models");
println!(" \x1b[32m GET \x1b[0m /v1/sessions"); println!(" \x1b[32m GET \x1b[0m /v1/sessions");
println!(" \x1b[31m DEL \x1b[0m /v1/sessions/:id"); println!(" \x1b[31m DEL \x1b[0m /v1/sessions/:id");

View File

@@ -68,6 +68,9 @@ pub struct StreamingAccumulator {
pub api_provider: Option<String>, pub api_provider: Option<String>,
/// Captured function calls from Google's response. /// Captured function calls from Google's response.
pub function_calls: Vec<CapturedFunctionCall>, pub function_calls: Vec<CapturedFunctionCall>,
/// Captured grounding metadata from Google Search grounding.
/// Contains search queries, web results, and citations.
pub grounding_metadata: Option<serde_json::Value>,
} }
impl StreamingAccumulator { impl StreamingAccumulator {
@@ -137,6 +140,15 @@ impl StreamingAccumulator {
info!(finish_reason = reason, "MITM: non-STOP finish reason"); info!(finish_reason = reason, "MITM: non-STOP finish reason");
} }
} }
// Capture grounding metadata (Google Search grounding results).
// The object is stored verbatim; the debug event below only reports which
// sub-fields are present. `webSearchQueries` carries the issued queries and
// `groundingChunks` the web results, so those are the keys checked here.
if let Some(gm) = candidate.get("groundingMetadata") {
    self.grounding_metadata = Some(gm.clone());
    debug!(
        has_search_queries = gm.get("webSearchQueries").is_some(),
        has_web_results = gm.get("groundingChunks").is_some(),
        "MITM: captured grounding metadata"
    );
}
} }
} }
self.api_provider = Some("google".to_string()); self.api_provider = Some("google".to_string());

View File

@@ -313,7 +313,17 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option<Vec
// ── 4. Inject includeThoughts to capture thinking text ─────────────── // ── 4. Inject includeThoughts to capture thinking text ───────────────
// Without this flag, Google only reports thinking token counts // Without this flag, Google only reports thinking token counts
// but doesn't send the thinking text in SSE parts. // but doesn't send the thinking text in SSE parts.
//
// Also inject thinkingLevel if client specified reasoning_effort.
// Gemini 3 uses thinkingLevel ("low"/"medium"/"high"/"minimal")
// instead of Gemini 2.5's thinkingBudget (integer).
{ {
// Get reasoning_effort from generation params if available
let reasoning_effort = tool_ctx
.as_ref()
.and_then(|ctx| ctx.generation_params.as_ref())
.and_then(|gp| gp.reasoning_effort.clone());
// Ensure request.generationConfig.thinkingConfig.includeThoughts = true // Ensure request.generationConfig.thinkingConfig.includeThoughts = true
let request = json.get_mut("request").and_then(|v| v.as_object_mut()); let request = json.get_mut("request").and_then(|v| v.as_object_mut());
if let Some(req) = request { if let Some(req) = request {
@@ -329,6 +339,10 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option<Vec
tc.insert("includeThoughts".to_string(), Value::Bool(true)); tc.insert("includeThoughts".to_string(), Value::Bool(true));
changes.push("inject includeThoughts".to_string()); changes.push("inject includeThoughts".to_string());
} }
if let Some(ref effort) = reasoning_effort {
tc.insert("thinkingLevel".to_string(), Value::String(effort.clone()));
changes.push(format!("inject thinkingLevel={effort}"));
}
} }
} }
} else { } else {
@@ -346,6 +360,10 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option<Vec
tc.insert("includeThoughts".to_string(), Value::Bool(true)); tc.insert("includeThoughts".to_string(), Value::Bool(true));
changes.push("inject includeThoughts (top-level)".to_string()); changes.push("inject includeThoughts (top-level)".to_string());
} }
if let Some(ref effort) = reasoning_effort {
tc.insert("thinkingLevel".to_string(), Value::String(effort.clone()));
changes.push(format!("inject thinkingLevel={effort} (top-level)"));
}
} }
} }
} }
@@ -399,6 +417,14 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option<Vec
gc.insert("presencePenalty".to_string(), serde_json::json!(pp)); gc.insert("presencePenalty".to_string(), serde_json::json!(pp));
injected.push(format!("presencePenalty={pp}")); injected.push(format!("presencePenalty={pp}"));
} }
if let Some(ref mime) = gp.response_mime_type {
gc.insert("responseMimeType".to_string(), serde_json::json!(mime));
injected.push(format!("responseMimeType={mime}"));
}
if let Some(ref schema) = gp.response_schema {
gc.insert("responseSchema".to_string(), schema.clone());
injected.push("responseSchema=<schema>".to_string());
}
if !injected.is_empty() { if !injected.is_empty() {
changes.push(format!("inject generationConfig: {}", injected.join(", "))); changes.push(format!("inject generationConfig: {}", injected.join(", ")));
@@ -428,6 +454,8 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option<Vec
changes.join(", ") changes.join(", ")
); );
Some(modified_bytes) Some(modified_bytes)
} }

View File

@@ -73,6 +73,18 @@ pub struct GenerationParams {
pub frequency_penalty: Option<f64>, pub frequency_penalty: Option<f64>,
/// Presence penalty (OpenAI) — mapped to presencePenalty in Gemini. /// Presence penalty (OpenAI) — mapped to presencePenalty in Gemini.
pub presence_penalty: Option<f64>, pub presence_penalty: Option<f64>,
/// Reasoning effort — mapped to thinkingConfig.thinkingLevel in Gemini 3.
/// Values: "low", "medium", "high", "minimal" (forwarded unchanged as Google's thinkingLevel).
pub reasoning_effort: Option<String>,
/// Response MIME type — injected as generationConfig.responseMimeType.
/// e.g., "application/json" for JSON mode.
pub response_mime_type: Option<String>,
/// Response schema — injected as generationConfig.responseSchema.
/// Used for structured output (json_schema format).
pub response_schema: Option<serde_json::Value>,
/// Enable Google Search grounding — injects {"googleSearch": {}} into tools.
/// Default off. When enabled, model responses include groundingMetadata.
pub google_search: bool,
} }
/// Thread-safe store for intercepted data. /// Thread-safe store for intercepted data.
@@ -121,6 +133,10 @@ pub struct MitmStore {
// ── Generation parameters for MITM injection ───────────────────────── // ── Generation parameters for MITM injection ─────────────────────────
/// Client-specified sampling parameters to inject into Google API requests. /// Client-specified sampling parameters to inject into Google API requests.
generation_params: Arc<RwLock<Option<GenerationParams>>>, generation_params: Arc<RwLock<Option<GenerationParams>>>,
// ── Grounding metadata capture ──────────────────────────────────────
/// Captured grounding metadata from Google API responses (search results).
captured_grounding: Arc<RwLock<Option<serde_json::Value>>>,
} }
/// Aggregate statistics across all intercepted traffic. /// Aggregate statistics across all intercepted traffic.
@@ -164,6 +180,7 @@ impl MitmStore {
captured_thinking_text: Arc::new(RwLock::new(None)), captured_thinking_text: Arc::new(RwLock::new(None)),
response_complete: Arc::new(AtomicBool::new(false)), response_complete: Arc::new(AtomicBool::new(false)),
generation_params: Arc::new(RwLock::new(None)), generation_params: Arc::new(RwLock::new(None)),
captured_grounding: Arc::new(RwLock::new(None)),
} }
} }
@@ -470,4 +487,23 @@ impl MitmStore {
pub async fn clear_generation_params(&self) { pub async fn clear_generation_params(&self) {
*self.generation_params.write().await = None; *self.generation_params.write().await = None;
} }
// ── Grounding metadata capture ──────────────────────────────────────
/// Record grounding metadata taken from an API response, replacing
/// whatever value was stored before.
pub async fn set_grounding(&self, meta: serde_json::Value) {
    let mut slot = self.captured_grounding.write().await;
    *slot = Some(meta);
}
/// Remove and return the stored grounding metadata, leaving `None` behind.
/// Returns `None` when nothing is currently stored.
#[allow(dead_code)]
pub async fn take_grounding(&self) -> Option<serde_json::Value> {
    let mut slot = self.captured_grounding.write().await;
    (*slot).take()
}
/// Return a clone of the stored grounding metadata; the stored value is
/// left in place.
#[allow(dead_code)]
pub async fn peek_grounding(&self) -> Option<serde_json::Value> {
    let slot = self.captured_grounding.read().await;
    (*slot).clone()
}
} }