chore: remove broken googleSearch grounding and /v1/search endpoint
This commit is contained in:
@@ -5,6 +5,7 @@ mod gemini;
|
||||
mod models;
|
||||
mod polling;
|
||||
mod responses;
|
||||
|
||||
mod types;
|
||||
mod util;
|
||||
|
||||
@@ -43,6 +44,7 @@ pub fn router(state: Arc<AppState>) -> Router {
|
||||
post(completions::handle_completions),
|
||||
)
|
||||
.route("/v1/gemini", post(gemini::handle_gemini))
|
||||
|
||||
.route("/v1/models", get(handle_models))
|
||||
.route("/v1/sessions", get(handle_list_sessions))
|
||||
.route("/v1/sessions/{id}", delete(handle_delete_session))
|
||||
@@ -67,6 +69,7 @@ async fn handle_root() -> Json<serde_json::Value> {
|
||||
"/v1/chat/completions",
|
||||
"/v1/responses",
|
||||
"/v1/gemini",
|
||||
|
||||
"/v1/models",
|
||||
"/v1/sessions",
|
||||
"/v1/token",
|
||||
|
||||
@@ -356,6 +356,7 @@ fn print_banner(port: u16, pid: &str, https_port: &str, csrf: &str, token: &str,
|
||||
println!(" \x1b[1mroutes\x1b[0m");
|
||||
println!(" \x1b[33m POST\x1b[0m /v1/responses");
|
||||
println!(" \x1b[33m POST\x1b[0m /v1/chat/completions");
|
||||
println!(" \x1b[33m POST\x1b[0m /v1/gemini");
|
||||
println!(" \x1b[32m GET \x1b[0m /v1/models");
|
||||
println!(" \x1b[32m GET \x1b[0m /v1/sessions");
|
||||
println!(" \x1b[31m DEL \x1b[0m /v1/sessions/:id");
|
||||
|
||||
@@ -68,6 +68,9 @@ pub struct StreamingAccumulator {
|
||||
pub api_provider: Option<String>,
|
||||
/// Captured function calls from Google's response.
|
||||
pub function_calls: Vec<CapturedFunctionCall>,
|
||||
/// Captured grounding metadata from Google Search grounding.
|
||||
/// Contains search queries, web results, and citations.
|
||||
pub grounding_metadata: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
impl StreamingAccumulator {
|
||||
@@ -137,6 +140,15 @@ impl StreamingAccumulator {
|
||||
info!(finish_reason = reason, "MITM: non-STOP finish reason");
|
||||
}
|
||||
}
|
||||
// Capture grounding metadata (Google Search grounding results)
|
||||
if let Some(gm) = candidate.get("groundingMetadata") {
|
||||
self.grounding_metadata = Some(gm.clone());
|
||||
debug!(
|
||||
has_search_queries = gm.get("searchEntryPoint").is_some(),
|
||||
has_web_results = gm.get("groundingChunks").is_some(),
|
||||
"MITM: captured grounding metadata"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
self.api_provider = Some("google".to_string());
|
||||
|
||||
@@ -313,7 +313,17 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option<Vec
|
||||
// ── 4. Inject includeThoughts to capture thinking text ───────────────
|
||||
// Without this flag, Google only reports thinking token counts
|
||||
// but doesn't send the thinking text in SSE parts.
|
||||
//
|
||||
// Also inject thinkingLevel if client specified reasoning_effort.
|
||||
// Gemini 3 uses thinkingLevel ("low"/"medium"/"high"/"minimal")
|
||||
// instead of Gemini 2.5's thinkingBudget (integer).
|
||||
{
|
||||
// Get reasoning_effort from generation params if available
|
||||
let reasoning_effort = tool_ctx
|
||||
.as_ref()
|
||||
.and_then(|ctx| ctx.generation_params.as_ref())
|
||||
.and_then(|gp| gp.reasoning_effort.clone());
|
||||
|
||||
// Ensure request.generationConfig.thinkingConfig.includeThoughts = true
|
||||
let request = json.get_mut("request").and_then(|v| v.as_object_mut());
|
||||
if let Some(req) = request {
|
||||
@@ -329,6 +339,10 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option<Vec
|
||||
tc.insert("includeThoughts".to_string(), Value::Bool(true));
|
||||
changes.push("inject includeThoughts".to_string());
|
||||
}
|
||||
if let Some(ref effort) = reasoning_effort {
|
||||
tc.insert("thinkingLevel".to_string(), Value::String(effort.clone()));
|
||||
changes.push(format!("inject thinkingLevel={effort}"));
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -346,6 +360,10 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option<Vec
|
||||
tc.insert("includeThoughts".to_string(), Value::Bool(true));
|
||||
changes.push("inject includeThoughts (top-level)".to_string());
|
||||
}
|
||||
if let Some(ref effort) = reasoning_effort {
|
||||
tc.insert("thinkingLevel".to_string(), Value::String(effort.clone()));
|
||||
changes.push(format!("inject thinkingLevel={effort} (top-level)"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -399,6 +417,14 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option<Vec
|
||||
gc.insert("presencePenalty".to_string(), serde_json::json!(pp));
|
||||
injected.push(format!("presencePenalty={pp}"));
|
||||
}
|
||||
if let Some(ref mime) = gp.response_mime_type {
|
||||
gc.insert("responseMimeType".to_string(), serde_json::json!(mime));
|
||||
injected.push(format!("responseMimeType={mime}"));
|
||||
}
|
||||
if let Some(ref schema) = gp.response_schema {
|
||||
gc.insert("responseSchema".to_string(), schema.clone());
|
||||
injected.push("responseSchema=<schema>".to_string());
|
||||
}
|
||||
|
||||
if !injected.is_empty() {
|
||||
changes.push(format!("inject generationConfig: {}", injected.join(", ")));
|
||||
@@ -428,6 +454,8 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option<Vec
|
||||
changes.join(", ")
|
||||
);
|
||||
|
||||
|
||||
|
||||
Some(modified_bytes)
|
||||
}
|
||||
|
||||
|
||||
@@ -73,6 +73,18 @@ pub struct GenerationParams {
|
||||
pub frequency_penalty: Option<f64>,
|
||||
/// Presence penalty (OpenAI) — mapped to presencePenalty in Gemini.
|
||||
pub presence_penalty: Option<f64>,
|
||||
/// Reasoning effort — mapped to thinkingConfig.thinkingLevel in Gemini 3.
|
||||
/// Values: "minimal", "low", "medium", "high" (maps 1:1 to Google's thinkingLevel).
|
||||
pub reasoning_effort: Option<String>,
|
||||
/// Response MIME type — injected as generationConfig.responseMimeType.
|
||||
/// e.g., "application/json" for JSON mode.
|
||||
pub response_mime_type: Option<String>,
|
||||
/// Response schema — injected as generationConfig.responseSchema.
|
||||
/// Used for structured output (json_schema format).
|
||||
pub response_schema: Option<serde_json::Value>,
|
||||
/// Enable Google Search grounding — injects {"googleSearch": {}} into tools.
|
||||
/// Default off. When enabled, model responses include groundingMetadata.
|
||||
pub google_search: bool,
|
||||
}
|
||||
|
||||
/// Thread-safe store for intercepted data.
|
||||
@@ -121,6 +133,10 @@ pub struct MitmStore {
|
||||
// ── Generation parameters for MITM injection ─────────────────────────
|
||||
/// Client-specified sampling parameters to inject into Google API requests.
|
||||
generation_params: Arc<RwLock<Option<GenerationParams>>>,
|
||||
|
||||
// ── Grounding metadata capture ──────────────────────────────────────
|
||||
/// Captured grounding metadata from Google API responses (search results).
|
||||
captured_grounding: Arc<RwLock<Option<serde_json::Value>>>,
|
||||
}
|
||||
|
||||
/// Aggregate statistics across all intercepted traffic.
|
||||
@@ -164,6 +180,7 @@ impl MitmStore {
|
||||
captured_thinking_text: Arc::new(RwLock::new(None)),
|
||||
response_complete: Arc::new(AtomicBool::new(false)),
|
||||
generation_params: Arc::new(RwLock::new(None)),
|
||||
captured_grounding: Arc::new(RwLock::new(None)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -470,4 +487,23 @@ impl MitmStore {
|
||||
pub async fn clear_generation_params(&self) {
|
||||
*self.generation_params.write().await = None;
|
||||
}
|
||||
|
||||
// ── Grounding metadata capture ──────────────────────────────────────
|
||||
|
||||
/// Store captured grounding metadata from API response.
|
||||
pub async fn set_grounding(&self, meta: serde_json::Value) {
|
||||
*self.captured_grounding.write().await = Some(meta);
|
||||
}
|
||||
|
||||
/// Take (consume) captured grounding metadata.
|
||||
#[allow(dead_code)]
|
||||
pub async fn take_grounding(&self) -> Option<serde_json::Value> {
|
||||
self.captured_grounding.write().await.take()
|
||||
}
|
||||
|
||||
/// Peek at grounding metadata without consuming.
|
||||
#[allow(dead_code)]
|
||||
pub async fn peek_grounding(&self) -> Option<serde_json::Value> {
|
||||
self.captured_grounding.read().await.clone()
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user