//! Request body modification for intercepted LLM API calls. //! //! Aggressively strips everything except identity and actual conversation //! from the Gemini API request. No integrity checks exist on the request //! body — Google validates OAuth, project, model, and JSON structure only. use regex::Regex; use serde_json::Value; use tracing::info; /// Strip ALL tool definitions. /// Set to false to allow tools through (for tool call testing). const STRIP_ALL_TOOLS: bool = false; /// Modify a streamGenerateContent request body in-place. /// Returns the modified JSON bytes, or None if modification wasn't possible. pub fn modify_request(body: &[u8]) -> Option> { let mut json: Value = serde_json::from_slice(body).ok()?; let original_size = body.len(); let mut changes: Vec = Vec::new(); // ── 1. System instruction: keep ONLY , nuke everything else ── if let Some(sys) = json .pointer_mut("/request/systemInstruction/parts/0/text") .and_then(|v| v.as_str()) .map(|s| s.to_string()) { let original_len = sys.len(); // Extract ... block let identity = extract_xml_section(&sys, "identity"); if let Some(identity_text) = identity { let new_sys = format!("\n{}\n", identity_text.trim()); let stripped = original_len - new_sys.len(); if stripped > 0 { changes.push(format!( "system instruction: keep only ({original_len} → {} chars, -{stripped})", new_sys.len() )); json["request"]["systemInstruction"]["parts"][0]["text"] = Value::String(new_sys); } } else { // No identity tag found — clear the whole thing changes.push(format!("system instruction: cleared ({original_len} chars)")); json["request"]["systemInstruction"]["parts"][0]["text"] = Value::String(String::new()); } } // ── 2. 
Content messages: keep only actual conversation turns ─────────── if let Some(contents) = json .pointer_mut("/request/contents") .and_then(|v| v.as_array_mut()) { let before = contents.len(); // Remove messages that are pure Antigravity context injection contents.retain(|msg| { if let Some(text) = msg["parts"][0]["text"].as_str() { // Strip user_information (OS, workspace paths) if text.starts_with("") { return false; } // Strip user_rules / MEMORY blocks if text.starts_with("") { return false; } // Strip workflows if text.starts_with("") { return false; } // Strip MCP servers block if text.starts_with("") { return false; } } true }); // For remaining messages, strip embedded metadata for msg in contents.iter_mut() { if let Some(text) = msg["parts"][0]["text"].as_str().map(|s| s.to_string()) { let mut modified = text.clone(); // Strip conversation summaries block if let Some(cleaned) = strip_between(&modified, "# Conversation History\n", "") { modified = cleaned; } // Strip blocks (cursor pos, open files, etc.) 
if let Some(cleaned) = strip_xml_section(&modified, "ADDITIONAL_METADATA") { modified = cleaned; } // Strip blocks if let Some(cleaned) = strip_xml_section(&modified, "EPHEMERAL_MESSAGE") { modified = cleaned; } // Strip "Step Id: N\n" prefixes if modified.starts_with("Step Id:") { if let Some(newline_pos) = modified.find('\n') { modified = modified[newline_pos + 1..].to_string(); } } // Strip knowledge item blocks if let Some(cleaned) = strip_between(&modified, "Here are the ", "") { // Only strip if it's about knowledge items if cleaned.len() < modified.len() && modified.contains("knowledge item") { modified = cleaned; } } // Clean up excessive whitespace from stripping let modified = collapse_newlines(&modified); if modified.len() < text.len() { msg["parts"][0]["text"] = Value::String(modified); } } } // Remove now-empty messages contents.retain(|msg| { if let Some(text) = msg["parts"][0]["text"].as_str() { !text.trim().is_empty() } else { true } }); let removed = before - contents.len(); if removed > 0 { changes.push(format!("remove {removed}/{before} content messages")); } } // ── 3. Strip all tool definitions ──────────────────────────────────── if STRIP_ALL_TOOLS { if let Some(tools) = json .pointer_mut("/request/tools") .and_then(|v| v.as_array_mut()) { let count = tools.len(); if count > 0 { tools.clear(); changes.push(format!("strip all {count} tools")); } } } // ── 4. Inject includeThoughts to capture thinking text ─────────────── // Without this flag, Google only reports thinking token counts // but doesn't send the thinking text in SSE parts. 
{ // Ensure request.generationConfig.thinkingConfig.includeThoughts = true let request = json.get_mut("request").and_then(|v| v.as_object_mut()); if let Some(req) = request { let gen_config = req .entry("generationConfig") .or_insert_with(|| serde_json::json!({})); if let Some(gc) = gen_config.as_object_mut() { let thinking_config = gc .entry("thinkingConfig") .or_insert_with(|| serde_json::json!({})); if let Some(tc) = thinking_config.as_object_mut() { if !tc.contains_key("includeThoughts") { tc.insert("includeThoughts".to_string(), Value::Bool(true)); changes.push("inject includeThoughts".to_string()); } } } } else { // Not wrapped in request — try top-level (public API format) let gen_config = json.as_object_mut().and_then(|o| { Some(o.entry("generationConfig") .or_insert_with(|| serde_json::json!({}))) }); if let Some(gc) = gen_config.and_then(|v| v.as_object_mut()) { let thinking_config = gc .entry("thinkingConfig") .or_insert_with(|| serde_json::json!({})); if let Some(tc) = thinking_config.as_object_mut() { if !tc.contains_key("includeThoughts") { tc.insert("includeThoughts".to_string(), Value::Bool(true)); changes.push("inject includeThoughts (top-level)".to_string()); } } } } } if changes.is_empty() { return None; // Nothing modified } let modified_bytes = serde_json::to_vec(&json).ok()?; let saved = original_size as i64 - modified_bytes.len() as i64; let pct = if original_size > 0 { (saved as f64 / original_size as f64 * 100.0) as i32 } else { 0 }; info!( original = original_size, modified = modified_bytes.len(), saved_bytes = saved, saved_pct = pct, "MITM: request modified [{}]", changes.join(", ") ); Some(modified_bytes) } /// Extract the inner text of an XML-style section. 
///
/// Returns the text strictly between the first `<tag>` and the first
/// `</tag>` found after it. Returns `None` when either marker is missing or
/// when the section is empty.
fn extract_xml_section(text: &str, tag: &str) -> Option<String> {
    let open = format!("<{tag}>");
    let close = format!("</{tag}>");
    let (start, end_pos) = marker_span(text, &open, &close)?;
    let inner = &text[start + open.len()..end_pos - close.len()];
    if inner.is_empty() {
        return None;
    }
    Some(inner.to_string())
}

/// Strip an XML-style section and return the modified text.
///
/// Removes the first `<tag>…</tag>` span (tags included) and leaves the
/// surrounding text untouched. Returns `None` when no complete section is
/// present.
fn strip_xml_section(text: &str, tag: &str) -> Option<String> {
    let open = format!("<{tag}>");
    let close = format!("</{tag}>");
    let (start, end_pos) = marker_span(text, &open, &close)?;
    Some(format!("{}{}", &text[..start], &text[end_pos..]))
}

/// Strip everything between two markers (inclusive of markers).
///
/// Whitespace directly after the end marker is dropped as well. Returns
/// `None` when the start marker is missing or no end marker follows it.
fn strip_between(text: &str, start_marker: &str, end_marker: &str) -> Option<String> {
    let (start, end_pos) = marker_span(text, start_marker, end_marker)?;
    // Skip any trailing whitespace after end marker
    let rest = text[end_pos..].trim_start();
    Some(format!("{}{}", &text[..start], rest))
}

/// Locate the first `start_marker` and the first `end_marker` that follows
/// it. Returns `(start, end)` byte offsets, where `start` is the first byte
/// of the start marker and `end` is one past the last byte of the end marker.
///
/// The end marker is searched only after the start marker: an end marker
/// that happens to occur earlier in the text must not be paired with it,
/// otherwise the resulting slices overlap and callers emit garbled text.
fn marker_span(text: &str, start_marker: &str, end_marker: &str) -> Option<(usize, usize)> {
    let start = text.find(start_marker)?;
    let search_from = start + start_marker.len();
    let end = search_from + text[search_from..].find(end_marker)?;
    Some((start, end + end_marker.len()))
}

/// Collapse 3+ consecutive newlines into 2.
///
/// Implemented as a single pass over the characters so no regex has to be
/// compiled on every call.
fn collapse_newlines(text: &str) -> String {
    let mut out = String::with_capacity(text.len());
    let mut run = 0usize;
    for ch in text.chars() {
        if ch == '\n' {
            run += 1;
            if run > 2 {
                // Third or later newline of the current run: drop it.
                continue;
            }
        } else {
            run = 0;
        }
        out.push(ch);
    }
    out
}

/// Dechunk an HTTP chunked-encoded body into raw bytes.
///
/// Chunk extensions (anything after `;` on the size line) are ignored.
/// Parsing is best-effort: it stops at the terminal zero-size chunk, at a
/// size line that is missing its CRLF or is not valid hex, or at the end of
/// the input. A final chunk that is shorter than its declared size is copied
/// as far as the data goes.
pub fn dechunk(data: &[u8]) -> Vec<u8> {
    let mut result = Vec::with_capacity(data.len());
    let mut pos = 0;
    while pos < data.len() {
        let line_end = match data[pos..].windows(2).position(|w| w == b"\r\n") {
            Some(p) => pos + p,
            None => break,
        };
        let size_str = std::str::from_utf8(&data[pos..line_end])
            .unwrap_or("")
            .split(';')
            .next()
            .unwrap_or("")
            .trim();
        let chunk_size = match usize::from_str_radix(size_str, 16) {
            Ok(0) | Err(_) => break,
            Ok(n) => n,
        };
        let data_start = line_end + 2;
        // The declared size comes from the wire and may be absurdly large;
        // saturate so the addition cannot overflow before clamping to the
        // bytes that are actually available.
        let data_end = data_start.saturating_add(chunk_size).min(data.len());
        result.extend_from_slice(&data[data_start..data_end]);
        // Skip the CRLF that terminates the chunk data.
        pos = data_end + 2;
    }
    result
}

/// Re-encode data as a single HTTP chunk + terminal chunk.
///
/// The output is `<hex length>\r\n<data>\r\n` followed by the `0\r\n\r\n`
/// terminator. Empty input produces the terminator alone: a zero-size chunk
/// already ends a chunked body, so writing one for the (empty) data and then
/// a second one as terminator would leave stray bytes after the message.
pub fn rechunk(data: &[u8]) -> Vec<u8> {
    const TERMINATOR: &[u8] = b"0\r\n\r\n";
    if data.is_empty() {
        return TERMINATOR.to_vec();
    }
    let hex_size = format!("{:x}", data.len());
    let mut result =
        Vec::with_capacity(hex_size.len() + 2 + data.len() + 2 + TERMINATOR.len());
    result.extend_from_slice(hex_size.as_bytes());
    result.extend_from_slice(b"\r\n");
    result.extend_from_slice(data);
    result.extend_from_slice(b"\r\n");
    result.extend_from_slice(TERMINATOR);
    result
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_dechunk_basic() {
        let chunked = b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n";
        let result = dechunk(chunked);
        assert_eq!(result, b"hello world");
    }

    #[test]
    fn test_dechunk_single() {
        let chunked = b"b\r\nhello world\r\n0\r\n\r\n";
        let result = dechunk(chunked);
        assert_eq!(result, b"hello world");
    }

    #[test]
    fn test_rechunk() {
        let data = b"hello world";
        let chunked = rechunk(data);
        let expected = b"b\r\nhello world\r\n0\r\n\r\n";
        assert_eq!(chunked, expected);
    }

    #[test]
    fn test_rechunk_empty() {
        // Empty data must yield exactly one terminal chunk.
        assert_eq!(rechunk(b""), b"0\r\n\r\n");
        assert!(dechunk(&rechunk(b"")).is_empty());
    }

    #[test]
    fn test_dechunk_rechunk_roundtrip() {
        let original = b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n";
        let data = dechunk(original);
        let rechunked = rechunk(&data);
        let data2 = dechunk(&rechunked);
        assert_eq!(data, data2);
    }

    #[test]
    fn test_modify_strips_all_tools() {
        let body = serde_json::json!({
            "project": "test",
            "requestId": "test/1",
            "request": {
                "contents": [{"role": "user", "parts": [{"text": "hello"}]}],
                "tools": [
                    {"functionDeclarations": [{"name": "view_file", "description": "view", "parameters": {}}]},
                    {"functionDeclarations": [{"name": "browser_subagent", "description": "browse", "parameters": {}}]},
                ],
                "generationConfig": {}
            },
            "model": "test"
        });
        let bytes = serde_json::to_vec(&body).unwrap();
        let modified = modify_request(&bytes).unwrap();
        let result: Value = serde_json::from_slice(&modified).unwrap();
        let tools = result["request"]["tools"].as_array().unwrap();
        // Follow the compile-time switch so the test stays meaningful in
        // both configurations instead of failing whenever stripping is off.
        if STRIP_ALL_TOOLS {
            assert!(tools.is_empty(), "all tools should be stripped");
        } else {
            assert_eq!(
                tools.len(),
                2,
                "tools should pass through when STRIP_ALL_TOOLS is off"
            );
        }
    }

    #[test]
    fn test_modify_keeps_only_identity() {
        let sys_text = "<identity>\nYou are a helpful AI.\n</identity>\n\n<tool_calling>\nUse absolute paths.\n</tool_calling>\n<web_application_development>\nlots of web dev stuff\n</web_application_development>\n<communication_style>\nbe helpful\n</communication_style>";
        let body = serde_json::json!({
            "project": "test",
            "requestId": "test/1",
            "request": {
                "contents": [{"role": "user", "parts": [{"text": "hello"}]}],
                "systemInstruction": {"parts": [{"text": sys_text}]},
                "tools": [],
                "generationConfig": {}
            },
            "model": "test"
        });
        let bytes = serde_json::to_vec(&body).unwrap();
        let modified = modify_request(&bytes).unwrap();
        let result: Value = serde_json::from_slice(&modified).unwrap();
        let new_sys = result["request"]["systemInstruction"]["parts"][0]["text"]
            .as_str()
            .unwrap();
        assert!(new_sys.contains("<identity>"));
        assert!(new_sys.contains("You are a helpful AI."));
        assert!(!new_sys.contains("tool_calling"));
        assert!(!new_sys.contains("web_application_development"));
        assert!(!new_sys.contains("communication_style"));
    }

    #[test]
    fn test_modify_strips_context_messages() {
        let body = serde_json::json!({
            "project": "test",
            "requestId": "test/1",
            "request": {
                "contents": [
                    {"role": "user", "parts": [{"text": "<user_information>\nLinux\n</user_information>"}]},
                    {"role": "user", "parts": [{"text": "<user_rules>\nno rules\n</user_rules>"}]},
                    {"role": "user", "parts": [{"text": "<workflows>\nsome workflows\n</workflows>"}]},
                    {"role": "user", "parts": [{"text": "Step Id: 0\n\n<USER_REQUEST>\nSay hello\n</USER_REQUEST>\n<ADDITIONAL_METADATA>\ncursor stuff\n</ADDITIONAL_METADATA>"}]},
                    {"role": "model", "parts": [{"text": "Hello!"}]},
                ],
                "tools": [],
                "generationConfig": {}
            },
            "model": "test"
        });
        let bytes = serde_json::to_vec(&body).unwrap();
        let modified = modify_request(&bytes).unwrap();
        let result: Value = serde_json::from_slice(&modified).unwrap();
        let contents = result["request"]["contents"].as_array().unwrap();

        // Should have removed user_information, user_rules, workflows (3 messages)
        // Kept: USER_REQUEST message (with ADDITIONAL_METADATA stripped) + model response
        assert_eq!(contents.len(), 2, "should keep only user request + model response");

        // Check USER_REQUEST message had metadata stripped
        let user_msg = contents[0]["parts"][0]["text"].as_str().unwrap();
        assert!(user_msg.contains("Say hello"), "should keep user request");
        assert!(!user_msg.contains("ADDITIONAL_METADATA"), "should strip metadata");
        assert!(!user_msg.contains("cursor stuff"), "should strip cursor info");
        assert!(!user_msg.starts_with("Step Id:"), "should strip step id");

        // Model response kept intact
        assert_eq!(contents[1]["parts"][0]["text"].as_str().unwrap(), "Hello!");
    }

    #[test]
    fn test_extract_xml_section() {
        let text = "before <identity>\nI am AI\n</identity> after";
        let result = extract_xml_section(text, "identity").unwrap();
        assert_eq!(result, "\nI am AI\n");
    }

    #[test]
    fn test_strip_xml_section() {
        let text = "before <META>\nstuff\n</META> after";
        let result = strip_xml_section(text, "META").unwrap();
        // Only the section itself is removed, so the space on each side of
        // it survives.
        assert_eq!(result, "before  after");
    }

    #[test]
    fn test_strip_between() {
        let text = "keep this # Conversation History\nlots of stuff\n</conversation_summaries>\nand this";
        let result =
            strip_between(text, "# Conversation History\n", "</conversation_summaries>").unwrap();
        assert_eq!(result, "keep this and this");
    }
}