From f0c2574c88c0954b66650f62ec29edcc85b63e7b Mon Sep 17 00:00:00 2001 From: Nikketryhard Date: Sat, 14 Feb 2026 18:35:07 -0600 Subject: [PATCH] =?UTF-8?q?feat:=20MITM=20request=20modification=20?= =?UTF-8?q?=E2=80=94=20strip=20bloat=20from=20LLM=20API=20requests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intercepts streamGenerateContent requests and trims: - System instruction: strips web_application_development, knowledge_discovery, persistent_context, skills sections (~18KB saved) - Content messages: strips empty user_rules, workflows boilerplate, conversation summaries (~4.5KB saved) - Tools: keeps 12 essential coding tools, strips 8 non-essential (browser_subagent, generate_image, search_web, etc. ~6KB saved) Total: ~55% reduction in request size while keeping identity, user info, and all coding-relevant tools intact. Only modifies 'agent' type requests, checkpoint requests pass through unmodified. Also: - Standalone mode is now the default (use --no-standalone to attach to existing LS) - Enable request modification by default - Add mold linker, sccache, nextest config (8 thread cap) - Add .cargo/config.toml and .config/nextest.toml --- .cargo/config.toml | 7 + .config/nextest.toml | 7 + .gitignore | 1 + src/main.rs | 10 +- src/mitm/mod.rs | 1 + src/mitm/modify.rs | 345 +++++++++++++++++++++++++++++++++++++++++++ src/mitm/proxy.rs | 26 +++- 7 files changed, 391 insertions(+), 6 deletions(-) create mode 100644 .cargo/config.toml create mode 100644 .config/nextest.toml create mode 100644 src/mitm/modify.rs diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..1e7674c --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,7 @@ +[target.x86_64-unknown-linux-gnu] +linker = "clang" +rustflags = ["-C", "link-arg=-fuse-ld=mold"] + +[build] +rustc-wrapper = "sccache" +jobs = 8 diff --git a/.config/nextest.toml b/.config/nextest.toml new file mode 100644 index 0000000..1ca40df --- /dev/null +++ b/.config/nextest.toml @@ -0,0 +1,7 @@ +[store] +# Cap test threads to 8 +threads = 8 + +[profile.default] +retries = 0 +slow-timeout = { period = "30s" } diff --git a/.gitignore b/.gitignore index 85a758c..2ff7704 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ *.txt !README.txt test_output.json +captured-request-*.json diff --git a/src/main.rs b/src/main.rs index 82f3f5b..39376d4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -46,9 +46,9 @@ struct Cli { #[arg(long, default_value_t = 8742)] mitm_port: u16, - /// Use a standalone LS (does not touch the real LS) + /// Disable standalone LS — attach to the real running LS instead #[arg(long)] - standalone: bool, + no_standalone: bool, } #[tokio::main] @@ -91,7 +91,7 @@ async fn main() { }; // ── Step 2: Backend discovery (or standalone LS spawn) ───────────────────── - let standalone_ls = if cli.standalone { + let standalone_ls = if !cli.no_standalone { // Standalone mode: discover main LS config, spawn our own let main_config = match standalone::discover_main_ls_config() { Ok(c) => c, @@ -182,7 +182,7 @@ async fn main() { let ca_pem = ca.ca_pem_path.display().to_string(); let config = mitm::proxy::MitmConfig { port: cli.mitm_port, - modify_requests: false, + modify_requests: true, }; match mitm::proxy::run(ca, mitm_store.clone(), config).await { Ok((port, handle)) => { @@ -228,7 +228,7 @@ async fn main() { // Periodic backend refresh — keeps LS connection details fresh // (skip in standalone mode — the port is fixed and discover() would overwrite it) - let is_standalone = cli.standalone; + let is_standalone = !cli.no_standalone; let refresh_backend = Arc::clone(&state.backend); let refresh_handle = tokio::spawn(async move { if is_standalone { diff --git a/src/mitm/mod.rs b/src/mitm/mod.rs index c1c3d10..3434981 100644 --- a/src/mitm/mod.rs +++ b/src/mitm/mod.rs @@ -14,6 +14,7 @@ pub mod ca; pub mod h2_handler; pub mod intercept; +pub mod modify; pub mod proto; pub mod proxy; pub mod store; diff --git a/src/mitm/modify.rs b/src/mitm/modify.rs new file mode 100644 index 0000000..825f881 --- /dev/null +++ b/src/mitm/modify.rs @@ -0,0 +1,345 @@ +//! Request body modification for intercepted LLM API calls. +//! +//! Strips redundant/verbose sections from the Google Gemini API request +//! to reduce token usage while keeping the request looking legitimate. +//! Nothing structural changes — just trimming fat. + +use serde_json::Value; +use tracing::info; + +/// Tools to KEEP — essential coding tools. Everything else gets stripped. +const KEEP_TOOLS: &[&str] = &[ + "view_file", + "write_to_file", + "replace_file_content", + "multi_replace_file_content", + "run_command", + "command_status", + "send_command_input", + "grep_search", + "find_by_name", + "list_dir", + "view_file_outline", + "view_code_item", +]; + +/// System instruction sections to STRIP (matched by XML tag name). +/// These are verbose instructional manuals that add tokens but don't +/// meaningfully affect output quality for coding tasks. +const STRIP_SYSTEM_SECTIONS: &[&str] = &[ + "web_application_development", + "knowledge_discovery", + "persistent_context", + "skills", +]; + +/// Content message patterns to strip entirely. +/// These appear as separate `contents[]` entries with recognizable prefixes. +const STRIP_CONTENT_PREFIXES: &[&str] = &[ + "\nThe user has not defined any custom rules.", + "\n", +]; + +/// Modify a streamGenerateContent request body in-place. +/// Returns the modified JSON bytes, or None if modification wasn't possible. +pub fn modify_request(body: &[u8]) -> Option> { + let mut json: Value = serde_json::from_slice(body).ok()?; + + let original_size = body.len(); + let mut changes: Vec = Vec::new(); + + // ── 1. Strip verbose system instruction sections ────────────────────── + if let Some(sys) = json + .pointer_mut("/request/systemInstruction/parts/0/text") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + { + let mut modified = sys.clone(); + for section in STRIP_SYSTEM_SECTIONS { + let pattern = format!("<{section}>"); + let end_pattern = format!(""); + if let (Some(start), Some(end)) = (modified.find(&pattern), modified.find(&end_pattern)) + { + let end_pos = end + end_pattern.len(); + let removed = end_pos - start; + modified = format!("{}{}", &modified[..start], &modified[end_pos..]); + changes.push(format!("strip <{section}> ({removed} chars)")); + } + } + + if modified.len() != sys.len() { + json["request"]["systemInstruction"]["parts"][0]["text"] = + Value::String(modified); + } + } + + // ── 2. Strip bloated content messages ───────────────────────────────── + if let Some(contents) = json + .pointer_mut("/request/contents") + .and_then(|v| v.as_array_mut()) + { + let before = contents.len(); + + // Remove messages matching strip prefixes + contents.retain(|msg| { + if let Some(text) = msg["parts"][0]["text"].as_str() { + for prefix in STRIP_CONTENT_PREFIXES { + if text.starts_with(prefix) { + return false; + } + } + } + true + }); + + // Strip conversation summaries from remaining messages + // These appear as "# Conversation History\nHere are the conversation IDs..." + for msg in contents.iter_mut() { + if let Some(text) = msg["parts"][0]["text"].as_str().map(|s| s.to_string()) { + if let Some(start) = text.find("# Conversation History\n") { + // Find the end of the conversation summaries block + let end_marker = ""; + let trimmed = if let Some(end) = text.find(end_marker) { + let end_pos = end + end_marker.len(); + // Find next non-whitespace after end marker + let rest = text[end_pos..].trim_start(); + format!("{}{}", &text[..start], rest) + } else { + // No end marker — just cut from "# Conversation History" onward + text[..start].trim_end().to_string() + }; + + if trimmed.len() < text.len() { + let saved = text.len() - trimmed.len(); + changes.push(format!("strip conversation summaries ({saved} chars)")); + msg["parts"][0]["text"] = Value::String(trimmed); + } + } + } + } + + let removed_msgs = before - contents.len(); + if removed_msgs > 0 { + changes.push(format!("remove {removed_msgs} content messages")); + } + } + + // ── 3. Strip non-essential tools ───────────────────────────────────── + if let Some(tools) = json + .pointer_mut("/request/tools") + .and_then(|v| v.as_array_mut()) + { + let before = tools.len(); + + tools.retain(|tool| { + if let Some(name) = tool["functionDeclarations"][0]["name"].as_str() { + KEEP_TOOLS.contains(&name) + } else { + true // keep unknown structure + } + }); + + let removed = before - tools.len(); + if removed > 0 { + changes.push(format!("strip {removed}/{before} tools (keep {})", KEEP_TOOLS.len())); + } + } + + if changes.is_empty() { + return None; // Nothing modified + } + + let modified_bytes = serde_json::to_vec(&json).ok()?; + let saved = original_size as i64 - modified_bytes.len() as i64; + let pct = if original_size > 0 { + (saved as f64 / original_size as f64 * 100.0) as i32 + } else { + 0 + }; + + info!( + original = original_size, + modified = modified_bytes.len(), + saved_bytes = saved, + saved_pct = pct, + "MITM: request modified [{}]", + changes.join(", ") + ); + + Some(modified_bytes) +} + +/// Dechunk an HTTP chunked-encoded body into raw bytes. +/// Input: "hex_size\r\n data\r\n hex_size\r\n data\r\n 0\r\n\r\n" +/// Output: concatenated data segments. +pub fn dechunk(data: &[u8]) -> Vec { + let mut result = Vec::with_capacity(data.len()); + let mut pos = 0; + + while pos < data.len() { + // Find end of chunk size line + let line_end = match data[pos..].windows(2).position(|w| w == b"\r\n") { + Some(p) => pos + p, + None => break, + }; + + // Parse hex chunk size (ignore chunk extensions after ';') + let size_str = std::str::from_utf8(&data[pos..line_end]) + .unwrap_or("") + .split(';') + .next() + .unwrap_or("") + .trim(); + + let chunk_size = match usize::from_str_radix(size_str, 16) { + Ok(0) => break, // Terminal chunk + Ok(n) => n, + Err(_) => break, + }; + + let data_start = line_end + 2; // skip \r\n + let data_end = (data_start + chunk_size).min(data.len()); + result.extend_from_slice(&data[data_start..data_end]); + + // Skip past data + trailing \r\n + pos = data_end + 2; + } + + result +} + +/// Re-encode data as a single HTTP chunk + terminal chunk. +pub fn rechunk(data: &[u8]) -> Vec { + let hex_size = format!("{:x}", data.len()); + let mut result = Vec::with_capacity(hex_size.len() + 2 + data.len() + 2 + 5); + result.extend_from_slice(hex_size.as_bytes()); + result.extend_from_slice(b"\r\n"); + result.extend_from_slice(data); + result.extend_from_slice(b"\r\n0\r\n\r\n"); + result +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_dechunk_basic() { + let chunked = b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n"; + let result = dechunk(chunked); + assert_eq!(result, b"hello world"); + } + + #[test] + fn test_dechunk_single() { + let chunked = b"b\r\nhello world\r\n0\r\n\r\n"; + let result = dechunk(chunked); + assert_eq!(result, b"hello world"); + } + + #[test] + fn test_rechunk() { + let data = b"hello world"; + let chunked = rechunk(data); + let expected = b"b\r\nhello world\r\n0\r\n\r\n"; + assert_eq!(chunked, expected); + } + + #[test] + fn test_dechunk_rechunk_roundtrip() { + let original = b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n"; + let data = dechunk(original); + let rechunked = rechunk(&data); + let data2 = dechunk(&rechunked); + assert_eq!(data, data2); + } + + #[test] + fn test_modify_strips_tools() { + let body = serde_json::json!({ + "project": "test", + "requestId": "test/1", + "request": { + "contents": [{"role": "user", "parts": [{"text": "hello"}]}], + "tools": [ + {"functionDeclarations": [{"name": "view_file", "description": "view", "parameters": {}}]}, + {"functionDeclarations": [{"name": "browser_subagent", "description": "browse", "parameters": {}}]}, + {"functionDeclarations": [{"name": "grep_search", "description": "grep", "parameters": {}}]}, + {"functionDeclarations": [{"name": "generate_image", "description": "img", "parameters": {}}]}, + ], + "generationConfig": {} + }, + "model": "test" + }); + + let bytes = serde_json::to_vec(&body).unwrap(); + let modified = modify_request(&bytes).unwrap(); + let result: Value = serde_json::from_slice(&modified).unwrap(); + + let tool_names: Vec<&str> = result["request"]["tools"] + .as_array() + .unwrap() + .iter() + .map(|t| t["functionDeclarations"][0]["name"].as_str().unwrap()) + .collect(); + + assert!(tool_names.contains(&"view_file")); + assert!(tool_names.contains(&"grep_search")); + assert!(!tool_names.contains(&"browser_subagent")); + assert!(!tool_names.contains(&"generate_image")); + } + + #[test] + fn test_modify_strips_system_sections() { + let sys_text = "I am an AI\nlots of web dev stuff here\nbe helpful"; + let body = serde_json::json!({ + "project": "test", + "requestId": "test/1", + "request": { + "contents": [{"role": "user", "parts": [{"text": "hello"}]}], + "systemInstruction": {"parts": [{"text": sys_text}]}, + "tools": [], + "generationConfig": {} + }, + "model": "test" + }); + + let bytes = serde_json::to_vec(&body).unwrap(); + let modified = modify_request(&bytes).unwrap(); + let result: Value = serde_json::from_slice(&modified).unwrap(); + + let new_sys = result["request"]["systemInstruction"]["parts"][0]["text"] + .as_str() + .unwrap(); + + assert!(new_sys.contains("")); + assert!(new_sys.contains("")); + assert!(!new_sys.contains("web_application_development")); + assert!(!new_sys.contains("lots of web dev stuff")); + } + + #[test] + fn test_modify_strips_empty_user_rules() { + let body = serde_json::json!({ + "project": "test", + "requestId": "test/1", + "request": { + "contents": [ + {"role": "user", "parts": [{"text": "\nThe user has not defined any custom rules.\n"}]}, + {"role": "user", "parts": [{"text": "hello world"}]}, + ], + "tools": [], + "generationConfig": {} + }, + "model": "test" + }); + + let bytes = serde_json::to_vec(&body).unwrap(); + let modified = modify_request(&bytes).unwrap(); + let result: Value = serde_json::from_slice(&modified).unwrap(); + + let contents = result["request"]["contents"].as_array().unwrap(); + assert_eq!(contents.len(), 1); + assert_eq!(contents[0]["parts"][0]["text"].as_str().unwrap(), "hello world"); + } +} diff --git a/src/mitm/proxy.rs b/src/mitm/proxy.rs index 4ba7277..e497b0c 100644 --- a/src/mitm/proxy.rs +++ b/src/mitm/proxy.rs @@ -363,7 +363,7 @@ async fn handle_http_over_tls( mut client: tokio_rustls::server::TlsStream, domain: &str, store: MitmStore, - _modify_requests: bool, + modify_requests: bool, ) -> Result<(), String> { let mut tmp = vec![0u8; 32768]; @@ -535,12 +535,36 @@ async fn handle_http_over_tls( // Log LLM calls at info, everything else at debug if req_path.contains("streamGenerateContent") { + let body_len = request_buf.len() - headers_end; info!( domain, req_path = %req_path, + body_len, cascade = ?cascade_hint, "MITM: forwarding LLM request" ); + + // ── Request modification ───────────────────────────────────── + // Dechunk body → check if agent request → modify → rechunk + if modify_requests && body_len > 0 { + let body_slice = &request_buf[headers_end..]; + let raw_body = super::modify::dechunk(body_slice); + + // Only modify "agent" requests, not "checkpoint" (LS internal) + let is_agent = raw_body + .windows(20) + .any(|w| w == b"\"requestType\":\"agent" || w == b"requestType\":\"agent\""); + + if is_agent { + if let Some(modified_body) = super::modify::modify_request(&raw_body) { + // Rebuild request_buf: original headers + rechunked modified body + let new_chunked = super::modify::rechunk(&modified_body); + let mut new_buf = request_buf[..headers_end].to_vec(); + new_buf.extend_from_slice(&new_chunked); + request_buf = new_buf; + } + } + } } else { debug!( domain,