feat: MITM request modification — strip bloat from LLM API requests

Intercepts streamGenerateContent requests and trims:
- System instruction: strips the web_application_development, knowledge_discovery, persistent_context, and skills sections (~18KB saved)
- Content messages: strips empty user_rules, workflows boilerplate, and conversation summaries (~4.5KB saved)
- Tools: keeps 12 essential coding tools, strips 8 non-essential ones (browser_subagent, generate_image, search_web, etc.; ~6KB saved)

Total: ~55% reduction in request size while keeping identity, user info, and all coding-relevant tools intact. Only 'agent' type requests are modified; checkpoint requests pass through unmodified.

Also:
- Standalone mode is now the default (use --no-standalone to attach to an existing LS)
- Enable request modification by default
- Add mold linker, sccache, nextest config (8 thread cap)
- Add .cargo/config.toml and .config/nextest.toml
2026-02-14 18:35:07 -06:00
parent 061b08fc8f
commit f0c2574c88
7 changed files with 391 additions and 6 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -0,0 +1,7 @@
+[target.x86_64-unknown-linux-gnu]
+linker = "clang"
+rustflags = ["-C", "link-arg=-fuse-ld=mold"]
+
+[build]
+rustc-wrapper = "sccache"
+jobs = 8
--- a/.config/nextest.toml
+++ b/.config/nextest.toml
@@ -0,0 +1,7 @@
+[profile.default]
+# Cap test threads to 8. nextest takes the thread cap from the profile's
+# `test-threads` key; `threads` under `[store]` is not a recognized setting,
+# so the cap was never applied there.
+test-threads = 8
+retries = 0
+slow-timeout = { period = "30s" }
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@
 *.txt
 !README.txt
 test_output.json
+captured-request-*.json
--- a/src/main.rs
+++ b/src/main.rs
@@ -46,9 +46,9 @@ struct Cli {
    #[arg(long, default_value_t = 8742)]
    mitm_port: u16,

-    /// Use a standalone LS (does not touch the real LS)
+    /// Disable standalone LS — attach to the real running LS instead
    #[arg(long)]
-    standalone: bool,
+    no_standalone: bool,
 }

 #[tokio::main]
@@ -91,7 +91,7 @@ async fn main() {
    };

    // ── Step 2: Backend discovery (or standalone LS spawn) ─────────────────────
-    let standalone_ls = if cli.standalone {
+    let standalone_ls = if !cli.no_standalone {
        // Standalone mode: discover main LS config, spawn our own
        let main_config = match standalone::discover_main_ls_config() {
            Ok(c) => c,
@@ -182,7 +182,7 @@ async fn main() {
                let ca_pem = ca.ca_pem_path.display().to_string();
                let config = mitm::proxy::MitmConfig {
                    port: cli.mitm_port,
-                    modify_requests: false,
+                    modify_requests: true,
                };
                match mitm::proxy::run(ca, mitm_store.clone(), config).await {
                    Ok((port, handle)) => {
@@ -228,7 +228,7 @@ async fn main() {

    // Periodic backend refresh — keeps LS connection details fresh
    // (skip in standalone mode — the port is fixed and discover() would overwrite it)
-    let is_standalone = cli.standalone;
+    let is_standalone = !cli.no_standalone;
    let refresh_backend = Arc::clone(&state.backend);
    let refresh_handle = tokio::spawn(async move {
        if is_standalone {
--- a/src/mitm/mod.rs
+++ b/src/mitm/mod.rs
@@ -14,6 +14,7 @@
 pub mod ca;
 pub mod h2_handler;
 pub mod intercept;
+pub mod modify;
 pub mod proto;
 pub mod proxy;
 pub mod store;
--- a/src/mitm/modify.rs
+++ b/src/mitm/modify.rs
@@ -0,0 +1,345 @@
+//! Request body modification for intercepted LLM API calls.
+//!
+//! Strips redundant/verbose sections from the Google Gemini API request
+//! to reduce token usage while keeping the request looking legitimate.
+//! Nothing structural changes — just trimming fat.
+
+use serde_json::Value;
+use tracing::info;
+
+/// Allow-list of essential coding tools: a tool whose declared name is not listed here is removed from `/request/tools`.
+const KEEP_TOOLS: &[&str] = &[
+    "view_file",
+    "write_to_file",
+    "replace_file_content",
+    "multi_replace_file_content",
+    "run_command",
+    "command_status",
+    "send_command_input",
+    "grep_search",
+    "find_by_name",
+    "list_dir",
+    "view_file_outline",
+    "view_code_item",
+];
+
+/// System instruction sections to strip, identified by XML-style tag name.
+/// For each name, the span from `<name>` through `</name>` is cut out of the system instruction text.
+/// Per the author, these are verbose manuals that cost tokens without meaningfully improving coding output.
+const STRIP_SYSTEM_SECTIONS: &[&str] = &[
+    "web_application_development",
+    "knowledge_discovery",
+    "persistent_context",
+    "skills",
+];
+
+/// Prefixes of boilerplate messages: a `contents[]` entry is dropped entirely
+/// when the text of its first part starts with one of these strings.
+const STRIP_CONTENT_PREFIXES: &[&str] = &[
+    "<user_rules>\nThe user has not defined any custom rules.",
+    "<workflows>\n",
+];
+
+/// Rewrite a `streamGenerateContent` request body, dropping known bloat.
+///
+/// `body` is the de-chunked JSON request. Three passes run; each one is a
+/// no-op when the JSON path it targets is absent:
+///
+/// 1. each section named in `STRIP_SYSTEM_SECTIONS` is cut out of
+///    `/request/systemInstruction/parts/0/text`;
+/// 2. `/request/contents` entries whose first part starts with one of
+///    `STRIP_CONTENT_PREFIXES` are dropped, and the conversation-summaries
+///    block is cut out of the remaining entries;
+/// 3. function declarations under `/request/tools` whose name is not in
+///    `KEEP_TOOLS` are removed, along with tool entries left empty by that.
+///
+/// Returns the re-serialized JSON, or `None` when `body` is not valid JSON,
+/// when nothing matched, or when serialization fails.
+pub fn modify_request(body: &[u8]) -> Option<Vec<u8>> {
+    let mut json: Value = serde_json::from_slice(body).ok()?;
+
+    let original_size = body.len();
+    let mut changes: Vec<String> = Vec::new();
+
+    // ── 1. Strip verbose system instruction sections ──────────────────────
+    if let Some(Value::String(sys)) = json.pointer_mut("/request/systemInstruction/parts/0/text") {
+        for section in STRIP_SYSTEM_SECTIONS {
+            if let Some(removed) = strip_tagged_section(sys, section) {
+                changes.push(format!("strip <{section}> ({removed} chars)"));
+            }
+        }
+    }
+
+    // ── 2. Strip bloated content messages ─────────────────────────────────
+    if let Some(contents) = json
+        .pointer_mut("/request/contents")
+        .and_then(Value::as_array_mut)
+    {
+        let before = contents.len();
+
+        // Remove messages matching strip prefixes
+        contents.retain(|msg| match msg["parts"][0]["text"].as_str() {
+            Some(text) => !STRIP_CONTENT_PREFIXES
+                .iter()
+                .any(|prefix| text.starts_with(prefix)),
+            None => true,
+        });
+
+        // Strip conversation summaries from remaining messages
+        for msg in contents.iter_mut() {
+            let first_text = msg
+                .get_mut("parts")
+                .and_then(|parts| parts.get_mut(0))
+                .and_then(|part| part.get_mut("text"));
+            if let Some(Value::String(text)) = first_text {
+                if let Some(saved) = strip_conversation_summaries(text) {
+                    changes.push(format!("strip conversation summaries ({saved} chars)"));
+                }
+            }
+        }
+
+        let removed_msgs = before - contents.len();
+        if removed_msgs > 0 {
+            changes.push(format!("remove {removed_msgs} content messages"));
+        }
+    }
+
+    // ── 3. Strip non-essential tools ─────────────────────────────────────
+    if let Some(tools) = json
+        .pointer_mut("/request/tools")
+        .and_then(Value::as_array_mut)
+    {
+        let mut declared = 0usize;
+        let mut removed = 0usize;
+
+        // Filter every declaration, not just the first one of each entry: a
+        // single tool entry may carry several function declarations.
+        tools.retain_mut(|tool| {
+            let decls = match tool
+                .get_mut("functionDeclarations")
+                .and_then(Value::as_array_mut)
+            {
+                Some(decls) => decls,
+                None => return true, // keep unknown structure
+            };
+
+            let had = decls.len();
+            decls.retain(|decl| match decl["name"].as_str() {
+                Some(name) => KEEP_TOOLS.contains(&name),
+                None => true, // keep unknown structure
+            });
+            declared += had;
+            removed += had - decls.len();
+
+            // Drop the entry only when filtering emptied it; an entry that
+            // was already empty is left untouched.
+            had == 0 || !decls.is_empty()
+        });
+
+        if removed > 0 {
+            changes.push(format!(
+                "strip {removed}/{declared} tools (keep {})",
+                KEEP_TOOLS.len()
+            ));
+        }
+    }
+
+    if changes.is_empty() {
+        return None; // Nothing modified
+    }
+
+    let modified_bytes = serde_json::to_vec(&json).ok()?;
+    let saved = original_size as i64 - modified_bytes.len() as i64;
+    let pct = if original_size > 0 {
+        (saved as f64 / original_size as f64 * 100.0) as i32
+    } else {
+        0
+    };
+
+    info!(
+        original = original_size,
+        modified = modified_bytes.len(),
+        saved_bytes = saved,
+        saved_pct = pct,
+        "MITM: request modified [{}]",
+        changes.join(", ")
+    );
+
+    Some(modified_bytes)
+}
+
+/// Remove the first `<tag>…</tag>` span from `text`.
+///
+/// Returns the number of bytes removed, or `None` when no opening tag exists
+/// or no closing tag follows it.
+fn strip_tagged_section(text: &mut String, tag: &str) -> Option<usize> {
+    let open = format!("<{tag}>");
+    let close = format!("</{tag}>");
+
+    let start = text.find(&open)?;
+    // Search for the closing tag only after the opening one. A stray closing
+    // tag earlier in the text would otherwise put `end` before `start`.
+    let body_start = start + open.len();
+    let end = body_start + text[body_start..].find(&close)? + close.len();
+
+    text.replace_range(start..end, "");
+    Some(end - start)
+}
+
+/// Cut the conversation-summaries block out of `text`.
+///
+/// The block starts at "# Conversation History\n" and runs through the next
+/// `</conversation_summaries>` (plus following whitespace). When no end
+/// marker follows, everything from the heading onward is cut. Returns the
+/// number of bytes saved, or `None` when `text` was left unchanged.
+fn strip_conversation_summaries(text: &mut String) -> Option<usize> {
+    const START_MARKER: &str = "# Conversation History\n";
+    const END_MARKER: &str = "</conversation_summaries>";
+
+    let start = text.find(START_MARKER)?;
+    // Only an end marker located after the heading closes the block.
+    let trimmed = match text[start..].find(END_MARKER) {
+        Some(rel) => {
+            let rest = text[start + rel + END_MARKER.len()..].trim_start();
+            format!("{}{}", &text[..start], rest)
+        }
+        // No end marker — just cut from "# Conversation History" onward
+        None => text[..start].trim_end().to_string(),
+    };
+
+    let saved = text.len().checked_sub(trimmed.len()).filter(|&n| n > 0)?;
+    *text = trimmed;
+    Some(saved)
+}
+
+/// Decode an HTTP chunked-encoded body into the raw payload bytes.
+///
+/// Input: "hex_size\r\n data\r\n hex_size\r\n data\r\n 0\r\n\r\n"
+/// Output: concatenated data segments. Chunk extensions (anything after `;`
+/// on a size line) are ignored.
+///
+/// Parsing is best-effort and does not panic on malformed input: it stops at
+/// the terminal zero-size chunk, at a size line with no `\r\n`, or at a size
+/// that is not valid hex. A chunk whose declared size runs past the end of
+/// `data` is truncated to the bytes actually present.
+pub fn dechunk(data: &[u8]) -> Vec<u8> {
+    let mut result = Vec::with_capacity(data.len());
+    let mut pos = 0;
+
+    while pos < data.len() {
+        // Find end of chunk size line
+        let line_end = match data[pos..].windows(2).position(|w| w == b"\r\n") {
+            Some(p) => pos + p,
+            None => break,
+        };
+
+        // Parse hex chunk size (ignore chunk extensions after ';')
+        let size_str = std::str::from_utf8(&data[pos..line_end])
+            .unwrap_or("")
+            .split(';')
+            .next()
+            .unwrap_or("")
+            .trim();
+
+        let chunk_size = match usize::from_str_radix(size_str, 16) {
+            Ok(0) | Err(_) => break, // Terminal chunk or unparsable size
+            Ok(n) => n,
+        };
+
+        let data_start = line_end + 2; // skip \r\n
+        // The size comes straight off the wire: saturate so an absurdly large
+        // value cannot overflow the addition, then clamp to the buffer.
+        let data_end = data_start.saturating_add(chunk_size).min(data.len());
+        result.extend_from_slice(&data[data_start..data_end]);
+
+        // Skip past data + trailing \r\n
+        pos = data_end + 2;
+    }
+
+    result
+}
+
+/// Re-encode data as a single HTTP chunk + terminal chunk.
+///
+/// Empty input yields only the terminal chunk (`0\r\n\r\n`). A zero-size
+/// chunk is itself the terminator, so writing one for the data and then a
+/// second terminator would leave stray bytes after the end of the body.
+pub fn rechunk(data: &[u8]) -> Vec<u8> {
+    const TERMINAL_CHUNK: &[u8] = b"0\r\n\r\n";
+
+    if data.is_empty() {
+        return TERMINAL_CHUNK.to_vec();
+    }
+
+    let hex_size = format!("{:x}", data.len());
+    // size line + CRLF + payload + CRLF + terminal chunk
+    let mut result =
+        Vec::with_capacity(hex_size.len() + 2 + data.len() + 2 + TERMINAL_CHUNK.len());
+    result.extend_from_slice(hex_size.as_bytes());
+    result.extend_from_slice(b"\r\n");
+    result.extend_from_slice(data);
+    result.extend_from_slice(b"\r\n");
+    result.extend_from_slice(TERMINAL_CHUNK);
+    result
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Wrap a `request` object in the outer envelope shared by every fixture.
+    fn envelope(request: Value) -> Value {
+        serde_json::json!({
+            "project": "test",
+            "requestId": "test/1",
+            "request": request,
+            "model": "test"
+        })
+    }
+
+    /// Serialize `body`, run it through `modify_request`, and parse the
+    /// modified bytes back into JSON. Panics if no modification happened.
+    fn modify_and_parse(body: &Value) -> Value {
+        let bytes = serde_json::to_vec(body).unwrap();
+        let modified = modify_request(&bytes).unwrap();
+        serde_json::from_slice(&modified).unwrap()
+    }
+
+    // ── Chunked transfer encoding helpers ─────────────────────────────────
+
+    #[test]
+    fn test_dechunk_basic() {
+        let decoded = dechunk(b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n");
+        assert_eq!(decoded, b"hello world");
+    }
+
+    #[test]
+    fn test_dechunk_single() {
+        let decoded = dechunk(b"b\r\nhello world\r\n0\r\n\r\n");
+        assert_eq!(decoded, b"hello world");
+    }
+
+    #[test]
+    fn test_rechunk() {
+        let encoded = rechunk(b"hello world");
+        assert_eq!(encoded, b"b\r\nhello world\r\n0\r\n\r\n");
+    }
+
+    #[test]
+    fn test_dechunk_rechunk_roundtrip() {
+        // Decoding, re-encoding as one chunk, and decoding again must give
+        // back the same payload.
+        let first_pass = dechunk(b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n");
+        let second_pass = dechunk(&rechunk(&first_pass));
+        assert_eq!(first_pass, second_pass);
+    }
+
+    // ── Request modification ──────────────────────────────────────────────
+
+    #[test]
+    fn test_modify_strips_tools() {
+        // One tool entry per declaration.
+        let tool = |name: &str, description: &str| {
+            serde_json::json!({
+                "functionDeclarations": [
+                    {"name": name, "description": description, "parameters": {}}
+                ]
+            })
+        };
+        let body = envelope(serde_json::json!({
+            "contents": [{"role": "user", "parts": [{"text": "hello"}]}],
+            "tools": [
+                tool("view_file", "view"),
+                tool("browser_subagent", "browse"),
+                tool("grep_search", "grep"),
+                tool("generate_image", "img"),
+            ],
+            "generationConfig": {}
+        }));
+
+        let result = modify_and_parse(&body);
+        let surviving: Vec<&str> = result["request"]["tools"]
+            .as_array()
+            .unwrap()
+            .iter()
+            .map(|entry| entry["functionDeclarations"][0]["name"].as_str().unwrap())
+            .collect();
+
+        for kept in ["view_file", "grep_search"] {
+            assert!(surviving.contains(&kept));
+        }
+        for stripped in ["browser_subagent", "generate_image"] {
+            assert!(!surviving.contains(&stripped));
+        }
+    }
+
+    #[test]
+    fn test_modify_strips_system_sections() {
+        let sys_text = "<identity>I am an AI</identity>\n<web_application_development>lots of web dev stuff here</web_application_development>\n<communication_style>be helpful</communication_style>";
+        let body = envelope(serde_json::json!({
+            "contents": [{"role": "user", "parts": [{"text": "hello"}]}],
+            "systemInstruction": {"parts": [{"text": sys_text}]},
+            "tools": [],
+            "generationConfig": {}
+        }));
+
+        let result = modify_and_parse(&body);
+        let stripped = result["request"]["systemInstruction"]["parts"][0]["text"]
+            .as_str()
+            .unwrap();
+
+        // Neighbouring sections survive; the stripped one is gone, tags and all.
+        for kept in ["<identity>", "<communication_style>"] {
+            assert!(stripped.contains(kept));
+        }
+        for gone in ["web_application_development", "lots of web dev stuff"] {
+            assert!(!stripped.contains(gone));
+        }
+    }
+
+    #[test]
+    fn test_modify_strips_empty_user_rules() {
+        let body = envelope(serde_json::json!({
+            "contents": [
+                {"role": "user", "parts": [{"text": "<user_rules>\nThe user has not defined any custom rules.\n</user_rules>"}]},
+                {"role": "user", "parts": [{"text": "hello world"}]},
+            ],
+            "tools": [],
+            "generationConfig": {}
+        }));
+
+        let result = modify_and_parse(&body);
+        let remaining = result["request"]["contents"].as_array().unwrap();
+
+        assert_eq!(remaining.len(), 1);
+        assert_eq!(
+            remaining[0]["parts"][0]["text"].as_str().unwrap(),
+            "hello world"
+        );
+    }
+}
--- a/src/mitm/proxy.rs
+++ b/src/mitm/proxy.rs
@@ -363,7 +363,7 @@ async fn handle_http_over_tls(
    mut client: tokio_rustls::server::TlsStream<TcpStream>,
    domain: &str,
    store: MitmStore,
-    _modify_requests: bool,
+    modify_requests: bool,
 ) -> Result<(), String> {
    let mut tmp = vec![0u8; 32768];

@@ -535,12 +535,36 @@ async fn handle_http_over_tls(

        // Log LLM calls at info, everything else at debug
        if req_path.contains("streamGenerateContent") {
+            let body_len = request_buf.len() - headers_end;
            info!(
                domain,
                req_path = %req_path,
+                body_len,
                cascade = ?cascade_hint,
                "MITM: forwarding LLM request"
            );
+
+            // ── Request modification ─────────────────────────────────────
+            // Dechunk body → check if agent request → modify → rechunk
+            if modify_requests && body_len > 0 {
+                let body_slice = &request_buf[headers_end..];
+                let raw_body = super::modify::dechunk(body_slice);
+
+                // Only modify "agent" requests, not "checkpoint" (LS internal)
+                let is_agent = raw_body
+                    .windows(20)
+                    .any(|w| w == b"\"requestType\":\"agent" || w == b"requestType\":\"agent\"");
+
+                if is_agent {
+                    if let Some(modified_body) = super::modify::modify_request(&raw_body) {
+                        // Rebuild request_buf: original headers + rechunked modified body
+                        let new_chunked = super::modify::rechunk(&modified_body);
+                        let mut new_buf = request_buf[..headers_end].to_vec();
+                        new_buf.extend_from_slice(&new_chunked);
+                        request_buf = new_buf;
+                    }
+                }
+            }
        } else {
            debug!(
                domain,