feat: MITM request modification — strip bloat from LLM API requests

Intercepts streamGenerateContent requests and trims:
- System instruction: strips web_application_development, knowledge_discovery,
  persistent_context, skills sections (~18KB saved)
- Content messages: strips empty user_rules, workflows boilerplate,
  conversation summaries (~4.5KB saved)
- Tools: keeps 12 essential coding tools, strips 8 non-essential
  (browser_subagent, generate_image, search_web, etc. ~6KB saved)

Total: ~55% reduction in request size while keeping identity, user info,
and all coding-relevant tools intact. Only 'agent'-type requests are modified;
checkpoint requests pass through unmodified.

Also:
- Standalone mode is now the default (use --no-standalone to attach to
  existing LS)
- Enable request modification by default
- Add mold linker, sccache, nextest config (8 thread cap)
- Add .cargo/config.toml and .config/nextest.toml
This commit is contained in:
Nikketryhard
2026-02-14 18:35:07 -06:00
parent 061b08fc8f
commit f0c2574c88
7 changed files with 391 additions and 6 deletions

7
.cargo/config.toml Normal file
View File

@@ -0,0 +1,7 @@
# x86_64 Linux builds: invoke clang as the linker driver and pass it
# `-fuse-ld=mold` so the actual linking is done by mold.
[target.x86_64-unknown-linux-gnu]
linker = "clang"
rustflags = ["-C", "link-arg=-fuse-ld=mold"]
# All builds: run rustc through the sccache wrapper and cap Cargo at
# 8 parallel jobs.
[build]
rustc-wrapper = "sccache"
jobs = 8

7
.config/nextest.toml Normal file
View File

@@ -0,0 +1,7 @@
[profile.default]
# Cap test threads to 8. The thread limit is a per-profile setting named
# `test-threads`; a `threads` key under `[store]` is not a recognised option
# and leaves the cap ineffective.
test-threads = 8
retries = 0
slow-timeout = { period = "30s" }

1
.gitignore vendored
View File

@@ -6,3 +6,4 @@
*.txt
!README.txt
test_output.json
captured-request-*.json

View File

@@ -46,9 +46,9 @@ struct Cli {
#[arg(long, default_value_t = 8742)]
mitm_port: u16,
/// Use a standalone LS (does not touch the real LS)
/// Disable standalone LS — attach to the real running LS instead
#[arg(long)]
standalone: bool,
no_standalone: bool,
}
#[tokio::main]
@@ -91,7 +91,7 @@ async fn main() {
};
// ── Step 2: Backend discovery (or standalone LS spawn) ─────────────────────
let standalone_ls = if cli.standalone {
let standalone_ls = if !cli.no_standalone {
// Standalone mode: discover main LS config, spawn our own
let main_config = match standalone::discover_main_ls_config() {
Ok(c) => c,
@@ -182,7 +182,7 @@ async fn main() {
let ca_pem = ca.ca_pem_path.display().to_string();
let config = mitm::proxy::MitmConfig {
port: cli.mitm_port,
modify_requests: false,
modify_requests: true,
};
match mitm::proxy::run(ca, mitm_store.clone(), config).await {
Ok((port, handle)) => {
@@ -228,7 +228,7 @@ async fn main() {
// Periodic backend refresh — keeps LS connection details fresh
// (skip in standalone mode — the port is fixed and discover() would overwrite it)
let is_standalone = cli.standalone;
let is_standalone = !cli.no_standalone;
let refresh_backend = Arc::clone(&state.backend);
let refresh_handle = tokio::spawn(async move {
if is_standalone {

View File

@@ -14,6 +14,7 @@
pub mod ca;
pub mod h2_handler;
pub mod intercept;
pub mod modify;
pub mod proto;
pub mod proxy;
pub mod store;

345
src/mitm/modify.rs Normal file
View File

@@ -0,0 +1,345 @@
//! Request body modification for intercepted LLM API calls.
//!
//! Strips redundant/verbose sections from the Google Gemini API request
//! to reduce token usage while keeping the request looking legitimate.
//! Nothing structural changes — just trimming fat.
use serde_json::Value;
use tracing::info;
/// Tools to KEEP — essential coding tools. Everything else gets stripped.
///
/// `modify_request` reads the name of the first function declaration of each
/// `request.tools[]` entry and drops the entry when that name is not listed
/// here. Entries whose name cannot be read as a string are left untouched.
const KEEP_TOOLS: &[&str] = &[
"view_file",
"write_to_file",
"replace_file_content",
"multi_replace_file_content",
"run_command",
"command_status",
"send_command_input",
"grep_search",
"find_by_name",
"list_dir",
"view_file_outline",
"view_code_item",
];
/// System instruction sections to STRIP (matched by XML tag name).
/// These are verbose instructional manuals that add tokens but don't
/// meaningfully affect output quality for coding tasks.
///
/// For every name listed, `modify_request` looks for `<name>` and `</name>`
/// in the system instruction text and removes that span, tags included.
/// Only the first occurrence of each tag is considered.
const STRIP_SYSTEM_SECTIONS: &[&str] = &[
"web_application_development",
"knowledge_discovery",
"persistent_context",
"skills",
];
/// Content message patterns to strip entirely.
/// These appear as separate `contents[]` entries with recognizable prefixes.
///
/// A `contents[]` entry is removed when the text of its first part starts
/// with one of these strings. The first prefix only matches the "no custom
/// rules" boilerplate; the second matches every message that opens with a
/// `<workflows>` tag followed by a newline, whatever comes after it.
const STRIP_CONTENT_PREFIXES: &[&str] = &[
"<user_rules>\nThe user has not defined any custom rules.",
"<workflows>\n",
];
/// Trim a `streamGenerateContent` request body and return the re-serialized JSON.
///
/// Three independent passes run over the parsed body:
///
/// 1. every section named in `STRIP_SYSTEM_SECTIONS` is cut out of
///    `request.systemInstruction.parts[0].text`;
/// 2. `request.contents[]` entries whose first part starts with one of
///    `STRIP_CONTENT_PREFIXES` are dropped, and the "# Conversation History"
///    block is cut out of the remaining entries;
/// 3. `request.tools[]` entries whose first function declaration is not named
///    in `KEEP_TOOLS` are dropped.
///
/// The input slice is never mutated; the result is a freshly allocated buffer.
///
/// Returns `None` when the body is not valid JSON, when no pass changed
/// anything, or when re-serialization fails. Callers should forward the
/// original bytes in that case.
pub fn modify_request(body: &[u8]) -> Option<Vec<u8>> {
    let mut json: Value = serde_json::from_slice(body).ok()?;
    let original_size = body.len();
    let mut changes: Vec<String> = Vec::new();

    // ── 1. Strip verbose system instruction sections ──────────────────────
    if let Some(slot) = json.pointer_mut("/request/systemInstruction/parts/0/text") {
        // Copy the text out first so the slot can be overwritten afterwards.
        if let Some(mut text) = slot.as_str().map(str::to_owned) {
            let original_len = text.len();
            for section in STRIP_SYSTEM_SECTIONS {
                if let Some((stripped, removed)) = strip_tagged_section(&text, section) {
                    text = stripped;
                    changes.push(format!("strip <{section}> ({removed} chars)"));
                }
            }
            if text.len() != original_len {
                *slot = Value::String(text);
            }
        }
    }

    // ── 2. Strip bloated content messages ─────────────────────────────────
    if let Some(contents) = json
        .pointer_mut("/request/contents")
        .and_then(|v| v.as_array_mut())
    {
        let before = contents.len();

        // Remove messages matching strip prefixes; messages without a text
        // first part are always kept.
        contents.retain(|msg| match msg["parts"][0]["text"].as_str() {
            Some(text) => !STRIP_CONTENT_PREFIXES
                .iter()
                .any(|prefix| text.starts_with(prefix)),
            None => true,
        });

        // Strip conversation summaries from remaining messages.
        // These appear as "# Conversation History\nHere are the conversation IDs..."
        for msg in contents.iter_mut() {
            let stripped = msg["parts"][0]["text"]
                .as_str()
                .and_then(strip_conversation_summaries);
            if let Some((trimmed, saved)) = stripped {
                changes.push(format!("strip conversation summaries ({saved} chars)"));
                msg["parts"][0]["text"] = Value::String(trimmed);
            }
        }

        let removed_msgs = before - contents.len();
        if removed_msgs > 0 {
            changes.push(format!("remove {removed_msgs} content messages"));
        }
    }

    // ── 3. Strip non-essential tools ─────────────────────────────────────
    if let Some(tools) = json
        .pointer_mut("/request/tools")
        .and_then(|v| v.as_array_mut())
    {
        let before = tools.len();
        // NOTE(review): only the first function declaration of each entry is
        // inspected; an entry bundling several declarations is kept or dropped
        // as a whole — confirm this matches the captured request shape.
        tools.retain(|tool| match tool["functionDeclarations"][0]["name"].as_str() {
            Some(name) => KEEP_TOOLS.contains(&name),
            None => true, // keep unknown structure
        });
        let removed = before - tools.len();
        if removed > 0 {
            changes.push(format!(
                "strip {removed}/{before} tools (keep {})",
                KEEP_TOOLS.len()
            ));
        }
    }

    if changes.is_empty() {
        return None; // Nothing modified
    }

    let modified_bytes = serde_json::to_vec(&json).ok()?;
    // Signed: re-serialization may in principle produce a larger body.
    let saved = original_size as i64 - modified_bytes.len() as i64;
    let pct = if original_size > 0 {
        (saved as f64 / original_size as f64 * 100.0) as i32
    } else {
        0
    };
    info!(
        original = original_size,
        modified = modified_bytes.len(),
        saved_bytes = saved,
        saved_pct = pct,
        "MITM: request modified [{}]",
        changes.join(", ")
    );
    Some(modified_bytes)
}

/// Remove the first `<section>…</section>` span (tags included) from `text`.
///
/// The closing tag is searched for only *after* the opening tag, so a stray
/// closing tag earlier in the text can neither underflow the length
/// computation nor duplicate content. Returns the shortened text and the
/// number of bytes removed, or `None` when no complete span exists.
fn strip_tagged_section(text: &str, section: &str) -> Option<(String, usize)> {
    let open_tag = format!("<{section}>");
    let close_tag = format!("</{section}>");
    let start = text.find(&open_tag)?;
    let body_start = start + open_tag.len();
    let end = body_start + text[body_start..].find(&close_tag)? + close_tag.len();

    let mut stripped = String::with_capacity(text.len() - (end - start));
    stripped.push_str(&text[..start]);
    stripped.push_str(&text[end..]);
    Some((stripped, end - start))
}

/// Cut the "# Conversation History" block out of a message text.
///
/// The block runs from the heading up to and including the first
/// `</conversation_summaries>` that follows it; whitespace right after the
/// marker is dropped as well. Without a marker after the heading, everything
/// from the heading onward is removed and trailing whitespace is trimmed.
/// Returns the shortened text and the number of bytes saved, or `None` when
/// the heading is absent.
fn strip_conversation_summaries(text: &str) -> Option<(String, usize)> {
    const HEADING: &str = "# Conversation History\n";
    const END_MARKER: &str = "</conversation_summaries>";

    let start = text.find(HEADING)?;
    let trimmed = match text[start..].find(END_MARKER) {
        Some(offset) => {
            let end_pos = start + offset + END_MARKER.len();
            format!("{}{}", &text[..start], text[end_pos..].trim_start())
        }
        // No end marker — just cut from "# Conversation History" onward
        None => text[..start].trim_end().to_string(),
    };
    // At least the heading itself is gone, so the subtraction cannot underflow.
    let saved = text.len() - trimmed.len();
    Some((trimmed, saved))
}
/// Dechunk an HTTP chunked-encoded body into raw bytes.
///
/// Input: "hex_size\r\n data\r\n hex_size\r\n data\r\n 0\r\n\r\n"
/// Output: concatenated data segments.
///
/// Parsing is deliberately lenient and never fails:
/// - chunk extensions (anything after `;` on the size line) are ignored;
/// - decoding stops at the terminal `0` chunk, at a size line that is not
///   valid hexadecimal, or when no further `\r\n` can be found;
/// - a chunk whose declared size runs past the end of `data` contributes
///   whatever bytes are actually present.
///
/// Trailers after the terminal chunk are not returned.
pub fn dechunk(data: &[u8]) -> Vec<u8> {
    let mut result = Vec::with_capacity(data.len());
    let mut pos = 0;
    while pos < data.len() {
        // Find end of chunk size line
        let line_end = match data[pos..].windows(2).position(|w| w == b"\r\n") {
            Some(offset) => pos + offset,
            None => break,
        };

        // Parse hex chunk size (ignore chunk extensions after ';')
        let size_str = std::str::from_utf8(&data[pos..line_end])
            .unwrap_or("")
            .split(';')
            .next()
            .unwrap_or("")
            .trim();
        let chunk_size = match usize::from_str_radix(size_str, 16) {
            Ok(0) | Err(_) => break, // terminal chunk or malformed size line
            Ok(n) => n,
        };

        let data_start = line_end + 2; // skip \r\n
        // The declared size comes off the wire: saturate so an absurdly large
        // value clamps to the end of the buffer instead of overflowing.
        let data_end = data_start.saturating_add(chunk_size).min(data.len());
        result.extend_from_slice(&data[data_start..data_end]);

        // Skip past data + trailing \r\n
        pos = data_end.saturating_add(2);
    }
    result
}
/// Re-encode data as a single HTTP chunk + terminal chunk.
///
/// Output layout: `<hex len>\r\n<data>\r\n0\r\n\r\n`.
///
/// Empty input yields only the terminal chunk (`0\r\n\r\n`). A zero-sized
/// chunk *is* the terminator, so emitting one for the data and then a second
/// one would leave stray bytes on the connection after the body.
pub fn rechunk(data: &[u8]) -> Vec<u8> {
    const LAST_CHUNK: &[u8] = b"0\r\n\r\n";

    if data.is_empty() {
        return LAST_CHUNK.to_vec();
    }

    let size_line = format!("{:x}\r\n", data.len());
    let mut result = Vec::with_capacity(size_line.len() + data.len() + 2 + LAST_CHUNK.len());
    result.extend_from_slice(size_line.as_bytes());
    result.extend_from_slice(data);
    result.extend_from_slice(b"\r\n");
    result.extend_from_slice(LAST_CHUNK);
    result
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Two data chunks followed by the terminal chunk decode to their concatenation.
    #[test]
    fn test_dechunk_basic() {
        let chunked = b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n";
        let result = dechunk(chunked);
        assert_eq!(result, b"hello world");
    }

    /// A single chunk with a lowercase hex size decodes to its payload.
    #[test]
    fn test_dechunk_single() {
        let chunked = b"b\r\nhello world\r\n0\r\n\r\n";
        let result = dechunk(chunked);
        assert_eq!(result, b"hello world");
    }

    /// The payload is framed as one chunk plus the terminal chunk.
    #[test]
    fn test_rechunk() {
        let data = b"hello world";
        let chunked = rechunk(data);
        let expected = b"b\r\nhello world\r\n0\r\n\r\n";
        assert_eq!(chunked, expected);
    }

    /// Decoding, re-encoding and decoding again preserves the payload.
    ///
    /// The decoded bytes and the re-encoded framing are pinned explicitly so
    /// the test cannot pass merely because both decodes are equally wrong.
    #[test]
    fn test_dechunk_rechunk_roundtrip() {
        let original = b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n";
        let data = dechunk(original);
        assert_eq!(data, b"hello world");

        let rechunked = rechunk(&data);
        assert_eq!(rechunked, b"b\r\nhello world\r\n0\r\n\r\n");

        let data2 = dechunk(&rechunked);
        assert_eq!(data, data2);
    }

    /// Tools outside `KEEP_TOOLS` are removed; listed tools survive.
    #[test]
    fn test_modify_strips_tools() {
        let body = serde_json::json!({
            "project": "test",
            "requestId": "test/1",
            "request": {
                "contents": [{"role": "user", "parts": [{"text": "hello"}]}],
                "tools": [
                    {"functionDeclarations": [{"name": "view_file", "description": "view", "parameters": {}}]},
                    {"functionDeclarations": [{"name": "browser_subagent", "description": "browse", "parameters": {}}]},
                    {"functionDeclarations": [{"name": "grep_search", "description": "grep", "parameters": {}}]},
                    {"functionDeclarations": [{"name": "generate_image", "description": "img", "parameters": {}}]},
                ],
                "generationConfig": {}
            },
            "model": "test"
        });
        let bytes = serde_json::to_vec(&body).unwrap();
        let modified = modify_request(&bytes).unwrap();
        let result: Value = serde_json::from_slice(&modified).unwrap();
        let tool_names: Vec<&str> = result["request"]["tools"]
            .as_array()
            .unwrap()
            .iter()
            .map(|t| t["functionDeclarations"][0]["name"].as_str().unwrap())
            .collect();
        // Exactly the two allow-listed tools remain.
        assert_eq!(tool_names.len(), 2);
        assert!(tool_names.contains(&"view_file"));
        assert!(tool_names.contains(&"grep_search"));
        assert!(!tool_names.contains(&"browser_subagent"));
        assert!(!tool_names.contains(&"generate_image"));
    }

    /// A listed system section is cut out while its neighbours stay intact.
    #[test]
    fn test_modify_strips_system_sections() {
        let sys_text = "<identity>I am an AI</identity>\n<web_application_development>lots of web dev stuff here</web_application_development>\n<communication_style>be helpful</communication_style>";
        let body = serde_json::json!({
            "project": "test",
            "requestId": "test/1",
            "request": {
                "contents": [{"role": "user", "parts": [{"text": "hello"}]}],
                "systemInstruction": {"parts": [{"text": sys_text}]},
                "tools": [],
                "generationConfig": {}
            },
            "model": "test"
        });
        let bytes = serde_json::to_vec(&body).unwrap();
        let modified = modify_request(&bytes).unwrap();
        let result: Value = serde_json::from_slice(&modified).unwrap();
        let new_sys = result["request"]["systemInstruction"]["parts"][0]["text"]
            .as_str()
            .unwrap();
        assert!(new_sys.contains("<identity>"));
        assert!(new_sys.contains("<communication_style>"));
        assert!(!new_sys.contains("web_application_development"));
        assert!(!new_sys.contains("lots of web dev stuff"));
    }

    /// The "no custom rules" boilerplate message is dropped; real messages remain.
    #[test]
    fn test_modify_strips_empty_user_rules() {
        let body = serde_json::json!({
            "project": "test",
            "requestId": "test/1",
            "request": {
                "contents": [
                    {"role": "user", "parts": [{"text": "<user_rules>\nThe user has not defined any custom rules.\n</user_rules>"}]},
                    {"role": "user", "parts": [{"text": "hello world"}]},
                ],
                "tools": [],
                "generationConfig": {}
            },
            "model": "test"
        });
        let bytes = serde_json::to_vec(&body).unwrap();
        let modified = modify_request(&bytes).unwrap();
        let result: Value = serde_json::from_slice(&modified).unwrap();
        let contents = result["request"]["contents"].as_array().unwrap();
        assert_eq!(contents.len(), 1);
        assert_eq!(contents[0]["parts"][0]["text"].as_str().unwrap(), "hello world");
    }
}

View File

@@ -363,7 +363,7 @@ async fn handle_http_over_tls(
mut client: tokio_rustls::server::TlsStream<TcpStream>,
domain: &str,
store: MitmStore,
_modify_requests: bool,
modify_requests: bool,
) -> Result<(), String> {
let mut tmp = vec![0u8; 32768];
@@ -535,12 +535,36 @@ async fn handle_http_over_tls(
// Log LLM calls at info, everything else at debug
if req_path.contains("streamGenerateContent") {
let body_len = request_buf.len() - headers_end;
info!(
domain,
req_path = %req_path,
body_len,
cascade = ?cascade_hint,
"MITM: forwarding LLM request"
);
// ── Request modification ─────────────────────────────────────
// Dechunk body → check if agent request → modify → rechunk
if modify_requests && body_len > 0 {
let body_slice = &request_buf[headers_end..];
let raw_body = super::modify::dechunk(body_slice);
// Only modify "agent" requests, not "checkpoint" (LS internal)
let is_agent = raw_body
.windows(20)
.any(|w| w == b"\"requestType\":\"agent" || w == b"requestType\":\"agent\"");
if is_agent {
if let Some(modified_body) = super::modify::modify_request(&raw_body) {
// Rebuild request_buf: original headers + rechunked modified body
let new_chunked = super::modify::rechunk(&modified_body);
let mut new_buf = request_buf[..headers_end].to_vec();
new_buf.extend_from_slice(&new_chunked);
request_buf = new_buf;
}
}
}
} else {
debug!(
domain,