feat: MITM request modification — strip bloat from LLM API requests
Intercepts streamGenerateContent requests and trims:
- System instruction: strips the web_application_development, knowledge_discovery, persistent_context, and skills sections (~18KB saved)
- Content messages: strips empty user_rules, workflows boilerplate, and conversation summaries (~4.5KB saved)
- Tools: keeps 12 essential coding tools and strips 8 non-essential ones (browser_subagent, generate_image, search_web, etc.; ~6KB saved)

Total: ~55% reduction in request size while keeping identity, user info, and all coding-relevant tools intact. Only 'agent' type requests are modified; checkpoint requests pass through unmodified.

Also:
- Standalone mode is now the default (use --no-standalone to attach to an existing LS)
- Enable request modification by default
- Add mold linker, sccache, nextest config (8 thread cap)
- Add .cargo/config.toml and .config/nextest.toml
This commit is contained in:
10
src/main.rs
10
src/main.rs
@@ -46,9 +46,9 @@ struct Cli {
|
||||
#[arg(long, default_value_t = 8742)]
|
||||
mitm_port: u16,
|
||||
|
||||
/// Use a standalone LS (does not touch the real LS)
|
||||
/// Disable standalone LS — attach to the real running LS instead
|
||||
#[arg(long)]
|
||||
standalone: bool,
|
||||
no_standalone: bool,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
@@ -91,7 +91,7 @@ async fn main() {
|
||||
};
|
||||
|
||||
// ── Step 2: Backend discovery (or standalone LS spawn) ─────────────────────
|
||||
let standalone_ls = if cli.standalone {
|
||||
let standalone_ls = if !cli.no_standalone {
|
||||
// Standalone mode: discover main LS config, spawn our own
|
||||
let main_config = match standalone::discover_main_ls_config() {
|
||||
Ok(c) => c,
|
||||
@@ -182,7 +182,7 @@ async fn main() {
|
||||
let ca_pem = ca.ca_pem_path.display().to_string();
|
||||
let config = mitm::proxy::MitmConfig {
|
||||
port: cli.mitm_port,
|
||||
modify_requests: false,
|
||||
modify_requests: true,
|
||||
};
|
||||
match mitm::proxy::run(ca, mitm_store.clone(), config).await {
|
||||
Ok((port, handle)) => {
|
||||
@@ -228,7 +228,7 @@ async fn main() {
|
||||
|
||||
// Periodic backend refresh — keeps LS connection details fresh
|
||||
// (skip in standalone mode — the port is fixed and discover() would overwrite it)
|
||||
let is_standalone = cli.standalone;
|
||||
let is_standalone = !cli.no_standalone;
|
||||
let refresh_backend = Arc::clone(&state.backend);
|
||||
let refresh_handle = tokio::spawn(async move {
|
||||
if is_standalone {
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
pub mod ca;
|
||||
pub mod h2_handler;
|
||||
pub mod intercept;
|
||||
pub mod modify;
|
||||
pub mod proto;
|
||||
pub mod proxy;
|
||||
pub mod store;
|
||||
|
||||
345
src/mitm/modify.rs
Normal file
345
src/mitm/modify.rs
Normal file
@@ -0,0 +1,345 @@
|
||||
//! Request body modification for intercepted LLM API calls.
|
||||
//!
|
||||
//! Strips redundant/verbose sections from the Google Gemini API request
|
||||
//! to reduce token usage while keeping the request looking legitimate.
|
||||
//! Nothing structural changes — just trimming fat.
|
||||
|
||||
use serde_json::Value;
|
||||
use tracing::info;
|
||||
|
||||
/// Allow-list of tool names that survive request trimming.
///
/// `modify_request` compares the name of each tool's first function
/// declaration against this list and drops every tool that is not named here.
const KEEP_TOOLS: &[&str] = &[
    "view_file", "write_to_file", "replace_file_content", "multi_replace_file_content",
    "run_command", "command_status", "send_command_input",
    "grep_search", "find_by_name", "list_dir", "view_file_outline", "view_code_item",
];
|
||||
|
||||
/// Names of the XML-style sections removed from the system instruction.
///
/// Each entry is a bare tag name: `modify_request` builds `<name>` and
/// `</name>` from it and cuts everything from the opening tag through the
/// closing tag. The listed sections are considered verbose manuals whose
/// token cost is not justified for coding tasks.
const STRIP_SYSTEM_SECTIONS: &[&str] = &[
    "web_application_development", "knowledge_discovery",
    "persistent_context", "skills",
];
|
||||
|
||||
/// Prefixes that mark a whole `contents[]` entry for removal.
///
/// An entry is dropped when the text of its first part starts with any of
/// these strings (exact, case-sensitive prefix match).
const STRIP_CONTENT_PREFIXES: &[&str] = &[
    "<user_rules>\nThe user has not defined any custom rules.", "<workflows>\n",
];
|
||||
|
||||
/// Modify a streamGenerateContent request body in-place.
|
||||
/// Returns the modified JSON bytes, or None if modification wasn't possible.
|
||||
pub fn modify_request(body: &[u8]) -> Option<Vec<u8>> {
|
||||
let mut json: Value = serde_json::from_slice(body).ok()?;
|
||||
|
||||
let original_size = body.len();
|
||||
let mut changes: Vec<String> = Vec::new();
|
||||
|
||||
// ── 1. Strip verbose system instruction sections ──────────────────────
|
||||
if let Some(sys) = json
|
||||
.pointer_mut("/request/systemInstruction/parts/0/text")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| s.to_string())
|
||||
{
|
||||
let mut modified = sys.clone();
|
||||
for section in STRIP_SYSTEM_SECTIONS {
|
||||
let pattern = format!("<{section}>");
|
||||
let end_pattern = format!("</{section}>");
|
||||
if let (Some(start), Some(end)) = (modified.find(&pattern), modified.find(&end_pattern))
|
||||
{
|
||||
let end_pos = end + end_pattern.len();
|
||||
let removed = end_pos - start;
|
||||
modified = format!("{}{}", &modified[..start], &modified[end_pos..]);
|
||||
changes.push(format!("strip <{section}> ({removed} chars)"));
|
||||
}
|
||||
}
|
||||
|
||||
if modified.len() != sys.len() {
|
||||
json["request"]["systemInstruction"]["parts"][0]["text"] =
|
||||
Value::String(modified);
|
||||
}
|
||||
}
|
||||
|
||||
// ── 2. Strip bloated content messages ─────────────────────────────────
|
||||
if let Some(contents) = json
|
||||
.pointer_mut("/request/contents")
|
||||
.and_then(|v| v.as_array_mut())
|
||||
{
|
||||
let before = contents.len();
|
||||
|
||||
// Remove messages matching strip prefixes
|
||||
contents.retain(|msg| {
|
||||
if let Some(text) = msg["parts"][0]["text"].as_str() {
|
||||
for prefix in STRIP_CONTENT_PREFIXES {
|
||||
if text.starts_with(prefix) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
});
|
||||
|
||||
// Strip conversation summaries from remaining messages
|
||||
// These appear as "# Conversation History\nHere are the conversation IDs..."
|
||||
for msg in contents.iter_mut() {
|
||||
if let Some(text) = msg["parts"][0]["text"].as_str().map(|s| s.to_string()) {
|
||||
if let Some(start) = text.find("# Conversation History\n") {
|
||||
// Find the end of the conversation summaries block
|
||||
let end_marker = "</conversation_summaries>";
|
||||
let trimmed = if let Some(end) = text.find(end_marker) {
|
||||
let end_pos = end + end_marker.len();
|
||||
// Find next non-whitespace after end marker
|
||||
let rest = text[end_pos..].trim_start();
|
||||
format!("{}{}", &text[..start], rest)
|
||||
} else {
|
||||
// No end marker — just cut from "# Conversation History" onward
|
||||
text[..start].trim_end().to_string()
|
||||
};
|
||||
|
||||
if trimmed.len() < text.len() {
|
||||
let saved = text.len() - trimmed.len();
|
||||
changes.push(format!("strip conversation summaries ({saved} chars)"));
|
||||
msg["parts"][0]["text"] = Value::String(trimmed);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let removed_msgs = before - contents.len();
|
||||
if removed_msgs > 0 {
|
||||
changes.push(format!("remove {removed_msgs} content messages"));
|
||||
}
|
||||
}
|
||||
|
||||
// ── 3. Strip non-essential tools ─────────────────────────────────────
|
||||
if let Some(tools) = json
|
||||
.pointer_mut("/request/tools")
|
||||
.and_then(|v| v.as_array_mut())
|
||||
{
|
||||
let before = tools.len();
|
||||
|
||||
tools.retain(|tool| {
|
||||
if let Some(name) = tool["functionDeclarations"][0]["name"].as_str() {
|
||||
KEEP_TOOLS.contains(&name)
|
||||
} else {
|
||||
true // keep unknown structure
|
||||
}
|
||||
});
|
||||
|
||||
let removed = before - tools.len();
|
||||
if removed > 0 {
|
||||
changes.push(format!("strip {removed}/{before} tools (keep {})", KEEP_TOOLS.len()));
|
||||
}
|
||||
}
|
||||
|
||||
if changes.is_empty() {
|
||||
return None; // Nothing modified
|
||||
}
|
||||
|
||||
let modified_bytes = serde_json::to_vec(&json).ok()?;
|
||||
let saved = original_size as i64 - modified_bytes.len() as i64;
|
||||
let pct = if original_size > 0 {
|
||||
(saved as f64 / original_size as f64 * 100.0) as i32
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
info!(
|
||||
original = original_size,
|
||||
modified = modified_bytes.len(),
|
||||
saved_bytes = saved,
|
||||
saved_pct = pct,
|
||||
"MITM: request modified [{}]",
|
||||
changes.join(", ")
|
||||
);
|
||||
|
||||
Some(modified_bytes)
|
||||
}
|
||||
|
||||
/// Decode an HTTP chunked-encoded body into the concatenated chunk payloads.
///
/// Input: `hex_size\r\n data\r\n hex_size\r\n data\r\n 0\r\n\r\n`
/// Output: the data segments joined together.
///
/// Chunk extensions (anything after `;` on the size line) are ignored.
/// Decoding is best-effort and never panics: it stops at the terminal `0`
/// chunk, at a size line that is not valid hex, or when no further `\r\n`
/// can be found, and returns whatever was decoded up to that point. A chunk
/// whose declared size runs past the end of `data` contributes only the
/// bytes that are actually present.
pub fn dechunk(data: &[u8]) -> Vec<u8> {
    let mut result = Vec::with_capacity(data.len());
    let mut pos = 0;

    while pos < data.len() {
        // Find end of chunk size line
        let line_end = match data[pos..].windows(2).position(|w| w == b"\r\n") {
            Some(offset) => pos + offset,
            None => break,
        };

        // Parse hex chunk size (ignore chunk extensions after ';')
        let size_str = std::str::from_utf8(&data[pos..line_end])
            .unwrap_or("")
            .split(';')
            .next()
            .unwrap_or("")
            .trim();

        let chunk_size = match usize::from_str_radix(size_str, 16) {
            // Terminal chunk, or a size line we cannot parse: stop decoding.
            Ok(0) | Err(_) => break,
            Ok(n) => n,
        };

        let data_start = line_end + 2; // skip \r\n

        // The declared size comes off the wire and is untrusted: a value near
        // usize::MAX must clamp to the buffer end instead of overflowing.
        let data_end = data_start.saturating_add(chunk_size).min(data.len());
        result.extend_from_slice(&data[data_start..data_end]);

        // Skip past data + trailing \r\n
        pos = data_end + 2;
    }

    result
}
|
||||
|
||||
/// Re-encode `data` as a single HTTP chunk followed by the terminal chunk.
///
/// Non-empty input yields `<hex len>\r\n<data>\r\n0\r\n\r\n`. Empty input
/// yields just the terminal chunk `0\r\n\r\n`: a zero-length data chunk would
/// itself be read as the terminator, leaving stray bytes after the end of
/// the body.
pub fn rechunk(data: &[u8]) -> Vec<u8> {
    const CRLF: &[u8] = b"\r\n";
    const TERMINAL_CHUNK: &[u8] = b"0\r\n\r\n";

    if data.is_empty() {
        return TERMINAL_CHUNK.to_vec();
    }

    let hex_size = format!("{:x}", data.len());
    let mut result = Vec::with_capacity(
        hex_size.len() + CRLF.len() + data.len() + CRLF.len() + TERMINAL_CHUNK.len(),
    );
    result.extend_from_slice(hex_size.as_bytes());
    result.extend_from_slice(CRLF);
    result.extend_from_slice(data);
    result.extend_from_slice(CRLF);
    result.extend_from_slice(TERMINAL_CHUNK);
    result
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Wrap `request` in the outer envelope, push it through
    /// `modify_request`, and parse what comes back.
    ///
    /// Panics when `modify_request` returns `None`, i.e. when the request
    /// was left unmodified.
    fn modify_and_parse(request: Value) -> Value {
        let envelope = serde_json::json!({
            "project": "test",
            "requestId": "test/1",
            "request": request,
            "model": "test"
        });
        let raw = serde_json::to_vec(&envelope).unwrap();
        let trimmed = modify_request(&raw).unwrap();
        serde_json::from_slice(&trimmed).unwrap()
    }

    // ── Chunked transfer encoding helpers ────────────────────────────────

    #[test]
    fn test_dechunk_basic() {
        // Two data chunks followed by the terminator.
        assert_eq!(
            dechunk(b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n"),
            b"hello world"
        );
    }

    #[test]
    fn test_dechunk_single() {
        // One data chunk; 0xb == 11 bytes.
        assert_eq!(dechunk(b"b\r\nhello world\r\n0\r\n\r\n"), b"hello world");
    }

    #[test]
    fn test_rechunk() {
        let expected = b"b\r\nhello world\r\n0\r\n\r\n";
        assert_eq!(rechunk(b"hello world"), expected);
    }

    #[test]
    fn test_dechunk_rechunk_roundtrip() {
        // Chunk boundaries may differ after re-encoding; the payload may not.
        let first_pass = dechunk(b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n");
        let second_pass = dechunk(&rechunk(&first_pass));
        assert_eq!(first_pass, second_pass);
    }

    // ── Request trimming ─────────────────────────────────────────────────

    #[test]
    fn test_modify_strips_tools() {
        let result = modify_and_parse(serde_json::json!({
            "contents": [{"role": "user", "parts": [{"text": "hello"}]}],
            "tools": [
                {"functionDeclarations": [{"name": "view_file", "description": "view", "parameters": {}}]},
                {"functionDeclarations": [{"name": "browser_subagent", "description": "browse", "parameters": {}}]},
                {"functionDeclarations": [{"name": "grep_search", "description": "grep", "parameters": {}}]},
                {"functionDeclarations": [{"name": "generate_image", "description": "img", "parameters": {}}]},
            ],
            "generationConfig": {}
        }));

        let mut tool_names: Vec<&str> = Vec::new();
        for tool in result["request"]["tools"].as_array().unwrap() {
            tool_names.push(tool["functionDeclarations"][0]["name"].as_str().unwrap());
        }

        for kept in ["view_file", "grep_search"] {
            assert!(tool_names.contains(&kept));
        }
        for dropped in ["browser_subagent", "generate_image"] {
            assert!(!tool_names.contains(&dropped));
        }
    }

    #[test]
    fn test_modify_strips_system_sections() {
        let sys_text = "<identity>I am an AI</identity>\n<web_application_development>lots of web dev stuff here</web_application_development>\n<communication_style>be helpful</communication_style>";

        let result = modify_and_parse(serde_json::json!({
            "contents": [{"role": "user", "parts": [{"text": "hello"}]}],
            "systemInstruction": {"parts": [{"text": sys_text}]},
            "tools": [],
            "generationConfig": {}
        }));

        let new_sys = result["request"]["systemInstruction"]["parts"][0]["text"]
            .as_str()
            .unwrap();

        // Neighbouring sections survive ...
        assert!(new_sys.contains("<identity>"));
        assert!(new_sys.contains("<communication_style>"));
        // ... while the stripped section is gone, tags and body alike.
        assert!(!new_sys.contains("web_application_development"));
        assert!(!new_sys.contains("lots of web dev stuff"));
    }

    #[test]
    fn test_modify_strips_empty_user_rules() {
        let result = modify_and_parse(serde_json::json!({
            "contents": [
                {"role": "user", "parts": [{"text": "<user_rules>\nThe user has not defined any custom rules.\n</user_rules>"}]},
                {"role": "user", "parts": [{"text": "hello world"}]},
            ],
            "tools": [],
            "generationConfig": {}
        }));

        let contents = result["request"]["contents"].as_array().unwrap();
        assert_eq!(contents.len(), 1);
        assert_eq!(contents[0]["parts"][0]["text"].as_str().unwrap(), "hello world");
    }
}
|
||||
@@ -363,7 +363,7 @@ async fn handle_http_over_tls(
|
||||
mut client: tokio_rustls::server::TlsStream<TcpStream>,
|
||||
domain: &str,
|
||||
store: MitmStore,
|
||||
_modify_requests: bool,
|
||||
modify_requests: bool,
|
||||
) -> Result<(), String> {
|
||||
let mut tmp = vec![0u8; 32768];
|
||||
|
||||
@@ -535,12 +535,36 @@ async fn handle_http_over_tls(
|
||||
|
||||
// Log LLM calls at info, everything else at debug
|
||||
if req_path.contains("streamGenerateContent") {
|
||||
let body_len = request_buf.len() - headers_end;
|
||||
info!(
|
||||
domain,
|
||||
req_path = %req_path,
|
||||
body_len,
|
||||
cascade = ?cascade_hint,
|
||||
"MITM: forwarding LLM request"
|
||||
);
|
||||
|
||||
// ── Request modification ─────────────────────────────────────
|
||||
// Dechunk body → check if agent request → modify → rechunk
|
||||
if modify_requests && body_len > 0 {
|
||||
let body_slice = &request_buf[headers_end..];
|
||||
let raw_body = super::modify::dechunk(body_slice);
|
||||
|
||||
// Only modify "agent" requests, not "checkpoint" (LS internal)
|
||||
let is_agent = raw_body
|
||||
.windows(20)
|
||||
.any(|w| w == b"\"requestType\":\"agent" || w == b"requestType\":\"agent\"");
|
||||
|
||||
if is_agent {
|
||||
if let Some(modified_body) = super::modify::modify_request(&raw_body) {
|
||||
// Rebuild request_buf: original headers + rechunked modified body
|
||||
let new_chunked = super::modify::rechunk(&modified_body);
|
||||
let mut new_buf = request_buf[..headers_end].to_vec();
|
||||
new_buf.extend_from_slice(&new_chunked);
|
||||
request_buf = new_buf;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
debug!(
|
||||
domain,
|
||||
|
||||
Reference in New Issue
Block a user