Intercepts streamGenerateContent requests and trims: - System instruction: strips web_application_development, knowledge_discovery, persistent_context, skills sections (~18KB saved) - Content messages: strips empty user_rules, workflows boilerplate, conversation summaries (~4.5KB saved) - Tools: keeps 12 essential coding tools, strips 8 non-essential (browser_subagent, generate_image, search_web, etc. ~6KB saved) Total: ~55% reduction in request size while keeping identity, user info, and all coding-relevant tools intact. Only modifies 'agent' type requests, checkpoint requests pass through unmodified. Also: - Standalone mode is now the default (use --no-standalone to attach to existing LS) - Enable request modification by default - Add mold linker, sccache, nextest config (8 thread cap) - Add .cargo/config.toml and .config/nextest.toml
346 lines
12 KiB
Rust
346 lines
12 KiB
Rust
//! Request body modification for intercepted LLM API calls.
//!
//! Strips redundant/verbose sections from the Google Gemini API request
//! to reduce token usage while keeping the request looking legitimate.
//! Nothing structural changes — just trimming fat.
|
|
|
|
use serde_json::Value;
|
|
use tracing::info;
|
|
|
|
/// Allow-list of tool names that survive request trimming.
///
/// `modify_request` drops every entry of `request.tools` whose first function
/// declaration has a name that is not listed here.
const KEEP_TOOLS: &[&str] = &[
    "view_file",
    "write_to_file",
    "replace_file_content",
    "multi_replace_file_content",
    //
    "run_command",
    "command_status",
    "send_command_input",
    //
    "grep_search",
    "find_by_name",
    "list_dir",
    //
    "view_file_outline",
    "view_code_item",
];
|
|
|
|
/// XML-style tag names of system-instruction sections to remove.
///
/// For each name `x`, the `<x>…</x>` span is cut out of the system instruction
/// text. These sections are treated as verbose manuals that cost tokens without
/// meaningfully improving output for coding tasks.
const STRIP_SYSTEM_SECTIONS: &[&str] = &[
    "web_application_development",
    "knowledge_discovery",
    "persistent_context",
    "skills",
];
|
|
|
|
/// Text prefixes identifying `contents[]` entries that are dropped wholesale.
///
/// An entry is removed when the text of its first part starts with any of
/// these strings.
const STRIP_CONTENT_PREFIXES: &[&str] = &[
    "<user_rules>\nThe user has not defined any custom rules.",
    "<workflows>\n",
];
|
|
|
|
/// Modify a streamGenerateContent request body in-place.
|
|
/// Returns the modified JSON bytes, or None if modification wasn't possible.
|
|
pub fn modify_request(body: &[u8]) -> Option<Vec<u8>> {
|
|
let mut json: Value = serde_json::from_slice(body).ok()?;
|
|
|
|
let original_size = body.len();
|
|
let mut changes: Vec<String> = Vec::new();
|
|
|
|
// ── 1. Strip verbose system instruction sections ──────────────────────
|
|
if let Some(sys) = json
|
|
.pointer_mut("/request/systemInstruction/parts/0/text")
|
|
.and_then(|v| v.as_str())
|
|
.map(|s| s.to_string())
|
|
{
|
|
let mut modified = sys.clone();
|
|
for section in STRIP_SYSTEM_SECTIONS {
|
|
let pattern = format!("<{section}>");
|
|
let end_pattern = format!("</{section}>");
|
|
if let (Some(start), Some(end)) = (modified.find(&pattern), modified.find(&end_pattern))
|
|
{
|
|
let end_pos = end + end_pattern.len();
|
|
let removed = end_pos - start;
|
|
modified = format!("{}{}", &modified[..start], &modified[end_pos..]);
|
|
changes.push(format!("strip <{section}> ({removed} chars)"));
|
|
}
|
|
}
|
|
|
|
if modified.len() != sys.len() {
|
|
json["request"]["systemInstruction"]["parts"][0]["text"] =
|
|
Value::String(modified);
|
|
}
|
|
}
|
|
|
|
// ── 2. Strip bloated content messages ─────────────────────────────────
|
|
if let Some(contents) = json
|
|
.pointer_mut("/request/contents")
|
|
.and_then(|v| v.as_array_mut())
|
|
{
|
|
let before = contents.len();
|
|
|
|
// Remove messages matching strip prefixes
|
|
contents.retain(|msg| {
|
|
if let Some(text) = msg["parts"][0]["text"].as_str() {
|
|
for prefix in STRIP_CONTENT_PREFIXES {
|
|
if text.starts_with(prefix) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
true
|
|
});
|
|
|
|
// Strip conversation summaries from remaining messages
|
|
// These appear as "# Conversation History\nHere are the conversation IDs..."
|
|
for msg in contents.iter_mut() {
|
|
if let Some(text) = msg["parts"][0]["text"].as_str().map(|s| s.to_string()) {
|
|
if let Some(start) = text.find("# Conversation History\n") {
|
|
// Find the end of the conversation summaries block
|
|
let end_marker = "</conversation_summaries>";
|
|
let trimmed = if let Some(end) = text.find(end_marker) {
|
|
let end_pos = end + end_marker.len();
|
|
// Find next non-whitespace after end marker
|
|
let rest = text[end_pos..].trim_start();
|
|
format!("{}{}", &text[..start], rest)
|
|
} else {
|
|
// No end marker — just cut from "# Conversation History" onward
|
|
text[..start].trim_end().to_string()
|
|
};
|
|
|
|
if trimmed.len() < text.len() {
|
|
let saved = text.len() - trimmed.len();
|
|
changes.push(format!("strip conversation summaries ({saved} chars)"));
|
|
msg["parts"][0]["text"] = Value::String(trimmed);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
let removed_msgs = before - contents.len();
|
|
if removed_msgs > 0 {
|
|
changes.push(format!("remove {removed_msgs} content messages"));
|
|
}
|
|
}
|
|
|
|
// ── 3. Strip non-essential tools ─────────────────────────────────────
|
|
if let Some(tools) = json
|
|
.pointer_mut("/request/tools")
|
|
.and_then(|v| v.as_array_mut())
|
|
{
|
|
let before = tools.len();
|
|
|
|
tools.retain(|tool| {
|
|
if let Some(name) = tool["functionDeclarations"][0]["name"].as_str() {
|
|
KEEP_TOOLS.contains(&name)
|
|
} else {
|
|
true // keep unknown structure
|
|
}
|
|
});
|
|
|
|
let removed = before - tools.len();
|
|
if removed > 0 {
|
|
changes.push(format!("strip {removed}/{before} tools (keep {})", KEEP_TOOLS.len()));
|
|
}
|
|
}
|
|
|
|
if changes.is_empty() {
|
|
return None; // Nothing modified
|
|
}
|
|
|
|
let modified_bytes = serde_json::to_vec(&json).ok()?;
|
|
let saved = original_size as i64 - modified_bytes.len() as i64;
|
|
let pct = if original_size > 0 {
|
|
(saved as f64 / original_size as f64 * 100.0) as i32
|
|
} else {
|
|
0
|
|
};
|
|
|
|
info!(
|
|
original = original_size,
|
|
modified = modified_bytes.len(),
|
|
saved_bytes = saved,
|
|
saved_pct = pct,
|
|
"MITM: request modified [{}]",
|
|
changes.join(", ")
|
|
);
|
|
|
|
Some(modified_bytes)
|
|
}
|
|
|
|
/// Dechunk an HTTP chunked-encoded body into raw bytes.
///
/// Input: "hex_size\r\n data\r\n hex_size\r\n data\r\n 0\r\n\r\n"
/// Output: concatenated data segments.
///
/// Parsing is best-effort and never panics: it stops at the terminal
/// (zero-size) chunk, at a size line that is missing its `\r\n`, is not valid
/// UTF-8 or is not valid hex, or at the end of input. A final chunk whose
/// declared size exceeds the remaining input contributes only the bytes that
/// are actually present. Chunk extensions (anything after `;` on the size
/// line) are ignored.
pub fn dechunk(data: &[u8]) -> Vec<u8> {
    let mut result = Vec::with_capacity(data.len());
    let mut pos = 0;

    while pos < data.len() {
        // Find end of chunk size line
        let line_end = match data[pos..].windows(2).position(|w| w == b"\r\n") {
            Some(p) => pos + p,
            None => break,
        };

        // Parse hex chunk size (ignore chunk extensions after ';')
        let size_str = std::str::from_utf8(&data[pos..line_end])
            .unwrap_or("")
            .split(';')
            .next()
            .unwrap_or("")
            .trim();

        let chunk_size = match usize::from_str_radix(size_str, 16) {
            Ok(0) | Err(_) => break, // Terminal chunk or malformed size line
            Ok(n) => n,
        };

        let data_start = line_end + 2; // skip \r\n
        // The size comes from the wire, so it may be arbitrarily large:
        // saturate instead of overflowing, then clamp to the available input.
        let data_end = data_start.saturating_add(chunk_size).min(data.len());
        result.extend_from_slice(&data[data_start..data_end]);

        // Skip past data + trailing \r\n
        pos = data_end + 2;
    }

    result
}
|
|
|
|
/// Re-encode data as a single HTTP chunk + terminal chunk.
///
/// Output: "hex_size\r\n data\r\n 0\r\n\r\n" (without the spaces).
///
/// Empty input yields only the terminal chunk `0\r\n\r\n`. A zero-size chunk
/// *is* the terminal chunk, so emitting one ahead of the terminator would
/// leave stray bytes after the end of the body.
pub fn rechunk(data: &[u8]) -> Vec<u8> {
    const TERMINATOR: &[u8] = b"0\r\n\r\n";

    if data.is_empty() {
        return TERMINATOR.to_vec();
    }

    let hex_size = format!("{:x}", data.len());
    // size line + CRLF + payload + CRLF + terminal chunk
    let mut result =
        Vec::with_capacity(hex_size.len() + 2 + data.len() + 2 + TERMINATOR.len());
    result.extend_from_slice(hex_size.as_bytes());
    result.extend_from_slice(b"\r\n");
    result.extend_from_slice(data);
    result.extend_from_slice(b"\r\n");
    result.extend_from_slice(TERMINATOR);
    result
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Wrap `request` in the outer envelope used by every modification test,
    /// run it through `modify_request`, and parse the result back into JSON.
    fn modify_and_parse(request: Value) -> Value {
        let envelope = serde_json::json!({
            "project": "test",
            "requestId": "test/1",
            "request": request,
            "model": "test"
        });

        let raw = serde_json::to_vec(&envelope).unwrap();
        let out = modify_request(&raw).unwrap();
        serde_json::from_slice(&out).unwrap()
    }

    /// Two chunks followed by the terminal chunk are concatenated.
    #[test]
    fn test_dechunk_basic() {
        assert_eq!(
            dechunk(b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n"),
            b"hello world"
        );
    }

    /// A single chunk (size `b` = 11) is unwrapped.
    #[test]
    fn test_dechunk_single() {
        assert_eq!(dechunk(b"b\r\nhello world\r\n0\r\n\r\n"), b"hello world");
    }

    /// Payload is emitted as one chunk plus the terminal chunk.
    #[test]
    fn test_rechunk() {
        assert_eq!(rechunk(b"hello world"), b"b\r\nhello world\r\n0\r\n\r\n");
    }

    /// Dechunking a rechunked payload gives the payload back.
    #[test]
    fn test_dechunk_rechunk_roundtrip() {
        let payload = dechunk(b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n");
        let again = dechunk(&rechunk(&payload));
        assert_eq!(payload, again);
    }

    /// Tools outside the allow-list are dropped; listed ones survive.
    #[test]
    fn test_modify_strips_tools() {
        let result = modify_and_parse(serde_json::json!({
            "contents": [{"role": "user", "parts": [{"text": "hello"}]}],
            "tools": [
                {"functionDeclarations": [{"name": "view_file", "description": "view", "parameters": {}}]},
                {"functionDeclarations": [{"name": "browser_subagent", "description": "browse", "parameters": {}}]},
                {"functionDeclarations": [{"name": "grep_search", "description": "grep", "parameters": {}}]},
                {"functionDeclarations": [{"name": "generate_image", "description": "img", "parameters": {}}]},
            ],
            "generationConfig": {}
        }));

        let kept: Vec<&str> = result["request"]["tools"]
            .as_array()
            .unwrap()
            .iter()
            .map(|t| t["functionDeclarations"][0]["name"].as_str().unwrap())
            .collect();

        for expected in ["view_file", "grep_search"] {
            assert!(kept.contains(&expected));
        }
        for stripped in ["browser_subagent", "generate_image"] {
            assert!(!kept.contains(&stripped));
        }
    }

    /// A listed system section disappears; neighbouring sections stay.
    #[test]
    fn test_modify_strips_system_sections() {
        let sys_text = "<identity>I am an AI</identity>\n<web_application_development>lots of web dev stuff here</web_application_development>\n<communication_style>be helpful</communication_style>";
        let result = modify_and_parse(serde_json::json!({
            "contents": [{"role": "user", "parts": [{"text": "hello"}]}],
            "systemInstruction": {"parts": [{"text": sys_text}]},
            "tools": [],
            "generationConfig": {}
        }));

        let new_sys = result["request"]["systemInstruction"]["parts"][0]["text"]
            .as_str()
            .unwrap();

        assert!(new_sys.contains("<identity>"));
        assert!(new_sys.contains("<communication_style>"));
        assert!(!new_sys.contains("web_application_development"));
        assert!(!new_sys.contains("lots of web dev stuff"));
    }

    /// The boilerplate "no custom rules" message is removed entirely.
    #[test]
    fn test_modify_strips_empty_user_rules() {
        let result = modify_and_parse(serde_json::json!({
            "contents": [
                {"role": "user", "parts": [{"text": "<user_rules>\nThe user has not defined any custom rules.\n</user_rules>"}]},
                {"role": "user", "parts": [{"text": "hello world"}]},
            ],
            "tools": [],
            "generationConfig": {}
        }));

        let contents = result["request"]["contents"].as_array().unwrap();
        assert_eq!(contents.len(), 1);
        assert_eq!(
            contents[0]["parts"][0]["text"].as_str().unwrap(),
            "hello world"
        );
    }
}
|