- Add proxyctl CLI script for systemd service management - Add systemd user service file for background operation - Fix standalone LS kill: properly track real LS PID via pgrep and use sudo kill for cross-user cleanup on shutdown - Remove deprecated scripts (dns-redirect, iptables-redirect, mitm-wrapper, standalone-ls, parse-snapshot) - Disable tool stripping in MITM for tool call investigation - Update GEMINI.md with CLI tools documentation
454 lines
17 KiB
Rust
454 lines
17 KiB
Rust
//! Request body modification for intercepted LLM API calls.
|
|
//!
|
|
//! Aggressively strips everything except identity and actual conversation
|
|
//! from the Gemini API request. No integrity checks exist on the request
|
|
//! body — Google validates OAuth, project, model, and JSON structure only.
|
|
|
|
use regex::Regex;
|
|
use serde_json::Value;
|
|
use tracing::info;
|
|
|
|
/// When `true`, `modify_request` empties the `/request/tools` array before the request is forwarded.
|
|
/// Set to `false` to let tool definitions through unchanged (for tool call testing).
|
|
const STRIP_ALL_TOOLS: bool = false;
|
|
|
|
/// Modify a streamGenerateContent request body in-place.
|
|
/// Returns the modified JSON bytes, or None if modification wasn't possible.
|
|
pub fn modify_request(body: &[u8]) -> Option<Vec<u8>> {
|
|
let mut json: Value = serde_json::from_slice(body).ok()?;
|
|
|
|
let original_size = body.len();
|
|
let mut changes: Vec<String> = Vec::new();
|
|
|
|
// ── 1. System instruction: keep ONLY <identity>, nuke everything else ──
|
|
if let Some(sys) = json
|
|
.pointer_mut("/request/systemInstruction/parts/0/text")
|
|
.and_then(|v| v.as_str())
|
|
.map(|s| s.to_string())
|
|
{
|
|
let original_len = sys.len();
|
|
|
|
// Extract <identity>...</identity> block
|
|
let identity = extract_xml_section(&sys, "identity");
|
|
|
|
if let Some(identity_text) = identity {
|
|
let new_sys = format!("<identity>\n{}\n</identity>", identity_text.trim());
|
|
let stripped = original_len - new_sys.len();
|
|
if stripped > 0 {
|
|
changes.push(format!(
|
|
"system instruction: keep <identity> only ({original_len} → {} chars, -{stripped})",
|
|
new_sys.len()
|
|
));
|
|
json["request"]["systemInstruction"]["parts"][0]["text"] =
|
|
Value::String(new_sys);
|
|
}
|
|
} else {
|
|
// No identity tag found — clear the whole thing
|
|
changes.push(format!("system instruction: cleared ({original_len} chars)"));
|
|
json["request"]["systemInstruction"]["parts"][0]["text"] =
|
|
Value::String(String::new());
|
|
}
|
|
}
|
|
|
|
// ── 2. Content messages: keep only actual conversation turns ───────────
|
|
if let Some(contents) = json
|
|
.pointer_mut("/request/contents")
|
|
.and_then(|v| v.as_array_mut())
|
|
{
|
|
let before = contents.len();
|
|
|
|
// Remove messages that are pure Antigravity context injection
|
|
contents.retain(|msg| {
|
|
if let Some(text) = msg["parts"][0]["text"].as_str() {
|
|
// Strip user_information (OS, workspace paths)
|
|
if text.starts_with("<user_information>") {
|
|
return false;
|
|
}
|
|
// Strip user_rules / MEMORY blocks
|
|
if text.starts_with("<user_rules>") {
|
|
return false;
|
|
}
|
|
// Strip workflows
|
|
if text.starts_with("<workflows>") {
|
|
return false;
|
|
}
|
|
// Strip MCP servers block
|
|
if text.starts_with("<mcp_servers>") {
|
|
return false;
|
|
}
|
|
}
|
|
true
|
|
});
|
|
|
|
// For remaining messages, strip embedded metadata
|
|
for msg in contents.iter_mut() {
|
|
if let Some(text) = msg["parts"][0]["text"].as_str().map(|s| s.to_string()) {
|
|
let mut modified = text.clone();
|
|
|
|
// Strip conversation summaries block
|
|
if let Some(cleaned) = strip_between(&modified, "# Conversation History\n", "</conversation_summaries>") {
|
|
modified = cleaned;
|
|
}
|
|
|
|
// Strip <ADDITIONAL_METADATA> blocks (cursor pos, open files, etc.)
|
|
if let Some(cleaned) = strip_xml_section(&modified, "ADDITIONAL_METADATA") {
|
|
modified = cleaned;
|
|
}
|
|
|
|
// Strip <EPHEMERAL_MESSAGE> blocks
|
|
if let Some(cleaned) = strip_xml_section(&modified, "EPHEMERAL_MESSAGE") {
|
|
modified = cleaned;
|
|
}
|
|
|
|
// Strip "Step Id: N\n" prefixes
|
|
if modified.starts_with("Step Id:") {
|
|
if let Some(newline_pos) = modified.find('\n') {
|
|
modified = modified[newline_pos + 1..].to_string();
|
|
}
|
|
}
|
|
|
|
// Strip knowledge item blocks
|
|
if let Some(cleaned) = strip_between(&modified, "Here are the ", "</knowledge_item>") {
|
|
// Only strip if it's about knowledge items
|
|
if cleaned.len() < modified.len() && modified.contains("knowledge item") {
|
|
modified = cleaned;
|
|
}
|
|
}
|
|
|
|
// Clean up excessive whitespace from stripping
|
|
let modified = collapse_newlines(&modified);
|
|
|
|
if modified.len() < text.len() {
|
|
msg["parts"][0]["text"] = Value::String(modified);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Remove now-empty messages
|
|
contents.retain(|msg| {
|
|
if let Some(text) = msg["parts"][0]["text"].as_str() {
|
|
!text.trim().is_empty()
|
|
} else {
|
|
true
|
|
}
|
|
});
|
|
|
|
let removed = before - contents.len();
|
|
if removed > 0 {
|
|
changes.push(format!("remove {removed}/{before} content messages"));
|
|
}
|
|
}
|
|
|
|
// ── 3. Strip all tool definitions ────────────────────────────────────
|
|
if STRIP_ALL_TOOLS {
|
|
if let Some(tools) = json
|
|
.pointer_mut("/request/tools")
|
|
.and_then(|v| v.as_array_mut())
|
|
{
|
|
let count = tools.len();
|
|
if count > 0 {
|
|
tools.clear();
|
|
changes.push(format!("strip all {count} tools"));
|
|
}
|
|
}
|
|
}
|
|
|
|
// ── 4. Inject includeThoughts to capture thinking text ───────────────
|
|
// Without this flag, Google only reports thinking token counts
|
|
// but doesn't send the thinking text in SSE parts.
|
|
{
|
|
// Ensure request.generationConfig.thinkingConfig.includeThoughts = true
|
|
let request = json.get_mut("request").and_then(|v| v.as_object_mut());
|
|
if let Some(req) = request {
|
|
let gen_config = req
|
|
.entry("generationConfig")
|
|
.or_insert_with(|| serde_json::json!({}));
|
|
if let Some(gc) = gen_config.as_object_mut() {
|
|
let thinking_config = gc
|
|
.entry("thinkingConfig")
|
|
.or_insert_with(|| serde_json::json!({}));
|
|
if let Some(tc) = thinking_config.as_object_mut() {
|
|
if !tc.contains_key("includeThoughts") {
|
|
tc.insert("includeThoughts".to_string(), Value::Bool(true));
|
|
changes.push("inject includeThoughts".to_string());
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
// Not wrapped in request — try top-level (public API format)
|
|
let gen_config = json.as_object_mut().and_then(|o| {
|
|
Some(o.entry("generationConfig")
|
|
.or_insert_with(|| serde_json::json!({})))
|
|
});
|
|
if let Some(gc) = gen_config.and_then(|v| v.as_object_mut()) {
|
|
let thinking_config = gc
|
|
.entry("thinkingConfig")
|
|
.or_insert_with(|| serde_json::json!({}));
|
|
if let Some(tc) = thinking_config.as_object_mut() {
|
|
if !tc.contains_key("includeThoughts") {
|
|
tc.insert("includeThoughts".to_string(), Value::Bool(true));
|
|
changes.push("inject includeThoughts (top-level)".to_string());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if changes.is_empty() {
|
|
return None; // Nothing modified
|
|
}
|
|
|
|
let modified_bytes = serde_json::to_vec(&json).ok()?;
|
|
let saved = original_size as i64 - modified_bytes.len() as i64;
|
|
let pct = if original_size > 0 {
|
|
(saved as f64 / original_size as f64 * 100.0) as i32
|
|
} else {
|
|
0
|
|
};
|
|
|
|
info!(
|
|
original = original_size,
|
|
modified = modified_bytes.len(),
|
|
saved_bytes = saved,
|
|
saved_pct = pct,
|
|
"MITM: request modified [{}]",
|
|
changes.join(", ")
|
|
);
|
|
|
|
Some(modified_bytes)
|
|
}
|
|
|
|
/// Extract the inner text of the first `<tag>...</tag>` section in `text`.
///
/// The closing tag is searched for only *after* the opening tag, so a stray
/// `</tag>` earlier in the text does not hide a well-formed section.
///
/// Returns `None` when the opening tag is missing, when no closing tag
/// follows it, or when the section is empty. The inner text is returned
/// verbatim (not trimmed).
fn extract_xml_section(text: &str, tag: &str) -> Option<String> {
    let open = format!("<{tag}>");
    let close = format!("</{tag}>");

    let inner_start = text.find(&open)? + open.len();
    let inner_len = text[inner_start..].find(&close)?;
    if inner_len == 0 {
        return None;
    }
    Some(text[inner_start..inner_start + inner_len].to_string())
}
|
|
|
|
/// Remove the first `<tag>...</tag>` section (tags included) from `text`.
///
/// The closing tag is searched for only *after* the opening tag; matching a
/// closing tag that precedes the opening one would splice overlapping slices
/// together and duplicate text instead of removing it.
///
/// Returns `None` when the opening tag is missing or no closing tag follows
/// it. Surrounding whitespace is left untouched.
fn strip_xml_section(text: &str, tag: &str) -> Option<String> {
    let open = format!("<{tag}>");
    let close = format!("</{tag}>");

    let start = text.find(&open)?;
    let body_start = start + open.len();
    let end_pos = body_start + text[body_start..].find(&close)? + close.len();

    Some(format!("{}{}", &text[..start], &text[end_pos..]))
}
|
|
|
|
/// Strip everything between two markers (inclusive of both markers).
///
/// Only the first `start_marker` is considered, and `end_marker` is searched
/// for only *after* it, so an earlier occurrence of the end marker cannot
/// produce overlapping slices. Whitespace directly following the end marker
/// is removed as well.
///
/// Returns `None` when the start marker is missing or no end marker follows it.
fn strip_between(text: &str, start_marker: &str, end_marker: &str) -> Option<String> {
    let start = text.find(start_marker)?;
    let search_from = start + start_marker.len();
    let end_pos = search_from + text[search_from..].find(end_marker)? + end_marker.len();

    // Skip any trailing whitespace after end marker
    let rest = text[end_pos..].trim_start();
    Some(format!("{}{}", &text[..start], rest))
}
|
|
|
|
/// Collapse 3+ consecutive newlines into 2.
|
|
fn collapse_newlines(text: &str) -> String {
|
|
let re = Regex::new(r"\n{3,}").unwrap();
|
|
re.replace_all(text, "\n\n").to_string()
|
|
}
|
|
|
|
/// Dechunk an HTTP chunked-encoded body into raw bytes.
///
/// Parsing is best-effort: it stops at the terminal `0` chunk, at the first
/// size line that is not terminated by CRLF, or at the first size that is not
/// valid hexadecimal, and returns whatever payload was collected up to that
/// point. Chunk extensions (`;name=value`) are ignored. A chunk that claims
/// more bytes than remain in `data` is truncated to the available bytes.
pub fn dechunk(data: &[u8]) -> Vec<u8> {
    let mut result = Vec::with_capacity(data.len());
    let mut pos = 0;

    while pos < data.len() {
        // The chunk-size line runs up to the next CRLF.
        let line_end = match data[pos..].windows(2).position(|w| w == b"\r\n") {
            Some(offset) => pos + offset,
            None => break,
        };

        // Invalid UTF-8 becomes "", which fails the hex parse below.
        let size_str = std::str::from_utf8(&data[pos..line_end])
            .unwrap_or("")
            .split(';')
            .next()
            .unwrap_or("")
            .trim();

        let chunk_size = match usize::from_str_radix(size_str, 16) {
            Ok(0) | Err(_) => break,
            Ok(n) => n,
        };

        let data_start = line_end + 2;
        // The size comes from the wire: saturate so that an absurdly large
        // value clamps to the end of the buffer instead of overflowing.
        let data_end = data_start.saturating_add(chunk_size).min(data.len());
        result.extend_from_slice(&data[data_start..data_end]);

        // Skip the CRLF that terminates the chunk payload.
        pos = data_end + 2;
    }

    result
}
|
|
|
|
/// Re-encode data as a single HTTP chunk followed by the terminal chunk.
///
/// Empty input produces only the terminal chunk (`0\r\n\r\n`). Emitting a
/// zero-length data chunk first would itself read as the terminal chunk and
/// leave stray bytes after the end of the message.
pub fn rechunk(data: &[u8]) -> Vec<u8> {
    const TERMINAL_CHUNK: &[u8] = b"0\r\n\r\n";

    if data.is_empty() {
        return TERMINAL_CHUNK.to_vec();
    }

    let hex_size = format!("{:x}", data.len());
    let mut result =
        Vec::with_capacity(hex_size.len() + 2 + data.len() + 2 + TERMINAL_CHUNK.len());
    result.extend_from_slice(hex_size.as_bytes());
    result.extend_from_slice(b"\r\n");
    result.extend_from_slice(data);
    result.extend_from_slice(b"\r\n");
    result.extend_from_slice(TERMINAL_CHUNK);
    result
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_dechunk_basic() {
        let chunked = b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n";
        let result = dechunk(chunked);
        assert_eq!(result, b"hello world");
    }

    #[test]
    fn test_dechunk_single() {
        let chunked = b"b\r\nhello world\r\n0\r\n\r\n";
        let result = dechunk(chunked);
        assert_eq!(result, b"hello world");
    }

    #[test]
    fn test_rechunk() {
        let data = b"hello world";
        let chunked = rechunk(data);
        let expected = b"b\r\nhello world\r\n0\r\n\r\n";
        assert_eq!(chunked, expected);
    }

    #[test]
    fn test_dechunk_rechunk_roundtrip() {
        let original = b"5\r\nhello\r\n6\r\n world\r\n0\r\n\r\n";
        let data = dechunk(original);
        let rechunked = rechunk(&data);
        let data2 = dechunk(&rechunked);
        assert_eq!(data, data2);
    }

    /// Tool handling follows `STRIP_ALL_TOOLS`: the array is emptied when the
    /// switch is on and passed through untouched when it is off.
    #[test]
    fn test_modify_strips_all_tools() {
        let body = serde_json::json!({
            "project": "test",
            "requestId": "test/1",
            "request": {
                "contents": [{"role": "user", "parts": [{"text": "hello"}]}],
                "tools": [
                    {"functionDeclarations": [{"name": "view_file", "description": "view", "parameters": {}}]},
                    {"functionDeclarations": [{"name": "browser_subagent", "description": "browse", "parameters": {}}]},
                ],
                "generationConfig": {}
            },
            "model": "test"
        });

        let bytes = serde_json::to_vec(&body).unwrap();
        // Always `Some`: at minimum includeThoughts gets injected.
        let modified = modify_request(&bytes).unwrap();
        let result: Value = serde_json::from_slice(&modified).unwrap();

        let tools = result["request"]["tools"].as_array().unwrap();
        if STRIP_ALL_TOOLS {
            assert!(tools.is_empty(), "all tools should be stripped");
        } else {
            assert_eq!(
                tools.len(),
                2,
                "tools should pass through when stripping is disabled"
            );
        }
    }

    #[test]
    fn test_modify_keeps_only_identity() {
        let sys_text = "<identity>\nYou are a helpful AI.\n</identity>\n\n<tool_calling>\nUse absolute paths.\n</tool_calling>\n<web_application_development>\nlots of web dev stuff\n</web_application_development>\n<communication_style>\nbe helpful\n</communication_style>";
        let body = serde_json::json!({
            "project": "test",
            "requestId": "test/1",
            "request": {
                "contents": [{"role": "user", "parts": [{"text": "hello"}]}],
                "systemInstruction": {"parts": [{"text": sys_text}]},
                "tools": [],
                "generationConfig": {}
            },
            "model": "test"
        });

        let bytes = serde_json::to_vec(&body).unwrap();
        let modified = modify_request(&bytes).unwrap();
        let result: Value = serde_json::from_slice(&modified).unwrap();

        let new_sys = result["request"]["systemInstruction"]["parts"][0]["text"]
            .as_str()
            .unwrap();

        assert!(new_sys.contains("<identity>"));
        assert!(new_sys.contains("You are a helpful AI."));
        assert!(!new_sys.contains("tool_calling"));
        assert!(!new_sys.contains("web_application_development"));
        assert!(!new_sys.contains("communication_style"));
    }

    #[test]
    fn test_modify_strips_context_messages() {
        let body = serde_json::json!({
            "project": "test",
            "requestId": "test/1",
            "request": {
                "contents": [
                    {"role": "user", "parts": [{"text": "<user_information>\nLinux\n</user_information>"}]},
                    {"role": "user", "parts": [{"text": "<user_rules>\nno rules\n</user_rules>"}]},
                    {"role": "user", "parts": [{"text": "<workflows>\nsome workflows\n</workflows>"}]},
                    {"role": "user", "parts": [{"text": "Step Id: 0\n\n<USER_REQUEST>\nSay hello\n</USER_REQUEST>\n<ADDITIONAL_METADATA>\ncursor stuff\n</ADDITIONAL_METADATA>"}]},
                    {"role": "model", "parts": [{"text": "Hello!"}]},
                ],
                "tools": [],
                "generationConfig": {}
            },
            "model": "test"
        });

        let bytes = serde_json::to_vec(&body).unwrap();
        let modified = modify_request(&bytes).unwrap();
        let result: Value = serde_json::from_slice(&modified).unwrap();

        let contents = result["request"]["contents"].as_array().unwrap();
        // Should have removed user_information, user_rules, workflows (3 messages)
        // Kept: USER_REQUEST message (with ADDITIONAL_METADATA stripped) + model response
        assert_eq!(contents.len(), 2, "should keep only user request + model response");

        // Check USER_REQUEST message had metadata stripped
        let user_msg = contents[0]["parts"][0]["text"].as_str().unwrap();
        assert!(user_msg.contains("Say hello"), "should keep user request");
        assert!(!user_msg.contains("ADDITIONAL_METADATA"), "should strip metadata");
        assert!(!user_msg.contains("cursor stuff"), "should strip cursor info");
        assert!(!user_msg.starts_with("Step Id:"), "should strip step id");

        // Model response kept intact
        assert_eq!(contents[1]["parts"][0]["text"].as_str().unwrap(), "Hello!");
    }

    #[test]
    fn test_extract_xml_section() {
        let text = "before <identity>\nI am AI\n</identity> after";
        let result = extract_xml_section(text, "identity").unwrap();
        assert_eq!(result, "\nI am AI\n");
    }

    #[test]
    fn test_strip_xml_section() {
        let text = "before <META>\nstuff\n</META> after";
        let result = strip_xml_section(text, "META").unwrap();
        // Only the section itself is removed; the space on each side of it
        // survives, hence two spaces in the expected text.
        assert_eq!(result, "before  after");
    }

    #[test]
    fn test_strip_between() {
        let text = "keep this # Conversation History\nlots of stuff\n</conversation_summaries>\nand this";
        let result = strip_between(text, "# Conversation History\n", "</conversation_summaries>").unwrap();
        assert_eq!(result, "keep this and this");
    }
}
|