feat: MITM interception for standalone LS with UID isolation

- Spawn standalone LS as dedicated 'antigravity-ls' user via sudo
- UID-scoped iptables redirect (port 443 → MITM proxy) via mitm-redirect.sh
- Combined CA bundle (system CAs + MITM CA) for Go TLS trust
- Transparent TLS interception with chunked response detection
- Google SSE parser for streamGenerateContent usage extraction
- Timeouts on all MITM operations (TLS handshake, upstream, idle)
- Forward response data immediately (no buffering)
- Per-model token usage capture (input, output, thinking)
- Update docs and known issues to reflect resolved TLS blocker
This commit is contained in:
Nikketryhard
2026-02-14 17:50:12 -06:00
parent 6842bfeaa5
commit d4de436856
10 changed files with 1156 additions and 478 deletions

View File

@@ -56,9 +56,11 @@ pub struct StreamingAccumulator {
pub output_tokens: u64,
pub cache_creation_input_tokens: u64,
pub cache_read_input_tokens: u64,
pub thinking_tokens: u64,
pub model: Option<String>,
pub stop_reason: Option<String>,
pub is_complete: bool,
pub api_provider: Option<String>,
}
impl StreamingAccumulator {
@@ -66,13 +68,46 @@ impl StreamingAccumulator {
Self::default()
}
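/// Process a single SSE event.
///
/// Events with a top-level `response` key are treated as Google format and update the
/// usage counters, model and stop reason; every other event is dispatched on its
/// Anthropic-style `type` field.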
pub fn process_event(&mut self, event: &Value) {
// ── Google format: {"response": {"usageMetadata": {...}, "modelVersion": "..."}} ──
if let Some(response) = event.get("response") {
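// Extract usage metadata. Each event carries cumulative counts, so the stored values are
// overwritten (not summed); a missing field keeps the previous value.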
if let Some(usage) = response.get("usageMetadata") {
self.input_tokens = usage["promptTokenCount"].as_u64().unwrap_or(self.input_tokens);
self.output_tokens = usage["candidatesTokenCount"].as_u64().unwrap_or(self.output_tokens);
self.thinking_tokens = usage["thoughtsTokenCount"].as_u64().unwrap_or(self.thinking_tokens);
}
if let Some(model) = response["modelVersion"].as_str() {
self.model = Some(model.to_string());
}
// Check for completion in candidates
if let Some(candidates) = response.get("candidates").and_then(|c| c.as_array()) {
for candidate in candidates {
if let Some(reason) = candidate["finishReason"].as_str() {
self.stop_reason = Some(reason.to_string());
if reason == "STOP" {
self.is_complete = true;
}
}
}
}
self.api_provider = Some("google".to_string());
trace!(
input = self.input_tokens,
output = self.output_tokens,
thinking = self.thinking_tokens,
complete = self.is_complete,
"SSE Google: usage update"
);
return;
}
// ── Anthropic format: {"type": "message_start"|"message_delta"|"message_stop"} ──
let event_type = event["type"].as_str().unwrap_or("");
match event_type {
"message_start" => {
// message_start contains the initial usage (input tokens + cache)
if let Some(usage) = event.get("message").and_then(|m| m.get("usage")) {
self.input_tokens = usage["input_tokens"].as_u64().unwrap_or(0);
self.cache_creation_input_tokens = usage["cache_creation_input_tokens"].as_u64().unwrap_or(0);
@@ -81,36 +116,27 @@ impl StreamingAccumulator {
if let Some(model) = event.get("message").and_then(|m| m["model"].as_str()) {
self.model = Some(model.to_string());
}
trace!(
input = self.input_tokens,
cache_read = self.cache_read_input_tokens,
cache_create = self.cache_creation_input_tokens,
"SSE message_start: captured input usage"
);
self.api_provider = Some("anthropic".to_string());
trace!(input = self.input_tokens, "SSE Anthropic: message_start");
}
"message_delta" => {
// message_delta contains the output usage
if let Some(usage) = event.get("usage") {
self.output_tokens = usage["output_tokens"].as_u64().unwrap_or(self.output_tokens);
}
if let Some(reason) = event["delta"]["stop_reason"].as_str() {
self.stop_reason = Some(reason.to_string());
}
trace!(output = self.output_tokens, "SSE message_delta: updated output tokens");
}
"message_stop" => {
self.is_complete = true;
debug!(
input = self.input_tokens,
output = self.output_tokens,
cache_read = self.cache_read_input_tokens,
model = ?self.model,
"SSE message_stop: stream complete"
"SSE Anthropic: stream complete"
);
}
"content_block_start" | "content_block_delta" | "content_block_stop" | "ping" => {
// Content events — no usage data, just pass through
}
"content_block_start" | "content_block_delta" | "content_block_stop" | "ping" => {}
_ => {
trace!(event_type, "SSE: unknown event type");
}
@@ -124,11 +150,11 @@ impl StreamingAccumulator {
output_tokens: self.output_tokens,
cache_creation_input_tokens: self.cache_creation_input_tokens,
cache_read_input_tokens: self.cache_read_input_tokens,
thinking_output_tokens: 0,
thinking_output_tokens: self.thinking_tokens,
response_output_tokens: 0,
model: self.model,
stop_reason: self.stop_reason,
api_provider: Some("anthropic".to_string()),
api_provider: self.api_provider.unwrap_or_else(|| "unknown".to_string()).into(),
grpc_method: None,
captured_at: std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)