feat: MITM interception for standalone LS with UID isolation

- Spawn standalone LS as dedicated 'antigravity-ls' user via sudo
- UID-scoped iptables redirect (port 443 → MITM proxy) via mitm-redirect.sh
- Combined CA bundle (system CAs + MITM CA) for Go TLS trust
- Transparent TLS interception with chunked response detection
- Google SSE parser for streamGenerateContent usage extraction
- Timeouts on all MITM operations (TLS handshake, upstream, idle)
- Forward response data immediately (no buffering)
- Per-model token usage capture (input, output, thinking)
- Update docs and known issues to reflect resolved TLS blocker
2026-02-14 17:50:12 -06:00
parent 6842bfeaa5
commit d4de436856
10 changed files with 1156 additions and 478 deletions
--- a/src/mitm/intercept.rs
+++ b/src/mitm/intercept.rs
@@ -56,9 +56,11 @@ pub struct StreamingAccumulator {
    pub output_tokens: u64,
    pub cache_creation_input_tokens: u64,
    pub cache_read_input_tokens: u64,
+    pub thinking_tokens: u64,
    pub model: Option<String>,
    pub stop_reason: Option<String>,
    pub is_complete: bool,
+    pub api_provider: Option<String>,
 }

 impl StreamingAccumulator {
@@ -66,13 +68,46 @@ impl StreamingAccumulator {
        Self::default()
    }

-    /// Process a single SSE event.
+/// Process a single SSE event.
    pub fn process_event(&mut self, event: &Value) {
+        // ── Google format: {"response": {"usageMetadata": {...}, "modelVersion": "..."}} ──
+        if let Some(response) = event.get("response") {
+            // Extract usage metadata (each event has cumulative counts)
+            if let Some(usage) = response.get("usageMetadata") {
+                self.input_tokens = usage["promptTokenCount"].as_u64().unwrap_or(self.input_tokens);
+                self.output_tokens = usage["candidatesTokenCount"].as_u64().unwrap_or(self.output_tokens);
+                self.thinking_tokens = usage["thoughtsTokenCount"].as_u64().unwrap_or(self.thinking_tokens);
+            }
+            if let Some(model) = response["modelVersion"].as_str() {
+                self.model = Some(model.to_string());
+            }
+            // Check for completion in candidates
+            if let Some(candidates) = response.get("candidates").and_then(|c| c.as_array()) {
+                for candidate in candidates {
+                    if let Some(reason) = candidate["finishReason"].as_str() {
+                        self.stop_reason = Some(reason.to_string());
+                        if reason == "STOP" {
+                            self.is_complete = true;
+                        }
+                    }
+                }
+            }
+            self.api_provider = Some("google".to_string());
+            trace!(
+                input = self.input_tokens,
+                output = self.output_tokens,
+                thinking = self.thinking_tokens,
+                complete = self.is_complete,
+                "SSE Google: usage update"
+            );
+            return;
+        }
+
+        // ── Anthropic format: {"type": "message_start"|"message_delta"|"message_stop"} ──
        let event_type = event["type"].as_str().unwrap_or("");

        match event_type {
            "message_start" => {
-                // message_start contains the initial usage (input tokens + cache)
                if let Some(usage) = event.get("message").and_then(|m| m.get("usage")) {
                    self.input_tokens = usage["input_tokens"].as_u64().unwrap_or(0);
                    self.cache_creation_input_tokens = usage["cache_creation_input_tokens"].as_u64().unwrap_or(0);
@@ -81,36 +116,27 @@ impl StreamingAccumulator {
                if let Some(model) = event.get("message").and_then(|m| m["model"].as_str()) {
                    self.model = Some(model.to_string());
                }
-                trace!(
-                    input = self.input_tokens,
-                    cache_read = self.cache_read_input_tokens,
-                    cache_create = self.cache_creation_input_tokens,
-                    "SSE message_start: captured input usage"
-                );
+                self.api_provider = Some("anthropic".to_string());
+                trace!(input = self.input_tokens, "SSE Anthropic: message_start");
            }
            "message_delta" => {
-                // message_delta contains the output usage
                if let Some(usage) = event.get("usage") {
                    self.output_tokens = usage["output_tokens"].as_u64().unwrap_or(self.output_tokens);
                }
                if let Some(reason) = event["delta"]["stop_reason"].as_str() {
                    self.stop_reason = Some(reason.to_string());
                }
-                trace!(output = self.output_tokens, "SSE message_delta: updated output tokens");
            }
            "message_stop" => {
                self.is_complete = true;
                debug!(
                    input = self.input_tokens,
                    output = self.output_tokens,
-                    cache_read = self.cache_read_input_tokens,
                    model = ?self.model,
-                    "SSE message_stop: stream complete"
+                    "SSE Anthropic: stream complete"
                );
            }
-            "content_block_start" | "content_block_delta" | "content_block_stop" | "ping" => {
-                // Content events — no usage data, just pass through
-            }
+            "content_block_start" | "content_block_delta" | "content_block_stop" | "ping" => {}
            _ => {
                trace!(event_type, "SSE: unknown event type");
            }
@@ -124,11 +150,11 @@ impl StreamingAccumulator {
            output_tokens: self.output_tokens,
            cache_creation_input_tokens: self.cache_creation_input_tokens,
            cache_read_input_tokens: self.cache_read_input_tokens,
-            thinking_output_tokens: 0,
+            thinking_output_tokens: self.thinking_tokens,
            response_output_tokens: 0,
            model: self.model,
            stop_reason: self.stop_reason,
-            api_provider: Some("anthropic".to_string()),
+            api_provider: self.api_provider.unwrap_or_else(|| "unknown".to_string()).into(),
            grpc_method: None,
            captured_at: std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)