feat: initial commit — antigravity proxy with MITM, standalone LS, and snapshot tooling

2026-02-14 02:24:35 -06:00
commit d5e7f09225
30 changed files with 9980 additions and 0 deletions
--- a/src/mitm/store.rs
+++ b/src/mitm/store.rs
@@ -0,0 +1,163 @@
+//! Shared store for intercepted API usage data.
+//!
+//! The MITM proxy writes usage data here; the API handlers read from it.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+use tokio::sync::RwLock;
+use serde::{Deserialize, Serialize};
+use tracing::debug;
+
+/// Token accounting extracted from a single intercepted API response.
+///
+/// One shape serves both providers: Anthropic JSON/SSE responses and Google gRPC
+/// protobuf responses. The fields are the superset of Anthropic's `usage` object
+/// and Google's `ModelUsageStats` proto, so some fields stay at their default for
+/// a given provider.
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
+pub struct ApiUsage {
+    /// Tokens consumed by the request (prompt side).
+    pub input_tokens: u64,
+    /// Tokens produced by the response (completion side).
+    pub output_tokens: u64,
+    /// Tokens written to the prompt cache.
+    /// Anthropic: `cache_creation_input_tokens` / Google: `cache_write_tokens`.
+    pub cache_creation_input_tokens: u64,
+    /// Tokens served from the prompt cache.
+    /// Anthropic: `cache_read_input_tokens` / Google: `cache_read_tokens`.
+    pub cache_read_input_tokens: u64,
+    /// Google-specific: output tokens spent on thinking/reasoning (extended thinking).
+    pub thinking_output_tokens: u64,
+    /// Google-specific: output tokens in the visible response (the non-thinking portion).
+    pub response_output_tokens: u64,
+    /// Total cost in USD; `None` when the API did not report one.
+    pub total_cost_usd: Option<f64>,
+    /// Name of the model that actually served the request.
+    pub model: Option<String>,
+    /// Stop reason / finish reason reported by the API.
+    pub stop_reason: Option<String>,
+    /// API provider label (e.g. "anthropic", "google").
+    pub api_provider: Option<String>,
+    /// gRPC method path
+    /// (e.g. "/google.internal.cloud.code.v1internal.PredictionService/GenerateContent").
+    pub grpc_method: Option<String>,
+    /// Timestamp at which this usage was captured. `MitmStore` treats the smallest
+    /// value as the oldest entry when evicting.
+    /// NOTE(review): unit and epoch are set by the producer and are not visible
+    /// here — confirm.
+    pub captured_at: u64,
+}
+
+/// Thread-safe store for intercepted data.
+///
+/// Usage entries are keyed by cascade ID. Exchanges recorded without a cascade ID
+/// all share the single `_latest` slot, so only the most recent unattributed
+/// exchange is retained there.
+///
+/// Both fields sit behind `Arc`, so cloning a `MitmStore` yields another handle to
+/// the same underlying data rather than an independent copy.
+#[derive(Clone, Debug, Default)]
+pub struct MitmStore {
+    /// Most recent usage per cascade ID (or under `_latest` when unattributed).
+    latest_usage: Arc<RwLock<HashMap<String, ApiUsage>>>,
+    /// Global aggregate stats across every recorded exchange.
+    stats: Arc<RwLock<MitmStats>>,
+}
+
+/// Running totals over all intercepted traffic recorded by the store.
+///
+/// Each `total_*` token field is the sum of the corresponding `ApiUsage` field
+/// over every recorded exchange.
+#[derive(Clone, Debug, Default, Serialize)]
+pub struct MitmStats {
+    /// Number of exchanges recorded.
+    pub total_requests: u64,
+    /// Sum of `input_tokens`.
+    pub total_input_tokens: u64,
+    /// Sum of `output_tokens`.
+    pub total_output_tokens: u64,
+    /// Sum of `cache_read_input_tokens`.
+    pub total_cache_read_tokens: u64,
+    /// Sum of `cache_creation_input_tokens`.
+    pub total_cache_creation_tokens: u64,
+    /// Sum of `thinking_output_tokens`.
+    pub total_thinking_output_tokens: u64,
+    /// Sum of `response_output_tokens`.
+    pub total_response_output_tokens: u64,
+    /// Breakdown by model name. Exchanges whose usage carries no model name are
+    /// counted in the totals above but do not appear here.
+    pub per_model: HashMap<String, ModelStats>,
+}
+
+/// Usage counters attributed to one model.
+///
+/// Unlike `MitmStats`, this carries no thinking/response output split.
+#[derive(Clone, Debug, Default, Serialize)]
+pub struct ModelStats {
+    /// Number of exchanges served by this model.
+    pub requests: u64,
+    /// Sum of `input_tokens` for this model.
+    pub input_tokens: u64,
+    /// Sum of `output_tokens` for this model.
+    pub output_tokens: u64,
+    /// Sum of `cache_read_input_tokens` for this model.
+    pub cache_read_tokens: u64,
+    /// Sum of `cache_creation_input_tokens` for this model.
+    pub cache_creation_tokens: u64,
+}
+
+impl MitmStore {
+    /// Key under which usage is stored when the cascade could not be identified.
+    const UNATTRIBUTED_KEY: &'static str = "_latest";
+
+    /// Upper bound on the number of usage entries retained at once.
+    const MAX_ENTRIES: usize = 500;
+
+    /// Creates an empty store with zeroed aggregate statistics.
+    pub fn new() -> Self {
+        Self {
+            latest_usage: Arc::new(RwLock::new(HashMap::new())),
+            stats: Arc::new(RwLock::new(MitmStats::default())),
+        }
+    }
+
+    /// Adds `by` to `counter`, clamping at `u64::MAX`.
+    ///
+    /// Token counts are parsed from intercepted traffic, so a garbled or hostile
+    /// value must not panic (debug builds) or wrap around (release builds).
+    fn bump(counter: &mut u64, by: u64) {
+        *counter = counter.saturating_add(by);
+    }
+
+    /// Record a completed API exchange with usage data.
+    ///
+    /// Folds `usage` into the aggregate (and, when a model name is present,
+    /// per-model) counters, then stores it as the latest usage for `cascade_id`.
+    /// When `cascade_id` is `None` the usage is stored under the `_latest` key,
+    /// replacing any previous unattributed entry.
+    ///
+    /// At most `MAX_ENTRIES` usages are retained. When a new key would exceed that,
+    /// the entry with the smallest `captured_at` is dropped first, so the entry
+    /// being recorded is never the one evicted.
+    pub async fn record_usage(&self, cascade_id: Option<&str>, usage: ApiUsage) {
+        debug!(
+            input = usage.input_tokens,
+            output = usage.output_tokens,
+            cache_read = usage.cache_read_input_tokens,
+            cache_create = usage.cache_creation_input_tokens,
+            thinking = usage.thinking_output_tokens,
+            response = usage.response_output_tokens,
+            model = ?usage.model,
+            provider = ?usage.api_provider,
+            grpc = ?usage.grpc_method,
+            "MITM captured API usage"
+        );
+
+        // Update aggregate stats. The inner scope drops the stats guard before the
+        // usage map is locked, so the two locks are never held at the same time.
+        {
+            let mut stats = self.stats.write().await;
+            Self::bump(&mut stats.total_requests, 1);
+            Self::bump(&mut stats.total_input_tokens, usage.input_tokens);
+            Self::bump(&mut stats.total_output_tokens, usage.output_tokens);
+            Self::bump(&mut stats.total_cache_read_tokens, usage.cache_read_input_tokens);
+            Self::bump(
+                &mut stats.total_cache_creation_tokens,
+                usage.cache_creation_input_tokens,
+            );
+            Self::bump(
+                &mut stats.total_thinking_output_tokens,
+                usage.thinking_output_tokens,
+            );
+            Self::bump(
+                &mut stats.total_response_output_tokens,
+                usage.response_output_tokens,
+            );
+
+            // Per-model breakdown (skipped when the model is unknown).
+            if let Some(ref model_name) = usage.model {
+                let model_stats = stats.per_model.entry(model_name.clone()).or_default();
+                Self::bump(&mut model_stats.requests, 1);
+                Self::bump(&mut model_stats.input_tokens, usage.input_tokens);
+                Self::bump(&mut model_stats.output_tokens, usage.output_tokens);
+                Self::bump(&mut model_stats.cache_read_tokens, usage.cache_read_input_tokens);
+                Self::bump(
+                    &mut model_stats.cache_creation_tokens,
+                    usage.cache_creation_input_tokens,
+                );
+            }
+        }
+
+        // Store latest usage for the cascade (if we can identify it).
+        let key = cascade_id.unwrap_or(Self::UNATTRIBUTED_KEY).to_owned();
+        let mut latest = self.latest_usage.write().await;
+
+        // Make room *before* inserting so the map stays bounded and the new entry
+        // can never be chosen as the eviction victim. Overwriting an existing key
+        // does not grow the map, so no eviction is needed in that case.
+        if !latest.contains_key(&key) && latest.len() >= Self::MAX_ENTRIES {
+            let oldest_key = latest
+                .iter()
+                .min_by_key(|(_, v)| v.captured_at)
+                .map(|(k, _)| k.clone());
+            if let Some(stale) = oldest_key {
+                latest.remove(&stale);
+            }
+        }
+
+        latest.insert(key, usage);
+    }
+
+    /// Get the latest usage for a cascade, consuming it (one-shot read).
+    ///
+    /// Only returns exact `cascade_id` matches — no cross-cascade fallback.
+    /// The `_latest` entry is only consumed when the caller passes `"_latest"`
+    /// explicitly (i.e., when the MITM couldn't identify the cascade).
+    /// Returns `None` when nothing is stored under `cascade_id`.
+    pub async fn take_usage(&self, cascade_id: &str) -> Option<ApiUsage> {
+        // A write lock is required because the entry is removed from the map.
+        let mut latest = self.latest_usage.write().await;
+        latest.remove(cascade_id)
+    }
+
+    /// Peek at the latest usage without consuming it.
+    ///
+    /// Returns a clone of the stored entry, or `None` when nothing is stored
+    /// under `cascade_id`.
+    // No caller is visible in this module; presumably kept for debugging —
+    // confirm before removing the allow.
+    #[allow(dead_code)]
+    pub async fn peek_usage(&self, cascade_id: &str) -> Option<ApiUsage> {
+        let latest = self.latest_usage.read().await;
+        latest.get(cascade_id).cloned()
+    }
+
+    /// Get a snapshot (clone) of the aggregate stats.
+    pub async fn stats(&self) -> MitmStats {
+        self.stats.read().await.clone()
+    }
+}