feat: initial commit — antigravity proxy with MITM, standalone LS, and snapshot tooling

This commit is contained in:
Nikketryhard
2026-02-14 02:24:35 -06:00
commit d5e7f09225
30 changed files with 9980 additions and 0 deletions

163
src/mitm/store.rs Normal file
View File

@@ -0,0 +1,163 @@
//! Shared store for intercepted API usage data.
//!
//! The MITM proxy writes usage data here; the API handlers read from it.
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;
use serde::{Deserialize, Serialize};
use tracing::debug;
/// Token usage from an intercepted API response.
///
/// Covers both Anthropic JSON/SSE responses and Google gRPC protobuf responses.
/// Fields map to the superset of Anthropic's `usage` object and Google's `ModelUsageStats` proto.
///
/// Because the type derives `Default`, every counter (including `captured_at`)
/// starts at `0` and every optional field starts as `None` unless the producer
/// fills it in.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ApiUsage {
/// Input token count.
pub input_tokens: u64,
/// Output token count.
///
/// NOTE(review): whether this always equals `thinking_output_tokens +
/// response_output_tokens` is not established in this file — confirm with the parser.
pub output_tokens: u64,
/// Anthropic: cache_creation_input_tokens / Google: cache_write_tokens
pub cache_creation_input_tokens: u64,
/// Anthropic: cache_read_input_tokens / Google: cache_read_tokens
pub cache_read_input_tokens: u64,
/// Google-specific: thinking/reasoning output tokens (extended thinking)
pub thinking_output_tokens: u64,
/// Google-specific: response output tokens (non-thinking portion)
pub response_output_tokens: u64,
/// Total cost in USD (if provided by the API).
pub total_cost_usd: Option<f64>,
/// The actual model that served the request.
///
/// When present, `MitmStore::record_usage` uses this string as the key of the
/// per-model breakdown; when `None`, the usage only counts toward the global totals.
pub model: Option<String>,
/// Stop reason / finish reason from the API.
pub stop_reason: Option<String>,
/// API provider (e.g. "anthropic", "google")
pub api_provider: Option<String>,
/// gRPC method path (e.g. "/google.internal.cloud.code.v1internal.PredictionService/GenerateContent")
pub grpc_method: Option<String>,
/// Timestamp when this usage was captured.
///
/// `MitmStore::record_usage` uses this value as the eviction ordering key:
/// when the store is over capacity, the entry with the smallest `captured_at`
/// is dropped.
/// NOTE(review): the unit and epoch are not defined in this file — confirm with
/// the code that constructs `ApiUsage`.
pub captured_at: u64,
}
/// Thread-safe store for intercepted data.
///
/// Usage entries are keyed by the cascade ID passed to `record_usage`; when no
/// cascade ID is supplied the entry is stored under the literal key `"_latest"`.
/// NOTE(review): earlier documentation mentioned "cascade ID + a sequence number";
/// nothing in this file appends a sequence number, so callers would have to
/// include it in the ID they pass in — confirm against the call sites.
///
/// Both fields are `Arc` handles, so cloning a `MitmStore` yields another handle
/// to the same underlying data rather than an independent copy.
#[derive(Clone)]
pub struct MitmStore {
/// Most recent usage per cascade ID.
latest_usage: Arc<RwLock<HashMap<String, ApiUsage>>>,
/// Global aggregate stats.
stats: Arc<RwLock<MitmStats>>,
}
/// Aggregate statistics across all intercepted traffic.
///
/// Updated by `MitmStore::record_usage`, which adds the matching `ApiUsage`
/// fields to these totals on every call. Returned to readers as a cloned
/// snapshot by `MitmStore::stats`.
#[derive(Debug, Clone, Default, Serialize)]
pub struct MitmStats {
/// Number of `record_usage` calls (one per recorded exchange).
pub total_requests: u64,
/// Sum of `ApiUsage::input_tokens`.
pub total_input_tokens: u64,
/// Sum of `ApiUsage::output_tokens`.
pub total_output_tokens: u64,
/// Sum of `ApiUsage::cache_read_input_tokens`.
pub total_cache_read_tokens: u64,
/// Sum of `ApiUsage::cache_creation_input_tokens`.
pub total_cache_creation_tokens: u64,
/// Sum of `ApiUsage::thinking_output_tokens`.
pub total_thinking_output_tokens: u64,
/// Sum of `ApiUsage::response_output_tokens`.
pub total_response_output_tokens: u64,
/// Per-model usage breakdown (model name → stats).
///
/// Only exchanges whose `ApiUsage::model` is `Some` contribute here, so the
/// per-model sums can be lower than the global totals above.
pub per_model: HashMap<String, ModelStats>,
}
/// Per-model usage counters.
///
/// One instance exists per distinct model name seen by
/// `MitmStore::record_usage`. Thinking/response output tokens are tracked only
/// in the global `MitmStats`, not per model.
#[derive(Debug, Clone, Default, Serialize)]
pub struct ModelStats {
/// Number of recorded exchanges attributed to this model.
pub requests: u64,
/// Sum of `ApiUsage::input_tokens` for this model.
pub input_tokens: u64,
/// Sum of `ApiUsage::output_tokens` for this model.
pub output_tokens: u64,
/// Sum of `ApiUsage::cache_read_input_tokens` for this model.
pub cache_read_tokens: u64,
/// Sum of `ApiUsage::cache_creation_input_tokens` for this model.
pub cache_creation_tokens: u64,
}
impl MitmStore {
pub fn new() -> Self {
Self {
latest_usage: Arc::new(RwLock::new(HashMap::new())),
stats: Arc::new(RwLock::new(MitmStats::default())),
}
}
/// Record a completed API exchange with usage data.
pub async fn record_usage(&self, cascade_id: Option<&str>, usage: ApiUsage) {
debug!(
input = usage.input_tokens,
output = usage.output_tokens,
cache_read = usage.cache_read_input_tokens,
cache_create = usage.cache_creation_input_tokens,
thinking = usage.thinking_output_tokens,
response = usage.response_output_tokens,
model = ?usage.model,
provider = ?usage.api_provider,
grpc = ?usage.grpc_method,
"MITM captured API usage"
);
// Update aggregate stats
{
let mut stats = self.stats.write().await;
stats.total_requests += 1;
stats.total_input_tokens += usage.input_tokens;
stats.total_output_tokens += usage.output_tokens;
stats.total_cache_read_tokens += usage.cache_read_input_tokens;
stats.total_cache_creation_tokens += usage.cache_creation_input_tokens;
stats.total_thinking_output_tokens += usage.thinking_output_tokens;
stats.total_response_output_tokens += usage.response_output_tokens;
// Per-model breakdown
if let Some(ref model_name) = usage.model {
let model_stats = stats.per_model.entry(model_name.clone()).or_default();
model_stats.requests += 1;
model_stats.input_tokens += usage.input_tokens;
model_stats.output_tokens += usage.output_tokens;
model_stats.cache_read_tokens += usage.cache_read_input_tokens;
model_stats.cache_creation_tokens += usage.cache_creation_input_tokens;
}
}
// Store latest usage for the cascade (if we can identify it)
let key = cascade_id.map(|s| s.to_string()).unwrap_or_else(|| "_latest".to_string());
let mut latest = self.latest_usage.write().await;
latest.insert(key, usage);
// Evict old entries to prevent unbounded memory growth
const MAX_ENTRIES: usize = 500;
if latest.len() > MAX_ENTRIES {
// Find the oldest entry by captured_at and remove it
let oldest_key = latest
.iter()
.min_by_key(|(_, v)| v.captured_at)
.map(|(k, _)| k.clone());
if let Some(key) = oldest_key {
latest.remove(&key);
}
}
}
/// Get the latest usage for a cascade, consuming it (one-shot read).
///
/// Only returns exact cascade_id matches — no cross-cascade fallback.
/// The `_latest` key is only consumed when the caller explicitly requests it
/// (i.e., when the MITM couldn't identify the cascade).
pub async fn take_usage(&self, cascade_id: &str) -> Option<ApiUsage> {
let mut latest = self.latest_usage.write().await;
latest.remove(cascade_id)
}
/// Peek at the latest usage without consuming it.
#[allow(dead_code)]
pub async fn peek_usage(&self, cascade_id: &str) -> Option<ApiUsage> {
let latest = self.latest_usage.read().await;
latest.get(cascade_id)
.cloned()
}
/// Get aggregate stats.
pub async fn stats(&self) -> MitmStats {
self.stats.read().await.clone()
}
}