feat: initial commit — antigravity proxy with MITM, standalone LS, and snapshot tooling
This commit is contained in:
163
src/mitm/store.rs
Normal file
163
src/mitm/store.rs
Normal file
@@ -0,0 +1,163 @@
|
||||
//! Shared store for intercepted API usage data.
|
||||
//!
|
||||
//! The MITM proxy writes usage data here; the API handlers read from it.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::debug;
|
||||
|
||||
/// Token usage from an intercepted API response.
|
||||
///
|
||||
/// Covers both Anthropic JSON/SSE responses and Google gRPC protobuf responses.
|
||||
/// Fields map to the superset of Anthropic's `usage` object and Google's `ModelUsageStats` proto.
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct ApiUsage {
|
||||
pub input_tokens: u64,
|
||||
pub output_tokens: u64,
|
||||
/// Anthropic: cache_creation_input_tokens / Google: cache_write_tokens
|
||||
pub cache_creation_input_tokens: u64,
|
||||
/// Anthropic: cache_read_input_tokens / Google: cache_read_tokens
|
||||
pub cache_read_input_tokens: u64,
|
||||
/// Google-specific: thinking/reasoning output tokens (extended thinking)
|
||||
pub thinking_output_tokens: u64,
|
||||
/// Google-specific: response output tokens (non-thinking portion)
|
||||
pub response_output_tokens: u64,
|
||||
/// Total cost in USD (if provided by the API).
|
||||
pub total_cost_usd: Option<f64>,
|
||||
/// The actual model that served the request.
|
||||
pub model: Option<String>,
|
||||
/// Stop reason / finish reason from the API.
|
||||
pub stop_reason: Option<String>,
|
||||
/// API provider (e.g. "anthropic", "google")
|
||||
pub api_provider: Option<String>,
|
||||
/// gRPC method path (e.g. "/google.internal.cloud.code.v1internal.PredictionService/GenerateContent")
|
||||
pub grpc_method: Option<String>,
|
||||
/// Timestamp when this usage was captured.
|
||||
pub captured_at: u64,
|
||||
}
|
||||
|
||||
/// Thread-safe store for intercepted data.
|
||||
///
|
||||
/// Keyed by a unique request ID that we can correlate with cascade operations.
|
||||
/// In practice, we use the cascade ID + a sequence number.
|
||||
#[derive(Clone)]
|
||||
pub struct MitmStore {
|
||||
/// Most recent usage per cascade ID.
|
||||
latest_usage: Arc<RwLock<HashMap<String, ApiUsage>>>,
|
||||
/// Global aggregate stats.
|
||||
stats: Arc<RwLock<MitmStats>>,
|
||||
}
|
||||
|
||||
/// Aggregate statistics across all intercepted traffic.
|
||||
#[derive(Debug, Clone, Default, Serialize)]
|
||||
pub struct MitmStats {
|
||||
pub total_requests: u64,
|
||||
pub total_input_tokens: u64,
|
||||
pub total_output_tokens: u64,
|
||||
pub total_cache_read_tokens: u64,
|
||||
pub total_cache_creation_tokens: u64,
|
||||
pub total_thinking_output_tokens: u64,
|
||||
pub total_response_output_tokens: u64,
|
||||
/// Per-model usage breakdown (model name → stats).
|
||||
pub per_model: HashMap<String, ModelStats>,
|
||||
}
|
||||
|
||||
/// Per-model usage counters.
|
||||
#[derive(Debug, Clone, Default, Serialize)]
|
||||
pub struct ModelStats {
|
||||
pub requests: u64,
|
||||
pub input_tokens: u64,
|
||||
pub output_tokens: u64,
|
||||
pub cache_read_tokens: u64,
|
||||
pub cache_creation_tokens: u64,
|
||||
}
|
||||
|
||||
impl MitmStore {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
latest_usage: Arc::new(RwLock::new(HashMap::new())),
|
||||
stats: Arc::new(RwLock::new(MitmStats::default())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Record a completed API exchange with usage data.
|
||||
pub async fn record_usage(&self, cascade_id: Option<&str>, usage: ApiUsage) {
|
||||
debug!(
|
||||
input = usage.input_tokens,
|
||||
output = usage.output_tokens,
|
||||
cache_read = usage.cache_read_input_tokens,
|
||||
cache_create = usage.cache_creation_input_tokens,
|
||||
thinking = usage.thinking_output_tokens,
|
||||
response = usage.response_output_tokens,
|
||||
model = ?usage.model,
|
||||
provider = ?usage.api_provider,
|
||||
grpc = ?usage.grpc_method,
|
||||
"MITM captured API usage"
|
||||
);
|
||||
|
||||
// Update aggregate stats
|
||||
{
|
||||
let mut stats = self.stats.write().await;
|
||||
stats.total_requests += 1;
|
||||
stats.total_input_tokens += usage.input_tokens;
|
||||
stats.total_output_tokens += usage.output_tokens;
|
||||
stats.total_cache_read_tokens += usage.cache_read_input_tokens;
|
||||
stats.total_cache_creation_tokens += usage.cache_creation_input_tokens;
|
||||
stats.total_thinking_output_tokens += usage.thinking_output_tokens;
|
||||
stats.total_response_output_tokens += usage.response_output_tokens;
|
||||
|
||||
// Per-model breakdown
|
||||
if let Some(ref model_name) = usage.model {
|
||||
let model_stats = stats.per_model.entry(model_name.clone()).or_default();
|
||||
model_stats.requests += 1;
|
||||
model_stats.input_tokens += usage.input_tokens;
|
||||
model_stats.output_tokens += usage.output_tokens;
|
||||
model_stats.cache_read_tokens += usage.cache_read_input_tokens;
|
||||
model_stats.cache_creation_tokens += usage.cache_creation_input_tokens;
|
||||
}
|
||||
}
|
||||
|
||||
// Store latest usage for the cascade (if we can identify it)
|
||||
let key = cascade_id.map(|s| s.to_string()).unwrap_or_else(|| "_latest".to_string());
|
||||
let mut latest = self.latest_usage.write().await;
|
||||
latest.insert(key, usage);
|
||||
|
||||
// Evict old entries to prevent unbounded memory growth
|
||||
const MAX_ENTRIES: usize = 500;
|
||||
if latest.len() > MAX_ENTRIES {
|
||||
// Find the oldest entry by captured_at and remove it
|
||||
let oldest_key = latest
|
||||
.iter()
|
||||
.min_by_key(|(_, v)| v.captured_at)
|
||||
.map(|(k, _)| k.clone());
|
||||
if let Some(key) = oldest_key {
|
||||
latest.remove(&key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the latest usage for a cascade, consuming it (one-shot read).
|
||||
///
|
||||
/// Only returns exact cascade_id matches — no cross-cascade fallback.
|
||||
/// The `_latest` key is only consumed when the caller explicitly requests it
|
||||
/// (i.e., when the MITM couldn't identify the cascade).
|
||||
pub async fn take_usage(&self, cascade_id: &str) -> Option<ApiUsage> {
|
||||
let mut latest = self.latest_usage.write().await;
|
||||
latest.remove(cascade_id)
|
||||
}
|
||||
|
||||
/// Peek at the latest usage without consuming it.
|
||||
#[allow(dead_code)]
|
||||
pub async fn peek_usage(&self, cascade_id: &str) -> Option<ApiUsage> {
|
||||
let latest = self.latest_usage.read().await;
|
||||
latest.get(cascade_id)
|
||||
.cloned()
|
||||
}
|
||||
|
||||
/// Get aggregate stats.
|
||||
pub async fn stats(&self) -> MitmStats {
|
||||
self.stats.read().await.clone()
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user