feat: MITM interception for standalone LS with UID isolation
- Spawn standalone LS as dedicated 'antigravity-ls' user via sudo
- UID-scoped iptables redirect (port 443 → MITM proxy) via mitm-redirect.sh
- Combined CA bundle (system CAs + MITM CA) for Go TLS trust
- Transparent TLS interception with chunked response detection
- Google SSE parser for streamGenerateContent usage extraction
- Timeouts on all MITM operations (TLS handshake, upstream, idle)
- Forward response data immediately (no buffering)
- Per-model token usage capture (input, output, thinking)
- Update docs and known issues to reflect resolved TLS blocker
This commit is contained in:
@@ -56,9 +56,11 @@ pub struct StreamingAccumulator {
|
||||
pub output_tokens: u64,
|
||||
pub cache_creation_input_tokens: u64,
|
||||
pub cache_read_input_tokens: u64,
|
||||
pub thinking_tokens: u64,
|
||||
pub model: Option<String>,
|
||||
pub stop_reason: Option<String>,
|
||||
pub is_complete: bool,
|
||||
pub api_provider: Option<String>,
|
||||
}
|
||||
|
||||
impl StreamingAccumulator {
|
||||
@@ -66,13 +68,46 @@ impl StreamingAccumulator {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Process a single SSE event.
|
||||
/// Process a single SSE event.
|
||||
pub fn process_event(&mut self, event: &Value) {
|
||||
// ── Google format: {"response": {"usageMetadata": {...}, "modelVersion": "..."}} ──
|
||||
if let Some(response) = event.get("response") {
|
||||
// Extract usage metadata (each event has cumulative counts)
|
||||
if let Some(usage) = response.get("usageMetadata") {
|
||||
self.input_tokens = usage["promptTokenCount"].as_u64().unwrap_or(self.input_tokens);
|
||||
self.output_tokens = usage["candidatesTokenCount"].as_u64().unwrap_or(self.output_tokens);
|
||||
self.thinking_tokens = usage["thoughtsTokenCount"].as_u64().unwrap_or(self.thinking_tokens);
|
||||
}
|
||||
if let Some(model) = response["modelVersion"].as_str() {
|
||||
self.model = Some(model.to_string());
|
||||
}
|
||||
// Check for completion in candidates
|
||||
if let Some(candidates) = response.get("candidates").and_then(|c| c.as_array()) {
|
||||
for candidate in candidates {
|
||||
if let Some(reason) = candidate["finishReason"].as_str() {
|
||||
self.stop_reason = Some(reason.to_string());
|
||||
if reason == "STOP" {
|
||||
self.is_complete = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
self.api_provider = Some("google".to_string());
|
||||
trace!(
|
||||
input = self.input_tokens,
|
||||
output = self.output_tokens,
|
||||
thinking = self.thinking_tokens,
|
||||
complete = self.is_complete,
|
||||
"SSE Google: usage update"
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// ── Anthropic format: {"type": "message_start"|"message_delta"|"message_stop"} ──
|
||||
let event_type = event["type"].as_str().unwrap_or("");
|
||||
|
||||
match event_type {
|
||||
"message_start" => {
|
||||
// message_start contains the initial usage (input tokens + cache)
|
||||
if let Some(usage) = event.get("message").and_then(|m| m.get("usage")) {
|
||||
self.input_tokens = usage["input_tokens"].as_u64().unwrap_or(0);
|
||||
self.cache_creation_input_tokens = usage["cache_creation_input_tokens"].as_u64().unwrap_or(0);
|
||||
@@ -81,36 +116,27 @@ impl StreamingAccumulator {
|
||||
if let Some(model) = event.get("message").and_then(|m| m["model"].as_str()) {
|
||||
self.model = Some(model.to_string());
|
||||
}
|
||||
trace!(
|
||||
input = self.input_tokens,
|
||||
cache_read = self.cache_read_input_tokens,
|
||||
cache_create = self.cache_creation_input_tokens,
|
||||
"SSE message_start: captured input usage"
|
||||
);
|
||||
self.api_provider = Some("anthropic".to_string());
|
||||
trace!(input = self.input_tokens, "SSE Anthropic: message_start");
|
||||
}
|
||||
"message_delta" => {
|
||||
// message_delta contains the output usage
|
||||
if let Some(usage) = event.get("usage") {
|
||||
self.output_tokens = usage["output_tokens"].as_u64().unwrap_or(self.output_tokens);
|
||||
}
|
||||
if let Some(reason) = event["delta"]["stop_reason"].as_str() {
|
||||
self.stop_reason = Some(reason.to_string());
|
||||
}
|
||||
trace!(output = self.output_tokens, "SSE message_delta: updated output tokens");
|
||||
}
|
||||
"message_stop" => {
|
||||
self.is_complete = true;
|
||||
debug!(
|
||||
input = self.input_tokens,
|
||||
output = self.output_tokens,
|
||||
cache_read = self.cache_read_input_tokens,
|
||||
model = ?self.model,
|
||||
"SSE message_stop: stream complete"
|
||||
"SSE Anthropic: stream complete"
|
||||
);
|
||||
}
|
||||
"content_block_start" | "content_block_delta" | "content_block_stop" | "ping" => {
|
||||
// Content events — no usage data, just pass through
|
||||
}
|
||||
"content_block_start" | "content_block_delta" | "content_block_stop" | "ping" => {}
|
||||
_ => {
|
||||
trace!(event_type, "SSE: unknown event type");
|
||||
}
|
||||
@@ -124,11 +150,11 @@ impl StreamingAccumulator {
|
||||
output_tokens: self.output_tokens,
|
||||
cache_creation_input_tokens: self.cache_creation_input_tokens,
|
||||
cache_read_input_tokens: self.cache_read_input_tokens,
|
||||
thinking_output_tokens: 0,
|
||||
thinking_output_tokens: self.thinking_tokens,
|
||||
response_output_tokens: 0,
|
||||
model: self.model,
|
||||
stop_reason: self.stop_reason,
|
||||
api_provider: Some("anthropic".to_string()),
|
||||
api_provider: self.api_provider.unwrap_or_else(|| "unknown".to_string()).into(),
|
||||
grpc_method: None,
|
||||
captured_at: std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
|
||||
Reference in New Issue
Block a user