fix: block ALL LS follow-up requests across connections

Move the in-flight blocking check to the top of the LLM request flow, BEFORE request modification, so that it catches follow-ups on ALL connections (the LS opens multiple parallel TLS connections). Only the very first modified request reaches Google; all others get fake STOP responses. Previously, each new connection independently allowed one request through before blocking, which let 4–5 requests leak per turn.
2026-02-16 00:57:33 -06:00
parent a8f3c8915f
commit 3fdd0368a0
23 changed files with 992 additions and 568 deletions
--- a/src/mitm/proto.rs
+++ b/src/mitm/proto.rs
@@ -251,7 +251,10 @@ fn looks_like_valid_message(fields: &[ProtoField], original_len: usize) -> bool
    // (e.g., a long string that happened to have a valid first-field prefix)
    if fields.len() == 1 && original_len > 100 {
        // Single-field messages of >100 bytes are suspicious unless the field is bytes/message
-        matches!(&fields[0].value, ProtoValue::Bytes(_) | ProtoValue::Message(_))
+        matches!(
+            &fields[0].value,
+            ProtoValue::Bytes(_) | ProtoValue::Message(_)
+        )
    } else {
        true
    }
@@ -328,7 +331,9 @@ fn try_extract_usage(fields: &[ProtoField]) -> Option<GrpcUsage> {
        .iter()
        .filter_map(|f| {
            if let ProtoValue::Bytes(ref b) = f.value {
-                std::str::from_utf8(b).ok().map(|s| (f.number, s.to_string()))
+                std::str::from_utf8(b)
+                    .ok()
+                    .map(|s| (f.number, s.to_string()))
            } else {
                None
            }
@@ -361,14 +366,23 @@ fn try_extract_usage(fields: &[ProtoField]) -> Option<GrpcUsage> {
    // Check if there's a model-like string (field 7 = message_id or field 11 = response_id
    // can contain model names, or model enum values map to known names)
    let has_model_string = string_fields.iter().any(|(_, s)| {
-        s.contains("claude") || s.contains("gemini") || s.contains("gpt")
-            || s.starts_with("models/") || s.contains("sonnet") || s.contains("opus")
-            || s.contains("flash") || s.contains("pro")
+        s.contains("claude")
+            || s.contains("gemini")
+            || s.contains("gpt")
+            || s.starts_with("models/")
+            || s.contains("sonnet")
+            || s.contains("opus")
+            || s.contains("flash")
+            || s.contains("pro")
    });

    // Check for fields at the known ModelUsageStats field numbers
-    let has_field_2 = fields.iter().any(|f| f.number == 2 && matches!(f.value, ProtoValue::Varint(_)));
-    let has_field_3 = fields.iter().any(|f| f.number == 3 && matches!(f.value, ProtoValue::Varint(_)));
+    let has_field_2 = fields
+        .iter()
+        .any(|f| f.number == 2 && matches!(f.value, ProtoValue::Varint(_)));
+    let has_field_3 = fields
+        .iter()
+        .any(|f| f.number == 3 && matches!(f.value, ProtoValue::Varint(_)));

    // Strong signal: has both input and output token fields
    let is_likely_usage = (has_field_2 && has_field_3) || has_model_string;
@@ -392,8 +406,8 @@ fn try_extract_usage(fields: &[ProtoField]) -> Option<GrpcUsage> {
                    // field 1 = model enum (varint, not string!)
                    2 => usage.input_tokens = v,
                    3 => usage.output_tokens = v,
-                    4 => usage.cache_write_tokens = v,    // VERIFIED: field 4
-                    5 => usage.cache_read_tokens = v,     // VERIFIED: field 5
+                    4 => usage.cache_write_tokens = v, // VERIFIED: field 4
+                    5 => usage.cache_read_tokens = v,  // VERIFIED: field 5
                    // field 6 = api_provider enum (varint)
                    9 => usage.thinking_output_tokens = v, // VERIFIED: field 9
                    10 => usage.response_output_tokens = v, // VERIFIED: field 10
@@ -486,11 +500,11 @@ pub fn parse_grpc_response_for_usage(body: &[u8]) -> Option<GrpcUsage> {
 fn model_enum_name(enum_val: u64) -> &'static str {
    match enum_val {
        // Placeholder models (1000 + N)
-        1007 => "gemini-3-pro",          // MODEL_PLACEHOLDER_M7
-        1008 => "gemini-3-pro-high",     // MODEL_PLACEHOLDER_M8
-        1012 => "claude-opus-4.5",       // MODEL_PLACEHOLDER_M12
-        1018 => "gemini-3-flash",        // MODEL_PLACEHOLDER_M18
-        1026 => "claude-opus-4.6",       // MODEL_PLACEHOLDER_M26
+        1007 => "gemini-3-pro",      // MODEL_PLACEHOLDER_M7
+        1008 => "gemini-3-pro-high", // MODEL_PLACEHOLDER_M8
+        1012 => "claude-opus-4.5",   // MODEL_PLACEHOLDER_M12
+        1018 => "gemini-3-flash",    // MODEL_PLACEHOLDER_M18
+        1026 => "claude-opus-4.6",   // MODEL_PLACEHOLDER_M26

        // Claude models (named)
        281 => "claude-4-sonnet",
@@ -629,13 +643,13 @@ mod tests {
            data.push(v as u8);
        }

-        encode_varint_field(&mut data, 1, 5);      // model enum
-        encode_varint_field(&mut data, 2, 1000);    // input_tokens
-        encode_varint_field(&mut data, 3, 500);     // output_tokens
-        encode_varint_field(&mut data, 4, 100);     // cache_write_tokens
-        encode_varint_field(&mut data, 5, 200);     // cache_read_tokens
-        encode_varint_field(&mut data, 9, 300);     // thinking_output_tokens
-        encode_varint_field(&mut data, 10, 200);    // response_output_tokens
+        encode_varint_field(&mut data, 1, 5); // model enum
+        encode_varint_field(&mut data, 2, 1000); // input_tokens
+        encode_varint_field(&mut data, 3, 500); // output_tokens
+        encode_varint_field(&mut data, 4, 100); // cache_write_tokens
+        encode_varint_field(&mut data, 5, 200); // cache_read_tokens
+        encode_varint_field(&mut data, 9, 300); // thinking_output_tokens
+        encode_varint_field(&mut data, 10, 200); // response_output_tokens

        let fields = decode_proto(&data);
        let usage = try_extract_usage(&fields).expect("should extract usage");