fix: block ALL LS follow-up requests across connections
Move the in-flight blocking check to the top of the LLM request flow, BEFORE request modification, so that it catches follow-ups on ALL connections (the LS opens multiple parallel TLS connections). Only the very first modified request reaches Google; every other request gets a fake STOP response. Previously, each new connection independently allowed one request through before blocking, which let 4-5 requests leak per turn.
This commit is contained in:
@@ -251,7 +251,10 @@ fn looks_like_valid_message(fields: &[ProtoField], original_len: usize) -> bool
|
||||
// (e.g., a long string that happened to have a valid first-field prefix)
|
||||
if fields.len() == 1 && original_len > 100 {
|
||||
// Single-field messages of >100 bytes are suspicious unless the field is bytes/message
|
||||
matches!(&fields[0].value, ProtoValue::Bytes(_) | ProtoValue::Message(_))
|
||||
matches!(
|
||||
&fields[0].value,
|
||||
ProtoValue::Bytes(_) | ProtoValue::Message(_)
|
||||
)
|
||||
} else {
|
||||
true
|
||||
}
|
||||
@@ -328,7 +331,9 @@ fn try_extract_usage(fields: &[ProtoField]) -> Option<GrpcUsage> {
|
||||
.iter()
|
||||
.filter_map(|f| {
|
||||
if let ProtoValue::Bytes(ref b) = f.value {
|
||||
std::str::from_utf8(b).ok().map(|s| (f.number, s.to_string()))
|
||||
std::str::from_utf8(b)
|
||||
.ok()
|
||||
.map(|s| (f.number, s.to_string()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
@@ -361,14 +366,23 @@ fn try_extract_usage(fields: &[ProtoField]) -> Option<GrpcUsage> {
|
||||
// Check if there's a model-like string (field 7 = message_id or field 11 = response_id
|
||||
// can contain model names, or model enum values map to known names)
|
||||
let has_model_string = string_fields.iter().any(|(_, s)| {
|
||||
s.contains("claude") || s.contains("gemini") || s.contains("gpt")
|
||||
|| s.starts_with("models/") || s.contains("sonnet") || s.contains("opus")
|
||||
|| s.contains("flash") || s.contains("pro")
|
||||
s.contains("claude")
|
||||
|| s.contains("gemini")
|
||||
|| s.contains("gpt")
|
||||
|| s.starts_with("models/")
|
||||
|| s.contains("sonnet")
|
||||
|| s.contains("opus")
|
||||
|| s.contains("flash")
|
||||
|| s.contains("pro")
|
||||
});
|
||||
|
||||
// Check for fields at the known ModelUsageStats field numbers
|
||||
let has_field_2 = fields.iter().any(|f| f.number == 2 && matches!(f.value, ProtoValue::Varint(_)));
|
||||
let has_field_3 = fields.iter().any(|f| f.number == 3 && matches!(f.value, ProtoValue::Varint(_)));
|
||||
let has_field_2 = fields
|
||||
.iter()
|
||||
.any(|f| f.number == 2 && matches!(f.value, ProtoValue::Varint(_)));
|
||||
let has_field_3 = fields
|
||||
.iter()
|
||||
.any(|f| f.number == 3 && matches!(f.value, ProtoValue::Varint(_)));
|
||||
|
||||
// Strong signal: has both input and output token fields
|
||||
let is_likely_usage = (has_field_2 && has_field_3) || has_model_string;
|
||||
@@ -392,8 +406,8 @@ fn try_extract_usage(fields: &[ProtoField]) -> Option<GrpcUsage> {
|
||||
// field 1 = model enum (varint, not string!)
|
||||
2 => usage.input_tokens = v,
|
||||
3 => usage.output_tokens = v,
|
||||
4 => usage.cache_write_tokens = v, // VERIFIED: field 4
|
||||
5 => usage.cache_read_tokens = v, // VERIFIED: field 5
|
||||
4 => usage.cache_write_tokens = v, // VERIFIED: field 4
|
||||
5 => usage.cache_read_tokens = v, // VERIFIED: field 5
|
||||
// field 6 = api_provider enum (varint)
|
||||
9 => usage.thinking_output_tokens = v, // VERIFIED: field 9
|
||||
10 => usage.response_output_tokens = v, // VERIFIED: field 10
|
||||
@@ -486,11 +500,11 @@ pub fn parse_grpc_response_for_usage(body: &[u8]) -> Option<GrpcUsage> {
|
||||
fn model_enum_name(enum_val: u64) -> &'static str {
|
||||
match enum_val {
|
||||
// Placeholder models (1000 + N)
|
||||
1007 => "gemini-3-pro", // MODEL_PLACEHOLDER_M7
|
||||
1008 => "gemini-3-pro-high", // MODEL_PLACEHOLDER_M8
|
||||
1012 => "claude-opus-4.5", // MODEL_PLACEHOLDER_M12
|
||||
1018 => "gemini-3-flash", // MODEL_PLACEHOLDER_M18
|
||||
1026 => "claude-opus-4.6", // MODEL_PLACEHOLDER_M26
|
||||
1007 => "gemini-3-pro", // MODEL_PLACEHOLDER_M7
|
||||
1008 => "gemini-3-pro-high", // MODEL_PLACEHOLDER_M8
|
||||
1012 => "claude-opus-4.5", // MODEL_PLACEHOLDER_M12
|
||||
1018 => "gemini-3-flash", // MODEL_PLACEHOLDER_M18
|
||||
1026 => "claude-opus-4.6", // MODEL_PLACEHOLDER_M26
|
||||
|
||||
// Claude models (named)
|
||||
281 => "claude-4-sonnet",
|
||||
@@ -629,13 +643,13 @@ mod tests {
|
||||
data.push(v as u8);
|
||||
}
|
||||
|
||||
encode_varint_field(&mut data, 1, 5); // model enum
|
||||
encode_varint_field(&mut data, 2, 1000); // input_tokens
|
||||
encode_varint_field(&mut data, 3, 500); // output_tokens
|
||||
encode_varint_field(&mut data, 4, 100); // cache_write_tokens
|
||||
encode_varint_field(&mut data, 5, 200); // cache_read_tokens
|
||||
encode_varint_field(&mut data, 9, 300); // thinking_output_tokens
|
||||
encode_varint_field(&mut data, 10, 200); // response_output_tokens
|
||||
encode_varint_field(&mut data, 1, 5); // model enum
|
||||
encode_varint_field(&mut data, 2, 1000); // input_tokens
|
||||
encode_varint_field(&mut data, 3, 500); // output_tokens
|
||||
encode_varint_field(&mut data, 4, 100); // cache_write_tokens
|
||||
encode_varint_field(&mut data, 5, 200); // cache_read_tokens
|
||||
encode_varint_field(&mut data, 9, 300); // thinking_output_tokens
|
||||
encode_varint_field(&mut data, 10, 200); // response_output_tokens
|
||||
|
||||
let fields = decode_proto(&data);
|
||||
let usage = try_extract_usage(&fields).expect("should extract usage");
|
||||
|
||||
Reference in New Issue
Block a user