fix: block ALL LS follow-up requests across connections

Move the in-flight blocking check to the top of the LLM request flow,
BEFORE request modification. This catches follow-ups on ALL connections
(the LS opens multiple parallel TLS connections). Only the very first
modified request reaches Google — all others get fake STOP responses.

Previously, each new connection independently allowed one request
through before blocking, letting 4-5 requests leak per turn.
This commit is contained in:
Nikketryhard
2026-02-16 00:57:33 -06:00
parent a8f3c8915f
commit 3fdd0368a0
23 changed files with 992 additions and 568 deletions

View File

@@ -44,7 +44,6 @@ pub fn router(state: Arc<AppState>) -> Router {
post(completions::handle_completions),
)
.route("/v1/gemini", post(gemini::handle_gemini))
.route("/v1/models", get(handle_models))
.route("/v1/sessions", get(handle_list_sessions))
.route("/v1/sessions/{id}", delete(handle_delete_session))
@@ -106,9 +105,7 @@ async fn handle_models() -> Json<serde_json::Value> {
Json(serde_json::json!({"object": "list", "data": models}))
}
async fn handle_list_sessions(
State(state): State<Arc<AppState>>,
) -> Json<serde_json::Value> {
async fn handle_list_sessions(State(state): State<Arc<AppState>>) -> Json<serde_json::Value> {
let sessions = state.sessions.list_sessions().await;
Json(serde_json::json!({"sessions": sessions}))
}
@@ -155,9 +152,7 @@ async fn handle_set_token(
)
}
async fn handle_usage(
State(state): State<Arc<AppState>>,
) -> Json<serde_json::Value> {
async fn handle_usage(State(state): State<Arc<AppState>>) -> Json<serde_json::Value> {
let stats = state.mitm_store.stats().await;
Json(serde_json::json!({
"mitm": {
@@ -174,9 +169,7 @@ async fn handle_usage(
}))
}
async fn handle_quota(
State(state): State<Arc<AppState>>,
) -> Json<serde_json::Value> {
async fn handle_quota(State(state): State<Arc<AppState>>) -> Json<serde_json::Value> {
let snap = state.quota_store.snapshot().await;
Json(serde_json::to_value(snap).unwrap_or_default())
}