refactor: endpoint parity and proxy improvements

Mixed changes from recent sessions: endpoint feature parity
improvements, proxy bug fixes, and store cleanup.
This commit is contained in:
Nikketryhard
2026-02-16 21:47:00 -06:00
parent 86675fd960
commit 637fbc0e54
5 changed files with 763 additions and 692 deletions

View File

@@ -309,7 +309,11 @@ pub(crate) async fn handle_responses(
count = tools.len(),
"Stored client tools for MITM injection"
);
} else {
state.mitm_store.clear_tools().await;
}
} else {
state.mitm_store.clear_tools().await;
}
if let Some(ref choice) = body.tool_choice {
let gemini_config = openai_tool_choice_to_gemini(choice);
@@ -404,6 +408,8 @@ pub(crate) async fn handle_responses(
// Send message
state.mitm_store.set_active_cascade(&cascade_id).await;
// Store real user text for MITM injection — LS gets a dummy prompt
state.mitm_store.set_pending_user_text(user_text.clone()).await;
// Store image for MITM injection (LS doesn't forward images to Google API)
if let Some(ref img) = image {
use base64::Engine;
@@ -415,9 +421,24 @@ pub(crate) async fn handle_responses(
})
.await;
}
// Pre-flight: install channel BEFORE send_message so the MITM proxy
// can grab it when the LS fires its API call.
let has_custom_tools = state.mitm_store.get_tools().await.is_some();
let mitm_rx = if has_custom_tools {
state.mitm_store.clear_response_async().await;
state.mitm_store.clear_upstream_error().await;
let _ = state.mitm_store.take_any_function_calls().await;
let (tx, rx) = tokio::sync::mpsc::channel(64);
state.mitm_store.set_channel(tx).await;
Some(rx)
} else {
None
};
match state
.backend
.send_message_with_image(&cascade_id, &user_text, model.model_enum, image.as_ref())
.send_message_with_image(&cascade_id, ".", model.model_enum, image.as_ref())
.await
{
Ok((200, _)) => {
@@ -428,6 +449,7 @@ pub(crate) async fn handle_responses(
});
}
Ok((status, _)) => {
state.mitm_store.drop_channel().await;
return err_response(
StatusCode::BAD_GATEWAY,
format!("Antigravity returned {status}"),
@@ -435,6 +457,7 @@ pub(crate) async fn handle_responses(
);
}
Err(e) => {
state.mitm_store.drop_channel().await;
return err_response(
StatusCode::BAD_GATEWAY,
format!("Send message failed: {e}"),
@@ -472,6 +495,7 @@ pub(crate) async fn handle_responses(
cascade_id,
body.timeout,
req_params,
mitm_rx,
)
.await
} else {
@@ -482,6 +506,7 @@ pub(crate) async fn handle_responses(
cascade_id,
body.timeout,
req_params,
mitm_rx,
)
.await
}
@@ -603,54 +628,54 @@ async fn handle_responses_sync(
cascade_id: String,
timeout: u64,
params: RequestParams,
mitm_rx: Option<tokio::sync::mpsc::Receiver<crate::mitm::store::MitmEvent>>,
) -> axum::response::Response {
let created_at = now_unix();
let has_custom_tools = state.mitm_store.get_tools().await.is_some();
// Clear stale captured response and upstream errors
state.mitm_store.clear_response_async().await;
state.mitm_store.clear_upstream_error().await;
// Clear stale captured response and upstream errors (only if no pre-installed channel)
if mitm_rx.is_none() {
state.mitm_store.clear_response_async().await;
state.mitm_store.clear_upstream_error().await;
}
// ── MITM bypass: poll MitmStore directly when custom tools active ──
if has_custom_tools {
// ── MITM bypass: channel-based pipeline when custom tools active ──
if let Some(mut rx) = mitm_rx {
let start = std::time::Instant::now();
while start.elapsed().as_secs() < timeout {
// Check for upstream errors from MITM (Google API errors)
if let Some(err) = state.mitm_store.take_upstream_error().await {
return upstream_err_response(&err);
}
// Check for function calls
let captured = state.mitm_store.take_function_calls(&cascade_id).await;
if let Some(ref raw_calls) = captured {
let calls: Vec<_> = if let Some(max) = params.max_tool_calls {
raw_calls.iter().take(max as usize).collect()
} else {
raw_calls.iter().collect()
};
if !calls.is_empty() {
let mut acc_text = String::new();
let mut acc_thinking: Option<String> = None;
let mut last_usage: Option<crate::mitm::store::ApiUsage> = None;
while let Some(event) = tokio::time::timeout(
std::time::Duration::from_secs(timeout.saturating_sub(start.elapsed().as_secs())),
rx.recv(),
).await.ok().flatten() {
use crate::mitm::store::MitmEvent;
match event {
MitmEvent::ThinkingDelta(t) => { acc_thinking = Some(t); }
MitmEvent::TextDelta(t) => { acc_text = t; }
MitmEvent::Usage(u) => { last_usage = Some(u); }
MitmEvent::Grounding(_) => {} // stored by proxy directly
MitmEvent::FunctionCall(raw_calls) => {
let calls: Vec<_> = if let Some(max) = params.max_tool_calls {
raw_calls.iter().take(max as usize).collect()
} else {
raw_calls.iter().collect()
};
let mut output_items: Vec<serde_json::Value> = Vec::new();
for fc in &calls {
let call_id = format!(
"call_{}",
uuid::Uuid::new_v4().to_string().replace('-', "")[..24].to_string()
);
state
.mitm_store
.register_call_id(call_id.clone(), fc.name.clone())
.await;
state.mitm_store.register_call_id(call_id.clone(), fc.name.clone()).await;
let arguments = serde_json::to_string(&fc.args).unwrap_or_default();
output_items
.push(build_function_call_output(&call_id, &fc.name, &arguments));
output_items.push(build_function_call_output(&call_id, &fc.name, &arguments));
}
let (usage, _) = usage_from_poll(
&state.mitm_store,
&cascade_id,
&None,
&params.user_text,
"",
)
.await;
&state.mitm_store, &cascade_id, &None, &params.user_text, "",
).await;
state.mitm_store.drop_channel().await;
let resp = build_response_object(
ResponseData {
id: response_id,
@@ -666,52 +691,61 @@ async fn handle_responses_sync(
);
return Json(resp).into_response();
}
}
MitmEvent::ResponseComplete => {
if acc_text.is_empty() && acc_thinking.is_none() {
// Empty response — continue waiting
continue;
}
if acc_text.is_empty() && acc_thinking.is_some() {
// Thinking-only — LS needs to make a follow-up request.
// Reinstall channel and unblock gate.
let (new_tx, new_rx) = tokio::sync::mpsc::channel(64);
state.mitm_store.set_channel(new_tx).await;
state.mitm_store.clear_request_in_flight();
let _ = state.mitm_store.take_any_function_calls().await;
rx = new_rx;
debug!(
"Responses sync: thinking-only — new channel for follow-up, thinking_len={}",
acc_thinking.as_ref().map(|t| t.len()).unwrap_or(0)
);
continue;
}
let (usage, _) = usage_from_poll(
&state.mitm_store, &cascade_id, &None, &params.user_text, &acc_text,
).await;
state.mitm_store.drop_channel().await;
// Check for completed text response
if state.mitm_store.is_response_complete() {
let text = state
.mitm_store
.take_response_text()
.await
.unwrap_or_default();
let thinking = state.mitm_store.take_thinking_text().await;
let (usage, _) = usage_from_poll(
&state.mitm_store,
&cascade_id,
&None,
&params.user_text,
&text,
)
.await;
let mut output_items: Vec<serde_json::Value> = Vec::new();
if let Some(ref t) = acc_thinking {
output_items.push(build_reasoning_output(t));
}
let msg_id = format!("msg_{}", uuid::Uuid::new_v4().to_string().replace('-', ""));
output_items.push(build_message_output(&msg_id, &acc_text));
let mut output_items: Vec<serde_json::Value> = Vec::new();
if let Some(ref t) = thinking {
output_items.push(build_reasoning_output(t));
let resp = build_response_object(
ResponseData {
id: response_id,
model: model_name,
status: "completed",
created_at,
completed_at: Some(now_unix()),
output: output_items,
usage: Some(usage),
thinking_signature: None,
},
&params,
);
return Json(resp).into_response();
}
MitmEvent::UpstreamError(err) => {
state.mitm_store.drop_channel().await;
return upstream_err_response(&err);
}
let msg_id = format!("msg_{}", uuid::Uuid::new_v4().to_string().replace('-', ""));
output_items.push(build_message_output(&msg_id, &text));
let resp = build_response_object(
ResponseData {
id: response_id,
model: model_name,
status: "completed",
created_at,
completed_at: Some(now_unix()),
output: output_items,
usage: Some(usage),
thinking_signature: None,
},
&params,
);
return Json(resp).into_response();
}
tokio::time::sleep(tokio::time::Duration::from_millis(200)).await;
}
// Timeout — return proper error, not fake incomplete response
// Timeout
state.mitm_store.drop_channel().await;
return err_response(
StatusCode::GATEWAY_TIMEOUT,
format!("Timeout: no response from Google API after {timeout}s"),
@@ -835,6 +869,7 @@ async fn handle_responses_stream(
cascade_id: String,
timeout: u64,
params: RequestParams,
mitm_rx: Option<tokio::sync::mpsc::Receiver<crate::mitm::store::MitmEvent>>,
) -> axum::response::Response {
let stream = async_stream::stream! {
let msg_id = format!("msg_{}", uuid::Uuid::new_v4().to_string().replace('-', ""));
@@ -886,50 +921,170 @@ async fn handle_responses_stream(
let mut thinking_text: Option<String> = None;
let mut message_started = false;
let reasoning_id = format!("rs_{}", uuid::Uuid::new_v4().to_string().replace('-', ""));
let has_custom_tools = state.mitm_store.get_tools().await.is_some();
// Clear stale captured response and upstream errors
state.mitm_store.clear_response_async().await;
state.mitm_store.clear_upstream_error().await;
// Clear stale response (only if no pre-installed channel)
if mitm_rx.is_none() {
state.mitm_store.clear_response_async().await;
state.mitm_store.clear_upstream_error().await;
}
// ── MITM bypass mode (when custom tools are active) ──
// Skip LS entirely — read text, thinking, and tool calls directly from MitmStore.
if has_custom_tools {
// Channel-based pipeline: read events directly from MITM proxy.
// Channel is pre-installed before send_message to avoid race conditions.
if let Some(mut rx) = mitm_rx {
let mut last_thinking = String::new();
while start.elapsed().as_secs() < timeout {
// Check for upstream errors from MITM (Google API errors)
if let Some(err) = state.mitm_store.take_upstream_error().await {
let error_msg = super::util::upstream_error_message(&err);
let error_type = super::util::upstream_error_type(&err);
yield Ok(responses_sse_event(
"response.failed",
serde_json::json!({
"type": "response.failed",
"sequence_number": next_seq(),
"response": {
"id": &response_id,
"status": "failed",
"error": {
"type": error_type,
"message": error_msg,
"code": err.status,
},
},
}),
));
break;
}
while let Some(event) = tokio::time::timeout(
std::time::Duration::from_secs(timeout.saturating_sub(start.elapsed().as_secs())),
rx.recv(),
).await.ok().flatten() {
use crate::mitm::store::MitmEvent;
match event {
MitmEvent::ThinkingDelta(full_thinking) => {
if !thinking_emitted && full_thinking.len() > last_thinking.len() {
// First thinking text — emit reasoning output_item.added
if last_thinking.is_empty() {
yield Ok(responses_sse_event(
"response.output_item.added",
serde_json::json!({
"type": "response.output_item.added",
"sequence_number": next_seq(),
"output_index": 0,
"item": {
"id": &reasoning_id,
"type": "reasoning",
"summary": [],
},
}),
));
yield Ok(responses_sse_event(
"response.reasoning_summary_part.added",
serde_json::json!({
"type": "response.reasoning_summary_part.added",
"sequence_number": next_seq(),
"item_id": &reasoning_id,
"output_index": 0,
"summary_index": 0,
"part": { "type": "summary_text", "text": "" },
}),
));
}
let delta = &full_thinking[last_thinking.len()..];
if !delta.is_empty() {
yield Ok(responses_sse_event(
"response.reasoning_summary_text.delta",
serde_json::json!({
"type": "response.reasoning_summary_text.delta",
"sequence_number": next_seq(),
"item_id": &reasoning_id,
"output_index": 0,
"summary_index": 0,
"delta": delta,
}),
));
}
last_thinking = full_thinking;
}
}
MitmEvent::TextDelta(full_text) => {
if full_text.len() > last_text.len() {
// Finalize thinking if started but not done
if !thinking_emitted && !last_thinking.is_empty() {
thinking_emitted = true;
thinking_text = Some(last_thinking.clone());
yield Ok(responses_sse_event(
"response.reasoning_summary_text.done",
serde_json::json!({
"type": "response.reasoning_summary_text.done",
"sequence_number": next_seq(),
"item_id": &reasoning_id,
"output_index": 0,
"summary_index": 0,
"text": &last_thinking,
}),
));
yield Ok(responses_sse_event(
"response.reasoning_summary_part.done",
serde_json::json!({
"type": "response.reasoning_summary_part.done",
"sequence_number": next_seq(),
"item_id": &reasoning_id,
"output_index": 0,
"summary_index": 0,
"part": { "type": "summary_text", "text": &last_thinking },
}),
));
yield Ok(responses_sse_event(
"response.output_item.done",
serde_json::json!({
"type": "response.output_item.done",
"sequence_number": next_seq(),
"output_index": 0,
"item": {
"id": &reasoning_id,
"type": "reasoning",
"summary": [{
"type": "summary_text",
"text": &last_thinking,
}],
},
}),
));
}
// Check for function calls first
let captured = state.mitm_store.take_function_calls(&cascade_id).await;
if let Some(ref raw_calls) = captured {
let calls: Vec<_> = if let Some(max) = params.max_tool_calls {
raw_calls.iter().take(max as usize).collect()
} else {
raw_calls.iter().collect()
};
if !calls.is_empty() {
let msg_output_index: u32 = if thinking_emitted { 1 } else { 0 };
if !message_started {
message_started = true;
yield Ok(responses_sse_event(
"response.output_item.added",
serde_json::json!({
"type": "response.output_item.added",
"sequence_number": next_seq(),
"output_index": msg_output_index,
"item": build_message_output_in_progress(&msg_id),
}),
));
yield Ok(responses_sse_event(
"response.content_part.added",
serde_json::json!({
"type": "response.content_part.added",
"sequence_number": next_seq(),
"output_index": msg_output_index,
"content_index": CONTENT_IDX,
"part": {
"type": "output_text",
"text": "",
"annotations": [],
}
}),
));
}
let delta = &full_text[last_text.len()..];
if !delta.is_empty() {
let msg_output_index: u32 = if thinking_emitted { 1 } else { 0 };
yield Ok(responses_sse_event(
"response.output_text.delta",
serde_json::json!({
"type": "response.output_text.delta",
"sequence_number": next_seq(),
"item_id": &msg_id,
"output_index": msg_output_index,
"content_index": CONTENT_IDX,
"delta": delta,
}),
));
last_text = full_text;
}
}
}
MitmEvent::FunctionCall(raw_calls) => {
let calls: Vec<_> = if let Some(max) = params.max_tool_calls {
raw_calls.iter().take(max as usize).collect()
} else {
raw_calls.iter().collect()
};
let msg_output_index: u32 = if thinking_emitted { 1 } else { 0 };
for (i, fc) in calls.iter().enumerate() {
let call_id = format!(
@@ -1011,194 +1166,71 @@ async fn handle_responses_stream(
"response": response_to_json(&final_resp),
}),
));
state.mitm_store.drop_channel().await;
return;
}
}
// Stream thinking text in real-time
if !thinking_emitted {
if let Some(thinking) = state.mitm_store.peek_thinking_text().await {
if !thinking.is_empty() && thinking != last_thinking {
// First thinking text — emit reasoning output_item.added
if last_thinking.is_empty() {
yield Ok(responses_sse_event(
"response.output_item.added",
serde_json::json!({
"type": "response.output_item.added",
"sequence_number": next_seq(),
"output_index": 0,
"item": {
"id": &reasoning_id,
"type": "reasoning",
"summary": [],
},
}),
));
yield Ok(responses_sse_event(
"response.reasoning_summary_part.added",
serde_json::json!({
"type": "response.reasoning_summary_part.added",
"sequence_number": next_seq(),
"item_id": &reasoning_id,
"output_index": 0,
"summary_index": 0,
"part": { "type": "summary_text", "text": "" },
}),
));
MitmEvent::ResponseComplete => {
if !last_text.is_empty() {
let msg_idx: u32 = if thinking_emitted { 1 } else { 0 };
let (usage, _) = usage_from_poll(
&state.mitm_store, &cascade_id, &None,
&params.user_text, &last_text,
).await;
let tc = thinking_text.clone();
for evt in completion_events(
&response_id, &model_name, &msg_id, &reasoning_id,
msg_idx, CONTENT_IDX, &last_text, usage,
created_at, &seq, &params, None, tc,
) {
yield Ok(evt);
}
// Delta of new thinking text
let delta = if thinking.len() > last_thinking.len()
&& thinking.starts_with(&*last_thinking)
{
thinking[last_thinking.len()..].to_string()
} else {
thinking.clone()
};
if !delta.is_empty() {
yield Ok(responses_sse_event(
"response.reasoning_summary_text.delta",
serde_json::json!({
"type": "response.reasoning_summary_text.delta",
"sequence_number": next_seq(),
"item_id": &reasoning_id,
"output_index": 0,
"summary_index": 0,
"delta": &delta,
}),
));
}
last_thinking = thinking;
state.mitm_store.drop_channel().await;
return;
} else if !last_thinking.is_empty() {
// Thinking-only response — LS needs follow-up API calls.
// Create a new channel and unblock the gate.
let (new_tx, new_rx) = tokio::sync::mpsc::channel(64);
state.mitm_store.set_channel(new_tx).await;
state.mitm_store.clear_request_in_flight();
let _ = state.mitm_store.take_any_function_calls().await;
rx = new_rx;
debug!(
"Responses stream: thinking-only — new channel for follow-up, thinking_len={}",
last_thinking.len()
);
}
// ResponseComplete with no text and no thinking — continue waiting
}
}
// Stream response text
if let Some(text) = state.mitm_store.peek_response_text().await {
if !text.is_empty() && text != last_text {
// Finalize thinking if started but not done
if !thinking_emitted && !last_thinking.is_empty() {
thinking_emitted = true;
thinking_text = Some(last_thinking.clone());
yield Ok(responses_sse_event(
"response.reasoning_summary_text.done",
serde_json::json!({
"type": "response.reasoning_summary_text.done",
"sequence_number": next_seq(),
"item_id": &reasoning_id,
"output_index": 0,
"summary_index": 0,
"text": &last_thinking,
}),
));
yield Ok(responses_sse_event(
"response.reasoning_summary_part.done",
serde_json::json!({
"type": "response.reasoning_summary_part.done",
"sequence_number": next_seq(),
"item_id": &reasoning_id,
"output_index": 0,
"summary_index": 0,
"part": { "type": "summary_text", "text": &last_thinking },
}),
));
yield Ok(responses_sse_event(
"response.output_item.done",
serde_json::json!({
"type": "response.output_item.done",
"sequence_number": next_seq(),
"output_index": 0,
"item": {
"id": &reasoning_id,
"type": "reasoning",
"summary": [{
"type": "summary_text",
"text": &last_thinking,
}],
MitmEvent::UpstreamError(err) => {
let error_msg = super::util::upstream_error_message(&err);
let error_type = super::util::upstream_error_type(&err);
yield Ok(responses_sse_event(
"response.failed",
serde_json::json!({
"type": "response.failed",
"sequence_number": next_seq(),
"response": {
"id": &response_id,
"status": "failed",
"error": {
"type": error_type,
"message": error_msg,
"code": err.status,
},
}),
));
}
let msg_output_index: u32 = if thinking_emitted { 1 } else { 0 };
if !message_started {
message_started = true;
yield Ok(responses_sse_event(
"response.output_item.added",
serde_json::json!({
"type": "response.output_item.added",
"sequence_number": next_seq(),
"output_index": msg_output_index,
"item": build_message_output_in_progress(&msg_id),
}),
));
yield Ok(responses_sse_event(
"response.content_part.added",
serde_json::json!({
"type": "response.content_part.added",
"sequence_number": next_seq(),
"output_index": msg_output_index,
"content_index": CONTENT_IDX,
"part": {
"type": "output_text",
"text": "",
"annotations": [],
}
}),
));
}
let new_content = if text.len() > last_text.len()
&& text.starts_with(&*last_text)
{
text[last_text.len()..].to_string()
} else {
text.clone()
};
if !new_content.is_empty() {
yield Ok(responses_sse_event(
"response.output_text.delta",
serde_json::json!({
"type": "response.output_text.delta",
"sequence_number": next_seq(),
"item_id": &msg_id,
"output_index": msg_output_index,
"content_index": CONTENT_IDX,
"delta": &new_content,
}),
));
last_text = text;
}
}
// Check if response is complete
if state.mitm_store.is_response_complete() && !last_text.is_empty() {
let msg_idx: u32 = if thinking_emitted { 1 } else { 0 };
let (usage, _) = usage_from_poll(
&state.mitm_store, &cascade_id, &None,
&params.user_text, &last_text,
).await;
let tc = thinking_text.clone();
for evt in completion_events(
&response_id, &model_name, &msg_id, &reasoning_id,
msg_idx, CONTENT_IDX, &last_text, usage,
created_at, &seq, &params, None, tc,
) {
yield Ok(evt);
}
},
}),
));
state.mitm_store.drop_channel().await;
return;
}
MitmEvent::Usage(_) | MitmEvent::Grounding(_) => {
// Usage/grounding stored by proxy, consumed via usage_from_poll
}
}
// Poll interval
let poll_ms: u64 = rand::thread_rng().gen_range(150..300);
tokio::time::sleep(tokio::time::Duration::from_millis(poll_ms)).await;
}
// Timeout in bypass mode — emit error, not fake incomplete
// Timeout in channel mode
state.mitm_store.drop_channel().await;
yield Ok(responses_sse_event(
"response.failed",
serde_json::json!({