feat: forward generation params via MITM + add usageMetadata to Gemini
- Add GenerationParams struct to MitmStore for temperature, top_p, top_k, max_output_tokens, stop_sequences, frequency_penalty, and presence_penalty
- MITM modify_request injects params into request.generationConfig
- All 3 endpoints (Completions, Responses, Gemini) store client params
- Add usageMetadata to Gemini sync responses (promptTokenCount, candidatesTokenCount, totalTokenCount, thoughtsTokenCount)
- Add generation param fields to GeminiRequest (temperature, topP, etc.)
- Completions stream_options.include_usage emits final usage chunk
- Completions reasoning_tokens in completion_tokens_details
- Update endpoint gap analysis doc (all high-priority gaps resolved)
This commit is contained in:
@@ -242,6 +242,25 @@ pub(crate) async fn handle_responses(
|
||||
state.mitm_store.set_tool_config(gemini_config).await;
|
||||
}
|
||||
|
||||
// Store generation parameters for MITM injection
|
||||
{
|
||||
use crate::mitm::store::GenerationParams;
|
||||
let gp = GenerationParams {
|
||||
temperature: body.temperature,
|
||||
top_p: body.top_p,
|
||||
top_k: None,
|
||||
max_output_tokens: body.max_output_tokens,
|
||||
stop_sequences: None,
|
||||
frequency_penalty: None,
|
||||
presence_penalty: None,
|
||||
};
|
||||
if gp.temperature.is_some() || gp.top_p.is_some() || gp.max_output_tokens.is_some() {
|
||||
state.mitm_store.set_generation_params(gp).await;
|
||||
} else {
|
||||
state.mitm_store.clear_generation_params().await;
|
||||
}
|
||||
}
|
||||
|
||||
let response_id = format!(
|
||||
"resp_{}",
|
||||
uuid::Uuid::new_v4().to_string().replace('-', "")
|
||||
@@ -1003,19 +1022,21 @@ async fn handle_responses_stream(
|
||||
}
|
||||
};
|
||||
|
||||
let mut thinking_started = false;
|
||||
let mut thinking_done = false;
|
||||
let mut last_thinking_len: usize = 0;
|
||||
|
||||
while start.elapsed().as_secs() < timeout {
|
||||
if let Ok((status, data)) = state.backend.get_steps(&cascade_id).await {
|
||||
if status == 200 {
|
||||
if let Some(steps) = data["steps"].as_array() {
|
||||
|
||||
// Check for thinking content (appears before response text)
|
||||
if !thinking_emitted {
|
||||
if let Some(tc) = extract_thinking_content(steps) {
|
||||
thinking_text = Some(tc.clone());
|
||||
thinking_emitted = true;
|
||||
// ── Phase 1: Stream thinking deltas progressively ──
|
||||
if let Some(tc) = extract_thinking_content(steps) {
|
||||
if !thinking_started {
|
||||
// First time we see thinking — emit structure events
|
||||
thinking_started = true;
|
||||
|
||||
// Emit full reasoning event sequence at output_index 0
|
||||
yield Ok(responses_sse_event(
|
||||
"response.output_item.added",
|
||||
serde_json::json!({
|
||||
@@ -1040,6 +1061,14 @@ async fn handle_responses_stream(
|
||||
"part": { "type": "summary_text", "text": "" },
|
||||
}),
|
||||
));
|
||||
}
|
||||
|
||||
// Emit delta if thinking text has grown
|
||||
if tc.len() > last_thinking_len {
|
||||
let delta = &tc[last_thinking_len..];
|
||||
last_thinking_len = tc.len();
|
||||
thinking_text = Some(tc.clone());
|
||||
|
||||
yield Ok(responses_sse_event(
|
||||
"response.reasoning_summary_text.delta",
|
||||
serde_json::json!({
|
||||
@@ -1048,9 +1077,22 @@ async fn handle_responses_stream(
|
||||
"item_id": &reasoning_id,
|
||||
"output_index": 0,
|
||||
"summary_index": 0,
|
||||
"delta": &tc,
|
||||
"delta": delta,
|
||||
}),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// ── Phase 2: Stream text deltas ──
|
||||
let text = extract_response_text(steps);
|
||||
let msg_output_index: u32 = if thinking_started { 1 } else { 0 };
|
||||
|
||||
if !text.is_empty() && text != last_text {
|
||||
// Finalize thinking when response text first appears
|
||||
if thinking_started && !thinking_done {
|
||||
thinking_done = true;
|
||||
let final_thinking = thinking_text.clone().unwrap_or_default();
|
||||
|
||||
yield Ok(responses_sse_event(
|
||||
"response.reasoning_summary_text.done",
|
||||
serde_json::json!({
|
||||
@@ -1059,7 +1101,7 @@ async fn handle_responses_stream(
|
||||
"item_id": &reasoning_id,
|
||||
"output_index": 0,
|
||||
"summary_index": 0,
|
||||
"text": &tc,
|
||||
"text": &final_thinking,
|
||||
}),
|
||||
));
|
||||
yield Ok(responses_sse_event(
|
||||
@@ -1070,7 +1112,7 @@ async fn handle_responses_stream(
|
||||
"item_id": &reasoning_id,
|
||||
"output_index": 0,
|
||||
"summary_index": 0,
|
||||
"part": { "type": "summary_text", "text": &tc },
|
||||
"part": { "type": "summary_text", "text": &final_thinking },
|
||||
}),
|
||||
));
|
||||
yield Ok(responses_sse_event(
|
||||
@@ -1084,19 +1126,13 @@ async fn handle_responses_stream(
|
||||
"type": "reasoning",
|
||||
"summary": [{
|
||||
"type": "summary_text",
|
||||
"text": &tc,
|
||||
"text": &final_thinking,
|
||||
}],
|
||||
},
|
||||
}),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// ── Phase 2: Stream text deltas ──
|
||||
let text = extract_response_text(steps);
|
||||
let msg_output_index: u32 = if thinking_emitted { 1 } else { 0 };
|
||||
|
||||
if !text.is_empty() && text != last_text {
|
||||
// Emit message output_item.added on first text
|
||||
if !message_started {
|
||||
message_started = true;
|
||||
@@ -1153,7 +1189,7 @@ async fn handle_responses_stream(
|
||||
if is_response_done(steps) && !last_text.is_empty() {
|
||||
debug!("Response done, text length={}", last_text.len());
|
||||
let mu = extract_model_usage(steps);
|
||||
let msg_idx: u32 = if thinking_emitted { 1 } else { 0 };
|
||||
let msg_idx: u32 = if thinking_started { 1 } else { 0 };
|
||||
let (usage, mitm_thinking) = usage_from_poll(&state.mitm_store, &cascade_id, &mu, &params.user_text, &last_text).await;
|
||||
let ts = extract_thinking_signature(steps);
|
||||
// Use already-captured thinking, or MITM thinking, or LS thinking
|
||||
@@ -1179,7 +1215,7 @@ async fn handle_responses_stream(
|
||||
if run_status.contains("IDLE") && !last_text.is_empty() {
|
||||
debug!("Trajectory IDLE, text length={}", last_text.len());
|
||||
let mu = extract_model_usage(steps);
|
||||
let msg_idx: u32 = if thinking_emitted { 1 } else { 0 };
|
||||
let msg_idx: u32 = if thinking_started { 1 } else { 0 };
|
||||
let (usage, mitm_thinking) = usage_from_poll(&state.mitm_store, &cascade_id, &mu, &params.user_text, &last_text).await;
|
||||
let ts = extract_thinking_signature(steps);
|
||||
let tc = thinking_text.clone()
|
||||
|
||||
Reference in New Issue
Block a user