feat: forward generation params via MITM + add usageMetadata to Gemini
- Add GenerationParams struct to MitmStore for temperature, top_p, top_k, max_output_tokens, stop_sequences, frequency_penalty, presence_penalty
- MITM modify_request injects params into request.generationConfig
- All 3 endpoints (Completions, Responses, Gemini) store client params
- Add usageMetadata to Gemini sync responses (promptTokenCount, candidatesTokenCount, totalTokenCount, thoughtsTokenCount)
- Add generation param fields to GeminiRequest (temperature, topP, etc.)
- Completions stream_options.include_usage emits final usage chunk
- Completions reasoning_tokens in completion_tokens_details
- Update endpoint gap analysis doc (all high-priority gaps resolved)
This commit is contained in:
@@ -26,6 +26,8 @@ pub struct ToolContext {
|
||||
pub pending_results: Vec<PendingToolResult>,
|
||||
/// Last captured function calls for history rewriting.
|
||||
pub last_calls: Vec<CapturedFunctionCall>,
|
||||
/// Client-specified generation parameters (temperature, top_p, etc.).
|
||||
pub generation_params: Option<super::store::GenerationParams>,
|
||||
}
|
||||
|
||||
/// Modify a streamGenerateContent request body in-place.
|
||||
@@ -349,6 +351,62 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option<Vec
|
||||
}
|
||||
}
|
||||
|
||||
// ── 5. Inject client-specified generation parameters ──────────────────
|
||||
// These override the LS defaults (which are typically absent or conservative).
|
||||
// Google generationConfig fields: temperature, topP, topK, maxOutputTokens,
|
||||
// stopSequences, frequencyPenalty, presencePenalty.
|
||||
if let Some(ref ctx) = tool_ctx {
|
||||
if let Some(ref gp) = ctx.generation_params {
|
||||
// Find or create generationConfig (same path as above)
|
||||
let gc = if let Some(req) = json.get_mut("request").and_then(|v| v.as_object_mut()) {
|
||||
Some(req.entry("generationConfig")
|
||||
.or_insert_with(|| serde_json::json!({})))
|
||||
} else {
|
||||
json.as_object_mut().map(|o| {
|
||||
o.entry("generationConfig")
|
||||
.or_insert_with(|| serde_json::json!({}))
|
||||
})
|
||||
};
|
||||
|
||||
if let Some(gc) = gc.and_then(|v| v.as_object_mut()) {
|
||||
let mut injected: Vec<String> = Vec::new();
|
||||
|
||||
if let Some(t) = gp.temperature {
|
||||
gc.insert("temperature".to_string(), serde_json::json!(t));
|
||||
injected.push(format!("temperature={t}"));
|
||||
}
|
||||
if let Some(p) = gp.top_p {
|
||||
gc.insert("topP".to_string(), serde_json::json!(p));
|
||||
injected.push(format!("topP={p}"));
|
||||
}
|
||||
if let Some(k) = gp.top_k {
|
||||
gc.insert("topK".to_string(), serde_json::json!(k));
|
||||
injected.push(format!("topK={k}"));
|
||||
}
|
||||
if let Some(m) = gp.max_output_tokens {
|
||||
gc.insert("maxOutputTokens".to_string(), serde_json::json!(m));
|
||||
injected.push(format!("maxOutputTokens={m}"));
|
||||
}
|
||||
if let Some(ref seqs) = gp.stop_sequences {
|
||||
gc.insert("stopSequences".to_string(), serde_json::json!(seqs));
|
||||
injected.push(format!("stopSequences({})", seqs.len()));
|
||||
}
|
||||
if let Some(fp) = gp.frequency_penalty {
|
||||
gc.insert("frequencyPenalty".to_string(), serde_json::json!(fp));
|
||||
injected.push(format!("frequencyPenalty={fp}"));
|
||||
}
|
||||
if let Some(pp) = gp.presence_penalty {
|
||||
gc.insert("presencePenalty".to_string(), serde_json::json!(pp));
|
||||
injected.push(format!("presencePenalty={pp}"));
|
||||
}
|
||||
|
||||
if !injected.is_empty() {
|
||||
changes.push(format!("inject generationConfig: {}", injected.join(", ")));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if changes.is_empty() {
|
||||
return None; // Nothing modified
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user