feat: forward generation params via MITM + add usageMetadata to Gemini

- Add GenerationParams struct to MitmStore for temperature, top_p,
  top_k, max_output_tokens, stop_sequences, frequency/presence_penalty
- MITM modify_request injects params into request.generationConfig
- All 3 endpoints (Completions, Responses, Gemini) store client params
- Add usageMetadata to Gemini sync responses (promptTokenCount,
  candidatesTokenCount, totalTokenCount, thoughtsTokenCount)
- Add generation param fields to GeminiRequest (temperature, topP, etc.)
- Completions stream_options.include_usage emits final usage chunk
- Completions includes reasoning_tokens in completion_tokens_details
- Update endpoint gap analysis doc (all high-priority gaps resolved)
This commit is contained in:
Nikketryhard
2026-02-15 14:23:05 -06:00
parent 735c3e357d
commit b1bd57ab5e
9 changed files with 1216 additions and 46 deletions

View File

@@ -26,6 +26,8 @@ pub struct ToolContext {
pub pending_results: Vec<PendingToolResult>,
/// Last captured function calls for history rewriting.
pub last_calls: Vec<CapturedFunctionCall>,
/// Client-specified generation parameters (temperature, top_p, etc.).
pub generation_params: Option<super::store::GenerationParams>,
}
/// Modify a streamGenerateContent request body in-place.
@@ -349,6 +351,62 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option<Vec
}
}
// ── 5. Inject client-specified generation parameters ──────────────────
// These override the LS defaults (which are typically absent or conservative).
// Google generationConfig fields: temperature, topP, topK, maxOutputTokens,
// stopSequences, frequencyPenalty, presencePenalty.
//
// Runs only when a ToolContext is supplied and it carries generation_params.
// Every parameter that is `Some` is written under its camelCase key; `None`
// parameters are skipped, so any value already in the request is left alone.
// NOTE(review): "LS" is not defined in this section — presumably the upstream
// client whose request is being rewritten; confirm.
if let Some(ref ctx) = tool_ctx {
if let Some(ref gp) = ctx.generation_params {
// Find or create generationConfig (same path as above)
// Preferred location is `request.generationConfig` when the body has an
// object-valued "request" key; otherwise fall back to a top-level
// `generationConfig` on the body itself. A missing entry is created as `{}`.
// Note the entry is created before any parameter is inspected, so an empty
// generationConfig can be added even when every parameter is `None`.
let gc = if let Some(req) = json.get_mut("request").and_then(|v| v.as_object_mut()) {
Some(req.entry("generationConfig")
.or_insert_with(|| serde_json::json!({})))
} else {
json.as_object_mut().map(|o| {
o.entry("generationConfig")
.or_insert_with(|| serde_json::json!({}))
})
};
// If an existing generationConfig is not a JSON object (or the body is not
// an object at all), this yields None and injection is skipped silently.
if let Some(gc) = gc.and_then(|v| v.as_object_mut()) {
// Human-readable "key=value" summary of what was written, used only for
// the change-log entry pushed below.
let mut injected: Vec<String> = Vec::new();
if let Some(t) = gp.temperature {
gc.insert("temperature".to_string(), serde_json::json!(t));
injected.push(format!("temperature={t}"));
}
if let Some(p) = gp.top_p {
gc.insert("topP".to_string(), serde_json::json!(p));
injected.push(format!("topP={p}"));
}
if let Some(k) = gp.top_k {
gc.insert("topK".to_string(), serde_json::json!(k));
injected.push(format!("topK={k}"));
}
if let Some(m) = gp.max_output_tokens {
gc.insert("maxOutputTokens".to_string(), serde_json::json!(m));
injected.push(format!("maxOutputTokens={m}"));
}
if let Some(ref seqs) = gp.stop_sequences {
gc.insert("stopSequences".to_string(), serde_json::json!(seqs));
// Only the number of stop sequences is summarised, not their contents.
injected.push(format!("stopSequences({})", seqs.len()));
}
if let Some(fp) = gp.frequency_penalty {
gc.insert("frequencyPenalty".to_string(), serde_json::json!(fp));
injected.push(format!("frequencyPenalty={fp}"));
}
if let Some(pp) = gp.presence_penalty {
gc.insert("presencePenalty".to_string(), serde_json::json!(pp));
injected.push(format!("presencePenalty={pp}"));
}
// Record a change only when at least one parameter was actually written;
// an untouched (or merely created-empty) generationConfig adds nothing
// to `changes`.
if !injected.is_empty() {
changes.push(format!("inject generationConfig: {}", injected.join(", ")));
}
}
}
}
if changes.is_empty() {
return None; // Nothing modified
}