feat: forward generation params via MITM + add usageMetadata to Gemini

- Add GenerationParams struct to MitmStore for temperature, top_p,
  top_k, max_output_tokens, stop_sequences, frequency/presence_penalty
- MITM modify_request injects params into request.generationConfig
- All 3 endpoints (Completions, Responses, Gemini) store client params
- Add usageMetadata to Gemini sync responses (promptTokenCount,
  candidatesTokenCount, totalTokenCount, thoughtsTokenCount)
- Add generation param fields to GeminiRequest (temperature, topP, etc.)
- Completions stream_options.include_usage emits final usage chunk
- Completions reasoning_tokens in completion_tokens_details
- Update endpoint gap analysis doc (all high-priority gaps resolved)
This commit is contained in:
Nikketryhard
2026-02-15 14:23:05 -06:00
parent 735c3e357d
commit b1bd57ab5e
9 changed files with 1216 additions and 46 deletions

View File

@@ -60,6 +60,21 @@ pub struct PendingToolResult {
pub result: serde_json::Value,
}
/// Client-specified generation parameters for MITM injection.
///
/// Set by API handlers, consumed by the MITM modify layer. Every field is
/// optional, and `GenerationParams::default()` yields a value with all
/// fields set to `None`.
///
/// `PartialEq` is derived so that values can be compared directly (for
/// example in tests); `Eq` is not available because of the `f64` fields.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct GenerationParams {
    /// Sampling temperature requested by the client.
    pub temperature: Option<f64>,
    /// Top-p (nucleus) sampling value requested by the client.
    pub top_p: Option<f64>,
    /// Top-k sampling value requested by the client.
    pub top_k: Option<u32>,
    /// Upper bound on the number of output tokens requested by the client.
    pub max_output_tokens: Option<u64>,
    /// Stop sequences requested by the client.
    pub stop_sequences: Option<Vec<String>>,
    /// Frequency penalty (OpenAI) — mapped to frequencyPenalty in Gemini.
    pub frequency_penalty: Option<f64>,
    /// Presence penalty (OpenAI) — mapped to presencePenalty in Gemini.
    pub presence_penalty: Option<f64>,
}
/// Thread-safe store for intercepted data.
///
/// Keyed by a unique request ID that we can correlate with cascade operations.
@@ -102,6 +117,10 @@ pub struct MitmStore {
captured_thinking_text: Arc<RwLock<Option<String>>>,
/// Whether the captured response is complete (finishReason received).
response_complete: Arc<AtomicBool>,
// ── Generation parameters for MITM injection ─────────────────────────
/// Client-specified sampling parameters to inject into Google API requests.
generation_params: Arc<RwLock<Option<GenerationParams>>>,
}
/// Aggregate statistics across all intercepted traffic.
@@ -144,6 +163,7 @@ impl MitmStore {
captured_response_text: Arc::new(RwLock::new(None)),
captured_thinking_text: Arc::new(RwLock::new(None)),
response_complete: Arc::new(AtomicBool::new(false)),
generation_params: Arc::new(RwLock::new(None)),
}
}
@@ -433,4 +453,21 @@ impl MitmStore {
pub async fn clear_active_cascade(&self) {
    // Take the write lock, then reset the stored cascade id to `None`.
    let mut active = self.active_cascade_id.write().await;
    *active = None;
}
// ── Generation parameters ────────────────────────────────────────────
/// Record the client's generation parameters so the MITM layer can inject them.
///
/// Any previously stored parameters are overwritten.
pub async fn set_generation_params(&self, params: GenerationParams) {
    let mut slot = self.generation_params.write().await;
    *slot = Some(params);
}
/// Return a copy of the stored generation parameters, leaving them in place.
///
/// Yields `None` when no parameters are currently stored.
pub async fn get_generation_params(&self) -> Option<GenerationParams> {
    let guard = self.generation_params.read().await;
    let snapshot = (*guard).clone();
    snapshot
}
/// Discard any stored generation parameters, resetting the slot to `None`.
pub async fn clear_generation_params(&self) {
    let mut slot = self.generation_params.write().await;
    *slot = None;
}
}