feat: forward generation params via MITM + add usageMetadata to Gemini
- Add GenerationParams struct to MitmStore for temperature, top_p, top_k, max_output_tokens, stop_sequences, frequency_penalty, and presence_penalty
- MITM modify_request injects params into request.generationConfig
- All 3 endpoints (Completions, Responses, Gemini) store client params
- Add usageMetadata to Gemini sync responses (promptTokenCount, candidatesTokenCount, totalTokenCount, thoughtsTokenCount)
- Add generation param fields to GeminiRequest (temperature, topP, etc.)
- Completions stream_options.include_usage emits a final usage chunk
- Completions reports reasoning_tokens in completion_tokens_details
- Update endpoint gap analysis doc (all high-priority gaps resolved)
This commit is contained in:
@@ -60,6 +60,21 @@ pub struct PendingToolResult {
|
||||
pub result: serde_json::Value,
|
||||
}
|
||||
|
||||
/// Client-specified generation parameters for MITM injection.
///
/// Set by API handlers, consumed by the MITM modify layer. Every field is
/// optional, and the derived [`Default`] leaves all of them as `None`.
///
/// `PartialEq` is derived so two parameter sets can be compared directly
/// (e.g. in tests). Because several fields are `f64`, a value holding `NaN`
/// never compares equal, not even to itself.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct GenerationParams {
    /// Requested `temperature` value, if the client supplied one.
    pub temperature: Option<f64>,
    /// Requested `top_p` value, if the client supplied one.
    pub top_p: Option<f64>,
    /// Requested `top_k` value, if the client supplied one.
    pub top_k: Option<u32>,
    /// Requested cap on output tokens, if the client supplied one.
    pub max_output_tokens: Option<u64>,
    /// Requested stop sequences, if the client supplied any.
    pub stop_sequences: Option<Vec<String>>,
    /// Frequency penalty (OpenAI) — mapped to frequencyPenalty in Gemini.
    pub frequency_penalty: Option<f64>,
    /// Presence penalty (OpenAI) — mapped to presencePenalty in Gemini.
    pub presence_penalty: Option<f64>,
}
|
||||
|
||||
/// Thread-safe store for intercepted data.
|
||||
///
|
||||
/// Keyed by a unique request ID that we can correlate with cascade operations.
|
||||
@@ -102,6 +117,10 @@ pub struct MitmStore {
|
||||
captured_thinking_text: Arc<RwLock<Option<String>>>,
|
||||
/// Whether the captured response is complete (finishReason received).
|
||||
response_complete: Arc<AtomicBool>,
|
||||
|
||||
// ── Generation parameters for MITM injection ─────────────────────────
|
||||
/// Client-specified sampling parameters to inject into Google API requests.
|
||||
generation_params: Arc<RwLock<Option<GenerationParams>>>,
|
||||
}
|
||||
|
||||
/// Aggregate statistics across all intercepted traffic.
|
||||
@@ -144,6 +163,7 @@ impl MitmStore {
|
||||
captured_response_text: Arc::new(RwLock::new(None)),
|
||||
captured_thinking_text: Arc::new(RwLock::new(None)),
|
||||
response_complete: Arc::new(AtomicBool::new(false)),
|
||||
generation_params: Arc::new(RwLock::new(None)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -433,4 +453,21 @@ impl MitmStore {
|
||||
pub async fn clear_active_cascade(&self) {
|
||||
*self.active_cascade_id.write().await = None;
|
||||
}
|
||||
|
||||
// ── Generation parameters ────────────────────────────────────────────
|
||||
|
||||
/// Store client-specified generation parameters for MITM injection.
|
||||
pub async fn set_generation_params(&self, params: GenerationParams) {
|
||||
*self.generation_params.write().await = Some(params);
|
||||
}
|
||||
|
||||
/// Read current generation parameters (non-consuming).
|
||||
pub async fn get_generation_params(&self) -> Option<GenerationParams> {
|
||||
self.generation_params.read().await.clone()
|
||||
}
|
||||
|
||||
/// Clear generation parameters.
|
||||
pub async fn clear_generation_params(&self) {
|
||||
*self.generation_params.write().await = None;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user