feat: inject images via MITM layer instead of relying on LS
The LS silently ignores the 'images' field from our SendUserCascadeMessageRequest proto — it never forwards image data to Google's API. New approach: store the image in MitmStore, then have the MITM request modifier inject it as 'inlineData' directly into the last user message's parts array in the Google API JSON request. Flow: (1) Client → Proxy (decode base64) → MitmStore.set_pending_image(); (2) LS → Google API → MITM intercepts → inject inlineData part → Google receives image + text together. This works for all three API endpoints (responses, completions, gemini).
This commit is contained in:
@@ -8,7 +8,7 @@ use regex::Regex;
|
||||
use serde_json::Value;
|
||||
use tracing::info;
|
||||
|
||||
use super::store::{CapturedFunctionCall, PendingToolResult};
|
||||
use super::store::{CapturedFunctionCall, PendingImage, PendingToolResult};
|
||||
|
||||
/// Strip ALL tool definitions.
|
||||
/// Must be true: with tools present, the LS enters full agentic mode
|
||||
@@ -28,6 +28,8 @@ pub struct ToolContext {
|
||||
pub last_calls: Vec<CapturedFunctionCall>,
|
||||
/// Client-specified generation parameters (temperature, top_p, etc.).
|
||||
pub generation_params: Option<super::store::GenerationParams>,
|
||||
/// Pending image to inject as inlineData in the user message.
|
||||
pub pending_image: Option<PendingImage>,
|
||||
}
|
||||
|
||||
/// Modify a streamGenerateContent request body in-place.
|
||||
@@ -451,6 +453,44 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option<Vec
|
||||
}
|
||||
}
|
||||
|
||||
// ── 7. Inject pending image as inlineData ────────────────────────────
|
||||
// The LS doesn't forward images from our SendUserCascadeMessage proto to
|
||||
// Google's API, so we inject them here at the MITM layer.
|
||||
if let Some(ref ctx) = tool_ctx {
|
||||
if let Some(ref img) = ctx.pending_image {
|
||||
if let Some(contents) = json
|
||||
.pointer_mut("/request/contents")
|
||||
.and_then(|v| v.as_array_mut())
|
||||
{
|
||||
// Find the last user-role message and add inlineData to its parts
|
||||
let mut injected = false;
|
||||
for msg in contents.iter_mut().rev() {
|
||||
let is_user = msg["role"].as_str() == Some("user");
|
||||
if is_user {
|
||||
if let Some(parts) = msg.get_mut("parts").and_then(|v| v.as_array_mut()) {
|
||||
parts.push(serde_json::json!({
|
||||
"inlineData": {
|
||||
"mimeType": img.mime_type,
|
||||
"data": img.base64_data
|
||||
}
|
||||
}));
|
||||
injected = true;
|
||||
changes.push(format!(
|
||||
"inject image ({}; {} bytes base64)",
|
||||
img.mime_type,
|
||||
img.base64_data.len()
|
||||
));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if !injected {
|
||||
tracing::warn!("MITM: pending image but no user message found to inject into");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if changes.is_empty() {
|
||||
return None; // Nothing modified
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user