feat: inject images via MITM layer instead of relying on LS

The LS silently ignores the 'images' field in our
SendUserCascadeMessageRequest proto, so image data is never forwarded
to Google's API.

New approach: store the image in MitmStore, then have the MITM request
modifier inject it as an 'inlineData' part directly into the last user
message's parts array in the Google API JSON request.

Flow:
  Client → Proxy (decode base64) → MitmStore.set_pending_image()
  LS → Google API → MITM intercepts → inject inlineData part
  → Google receives image + text together

This works for all three API endpoints (responses, completions,
gemini).
This commit is contained in:
Nikketryhard
2026-02-15 17:57:32 -06:00
parent 0a33c1b706
commit 89bea030cc
7 changed files with 108 additions and 2 deletions

View File

@@ -8,7 +8,7 @@ use regex::Regex;
use serde_json::Value;
use tracing::info;
use super::store::{CapturedFunctionCall, PendingToolResult};
use super::store::{CapturedFunctionCall, PendingImage, PendingToolResult};
/// Strip ALL tool definitions.
/// Must be true: with tools present, the LS enters full agentic mode
@@ -28,6 +28,8 @@ pub struct ToolContext {
pub last_calls: Vec<CapturedFunctionCall>,
/// Client-specified generation parameters (temperature, top_p, etc.).
pub generation_params: Option<super::store::GenerationParams>,
/// Pending image to inject as inlineData in the user message.
pub pending_image: Option<PendingImage>,
}
/// Modify a streamGenerateContent request body in-place.
@@ -451,6 +453,44 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option<Vec
}
}
// ── 7. Inject pending image as inlineData ────────────────────────────
// The LS doesn't forward images from our SendUserCascadeMessage proto to
// Google's API, so we inject them here at the MITM layer.
if let Some(ref ctx) = tool_ctx {
if let Some(ref img) = ctx.pending_image {
if let Some(contents) = json
.pointer_mut("/request/contents")
.and_then(|v| v.as_array_mut())
{
// Walk backwards to the most recent user-role message that has a `parts`
// array and append the image to it as an inlineData part.
let mut injected = false;
for msg in contents.iter_mut().rev() {
let is_user = msg["role"].as_str() == Some("user");
if is_user {
if let Some(parts) = msg.get_mut("parts").and_then(|v| v.as_array_mut()) {
parts.push(serde_json::json!({
"inlineData": {
"mimeType": img.mime_type,
"data": img.base64_data
}
}));
injected = true;
changes.push(format!(
"inject image ({}; {} bytes base64)",
img.mime_type,
img.base64_data.len()
));
break;
}
}
}
if !injected {
tracing::warn!("MITM: pending image but no user message found to inject into");
}
}
}
}
if changes.is_empty() {
return None; // Nothing modified
}