feat: inject images via MITM layer instead of relying on LS
The LS silently ignores the 'images' field from our SendUserCascadeMessageRequest proto — it never forwards image data to Google's API.

New approach: store the image in MitmStore, then the MITM request modifier injects it as 'inlineData' directly into the last user message's parts array in the Google API JSON request.

Flow:
  1. Client → Proxy (decode base64) → MitmStore.set_pending_image()
  2. LS → Google API → MITM intercepts → inject inlineData part
  3. Google receives image + text together

This works for all three API endpoints (responses, completions, gemini).
This commit is contained in:
@@ -303,6 +303,16 @@ pub(crate) async fn handle_completions(
|
|||||||
|
|
||||||
// Send message on primary cascade
|
// Send message on primary cascade
|
||||||
state.mitm_store.set_active_cascade(&cascade_id).await;
|
state.mitm_store.set_active_cascade(&cascade_id).await;
|
||||||
|
// Store image for MITM injection (LS doesn't forward images to Google API)
|
||||||
|
if let Some(ref img) = image {
|
||||||
|
use base64::Engine;
|
||||||
|
state.mitm_store.set_pending_image(
|
||||||
|
crate::mitm::store::PendingImage {
|
||||||
|
base64_data: base64::engine::general_purpose::STANDARD.encode(&img.data),
|
||||||
|
mime_type: img.mime_type.clone(),
|
||||||
|
}
|
||||||
|
).await;
|
||||||
|
}
|
||||||
match state
|
match state
|
||||||
.backend
|
.backend
|
||||||
.send_message_with_image(&cascade_id, &user_text, model.model_enum, image.as_ref())
|
.send_message_with_image(&cascade_id, &user_text, model.model_enum, image.as_ref())
|
||||||
|
|||||||
@@ -274,6 +274,16 @@ pub(crate) async fn handle_gemini(
|
|||||||
|
|
||||||
// Send message
|
// Send message
|
||||||
state.mitm_store.set_active_cascade(&cascade_id).await;
|
state.mitm_store.set_active_cascade(&cascade_id).await;
|
||||||
|
// Store image for MITM injection (LS doesn't forward images to Google API)
|
||||||
|
if let Some(ref img) = image {
|
||||||
|
use base64::Engine;
|
||||||
|
state.mitm_store.set_pending_image(
|
||||||
|
crate::mitm::store::PendingImage {
|
||||||
|
base64_data: base64::engine::general_purpose::STANDARD.encode(&img.data),
|
||||||
|
mime_type: img.mime_type.clone(),
|
||||||
|
}
|
||||||
|
).await;
|
||||||
|
}
|
||||||
match state
|
match state
|
||||||
.backend
|
.backend
|
||||||
.send_message_with_image(&cascade_id, &user_text, model.model_enum, image.as_ref())
|
.send_message_with_image(&cascade_id, &user_text, model.model_enum, image.as_ref())
|
||||||
|
|||||||
@@ -352,6 +352,16 @@ pub(crate) async fn handle_responses(
|
|||||||
|
|
||||||
// Send message
|
// Send message
|
||||||
state.mitm_store.set_active_cascade(&cascade_id).await;
|
state.mitm_store.set_active_cascade(&cascade_id).await;
|
||||||
|
// Store image for MITM injection (LS doesn't forward images to Google API)
|
||||||
|
if let Some(ref img) = image {
|
||||||
|
use base64::Engine;
|
||||||
|
state.mitm_store.set_pending_image(
|
||||||
|
crate::mitm::store::PendingImage {
|
||||||
|
base64_data: base64::engine::general_purpose::STANDARD.encode(&img.data),
|
||||||
|
mime_type: img.mime_type.clone(),
|
||||||
|
}
|
||||||
|
).await;
|
||||||
|
}
|
||||||
match state
|
match state
|
||||||
.backend
|
.backend
|
||||||
.send_message_with_image(&cascade_id, &user_text, model.model_enum, image.as_ref())
|
.send_message_with_image(&cascade_id, &user_text, model.model_enum, image.as_ref())
|
||||||
|
|||||||
@@ -366,6 +366,12 @@ impl Backend {
|
|||||||
return Err("No OAuth token available".to_string());
|
return Err("No OAuth token available".to_string());
|
||||||
}
|
}
|
||||||
let proto = crate::proto::build_request_with_image(cascade_id, text, &token, model_enum, image);
|
let proto = crate::proto::build_request_with_image(cascade_id, text, &token, model_enum, image);
|
||||||
|
if image.is_some() {
|
||||||
|
tracing::info!(
|
||||||
|
proto_size = proto.len(),
|
||||||
|
"SendUserCascadeMessage proto built with image"
|
||||||
|
);
|
||||||
|
}
|
||||||
self.call_proto("SendUserCascadeMessage", proto).await
|
self.call_proto("SendUserCascadeMessage", proto).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ use regex::Regex;
|
|||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use tracing::info;
|
use tracing::info;
|
||||||
|
|
||||||
use super::store::{CapturedFunctionCall, PendingToolResult};
|
use super::store::{CapturedFunctionCall, PendingImage, PendingToolResult};
|
||||||
|
|
||||||
/// Strip ALL tool definitions.
|
/// Strip ALL tool definitions.
|
||||||
/// Must be true: with tools present, the LS enters full agentic mode
|
/// Must be true: with tools present, the LS enters full agentic mode
|
||||||
@@ -28,6 +28,8 @@ pub struct ToolContext {
|
|||||||
pub last_calls: Vec<CapturedFunctionCall>,
|
pub last_calls: Vec<CapturedFunctionCall>,
|
||||||
/// Client-specified generation parameters (temperature, top_p, etc.).
|
/// Client-specified generation parameters (temperature, top_p, etc.).
|
||||||
pub generation_params: Option<super::store::GenerationParams>,
|
pub generation_params: Option<super::store::GenerationParams>,
|
||||||
|
/// Pending image to inject as inlineData in the user message.
|
||||||
|
pub pending_image: Option<PendingImage>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Modify a streamGenerateContent request body in-place.
|
/// Modify a streamGenerateContent request body in-place.
|
||||||
@@ -451,6 +453,44 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option<Vec
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── 7. Inject pending image as inlineData ────────────────────────────
|
||||||
|
// The LS doesn't forward images from our SendUserCascadeMessage proto to
|
||||||
|
// Google's API, so we inject them here at the MITM layer.
|
||||||
|
if let Some(ref ctx) = tool_ctx {
|
||||||
|
if let Some(ref img) = ctx.pending_image {
|
||||||
|
if let Some(contents) = json
|
||||||
|
.pointer_mut("/request/contents")
|
||||||
|
.and_then(|v| v.as_array_mut())
|
||||||
|
{
|
||||||
|
// Find the last user-role message and add inlineData to its parts
|
||||||
|
let mut injected = false;
|
||||||
|
for msg in contents.iter_mut().rev() {
|
||||||
|
let is_user = msg["role"].as_str() == Some("user");
|
||||||
|
if is_user {
|
||||||
|
if let Some(parts) = msg.get_mut("parts").and_then(|v| v.as_array_mut()) {
|
||||||
|
parts.push(serde_json::json!({
|
||||||
|
"inlineData": {
|
||||||
|
"mimeType": img.mime_type,
|
||||||
|
"data": img.base64_data
|
||||||
|
}
|
||||||
|
}));
|
||||||
|
injected = true;
|
||||||
|
changes.push(format!(
|
||||||
|
"inject image ({}; {} bytes base64)",
|
||||||
|
img.mime_type,
|
||||||
|
img.base64_data.len()
|
||||||
|
));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !injected {
|
||||||
|
tracing::warn!("MITM: pending image but no user message found to inject into");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if changes.is_empty() {
|
if changes.is_empty() {
|
||||||
return None; // Nothing modified
|
return None; // Nothing modified
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -562,14 +562,16 @@ async fn handle_http_over_tls(
|
|||||||
let pending_results = store.take_tool_results().await;
|
let pending_results = store.take_tool_results().await;
|
||||||
let last_calls = store.get_last_function_calls().await;
|
let last_calls = store.get_last_function_calls().await;
|
||||||
let generation_params = store.get_generation_params().await;
|
let generation_params = store.get_generation_params().await;
|
||||||
|
let pending_image = store.take_pending_image().await;
|
||||||
|
|
||||||
let tool_ctx = if tools.is_some() || !pending_results.is_empty() || generation_params.is_some() {
|
let tool_ctx = if tools.is_some() || !pending_results.is_empty() || generation_params.is_some() || pending_image.is_some() {
|
||||||
Some(super::modify::ToolContext {
|
Some(super::modify::ToolContext {
|
||||||
tools,
|
tools,
|
||||||
tool_config,
|
tool_config,
|
||||||
pending_results,
|
pending_results,
|
||||||
last_calls,
|
last_calls,
|
||||||
generation_params,
|
generation_params,
|
||||||
|
pending_image,
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
|
|||||||
@@ -60,6 +60,17 @@ pub struct PendingToolResult {
|
|||||||
pub result: serde_json::Value,
|
pub result: serde_json::Value,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A pending image to inject via MITM into the Google API request.
|
||||||
|
/// The LS doesn't forward images from our SendUserCascadeMessage proto,
|
||||||
|
/// so we inject them directly at the MITM layer.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct PendingImage {
|
||||||
|
/// Base64-encoded image data (no prefix).
|
||||||
|
pub base64_data: String,
|
||||||
|
/// MIME type, e.g. "image/png".
|
||||||
|
pub mime_type: String,
|
||||||
|
}
|
||||||
|
|
||||||
/// Client-specified generation parameters for MITM injection.
|
/// Client-specified generation parameters for MITM injection.
|
||||||
/// Set by API handlers, consumed by the MITM modify layer.
|
/// Set by API handlers, consumed by the MITM modify layer.
|
||||||
#[derive(Debug, Clone, Default)]
|
#[derive(Debug, Clone, Default)]
|
||||||
@@ -137,6 +148,10 @@ pub struct MitmStore {
|
|||||||
// ── Grounding metadata capture ──────────────────────────────────────
|
// ── Grounding metadata capture ──────────────────────────────────────
|
||||||
/// Captured grounding metadata from Google API responses (search results).
|
/// Captured grounding metadata from Google API responses (search results).
|
||||||
captured_grounding: Arc<RwLock<Option<serde_json::Value>>>,
|
captured_grounding: Arc<RwLock<Option<serde_json::Value>>>,
|
||||||
|
|
||||||
|
// ── Pending image for MITM injection ─────────────────────────────────
|
||||||
|
/// Image to inject into the next Google API request via MITM.
|
||||||
|
pending_image: Arc<RwLock<Option<PendingImage>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Aggregate statistics across all intercepted traffic.
|
/// Aggregate statistics across all intercepted traffic.
|
||||||
@@ -181,6 +196,7 @@ impl MitmStore {
|
|||||||
response_complete: Arc::new(AtomicBool::new(false)),
|
response_complete: Arc::new(AtomicBool::new(false)),
|
||||||
generation_params: Arc::new(RwLock::new(None)),
|
generation_params: Arc::new(RwLock::new(None)),
|
||||||
captured_grounding: Arc::new(RwLock::new(None)),
|
captured_grounding: Arc::new(RwLock::new(None)),
|
||||||
|
pending_image: Arc::new(RwLock::new(None)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -506,4 +522,16 @@ impl MitmStore {
|
|||||||
pub async fn peek_grounding(&self) -> Option<serde_json::Value> {
|
pub async fn peek_grounding(&self) -> Option<serde_json::Value> {
|
||||||
self.captured_grounding.read().await.clone()
|
self.captured_grounding.read().await.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Pending image for MITM injection ─────────────────────────────────
|
||||||
|
|
||||||
|
/// Store a pending image for MITM injection.
|
||||||
|
pub async fn set_pending_image(&self, image: PendingImage) {
|
||||||
|
*self.pending_image.write().await = Some(image);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Take (consume) pending image for injection.
|
||||||
|
pub async fn take_pending_image(&self) -> Option<PendingImage> {
|
||||||
|
self.pending_image.write().await.take()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user