From 89bea030cc35edac0a8b479f566853c7f1e409d1 Mon Sep 17 00:00:00 2001 From: Nikketryhard Date: Sun, 15 Feb 2026 17:57:32 -0600 Subject: [PATCH] feat: inject images via MITM layer instead of relying on LS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LS silently ignores the 'images' field from our SendUserCascadeMessageRequest proto — it never forwards image data to Google's API. New approach: store the image in MitmStore, then the MITM request modifier injects it as 'inlineData' directly into the last user message's parts array in the Google API JSON request. Flow: Client → Proxy (decode base64) → MitmStore.set_pending_image() LS → Google API → MITM intercepts → inject inlineData part → Google receives image + text together This works for all three API endpoints (responses, completions, gemini). --- src/api/completions.rs | 10 ++++++++++ src/api/gemini.rs | 10 ++++++++++ src/api/responses.rs | 10 ++++++++++ src/backend.rs | 6 ++++++ src/mitm/modify.rs | 42 +++++++++++++++++++++++++++++++++++++++++- src/mitm/proxy.rs | 4 +++- src/mitm/store.rs | 28 ++++++++++++++++++++++++++++ 7 files changed, 108 insertions(+), 2 deletions(-) diff --git a/src/api/completions.rs b/src/api/completions.rs index 14d369e..7b0de4a 100644 --- a/src/api/completions.rs +++ b/src/api/completions.rs @@ -303,6 +303,16 @@ pub(crate) async fn handle_completions( // Send message on primary cascade state.mitm_store.set_active_cascade(&cascade_id).await; + // Store image for MITM injection (LS doesn't forward images to Google API) + if let Some(ref img) = image { + use base64::Engine; + state.mitm_store.set_pending_image( + crate::mitm::store::PendingImage { + base64_data: base64::engine::general_purpose::STANDARD.encode(&img.data), + mime_type: img.mime_type.clone(), + } + ).await; + } match state .backend .send_message_with_image(&cascade_id, &user_text, model.model_enum, image.as_ref()) diff --git a/src/api/gemini.rs b/src/api/gemini.rs index a418b78..d3fe8e8 100644 --- a/src/api/gemini.rs +++ b/src/api/gemini.rs @@ -274,6 +274,16 @@ pub(crate) async fn handle_gemini( // Send message state.mitm_store.set_active_cascade(&cascade_id).await; + // Store image for MITM injection (LS doesn't forward images to Google API) + if let Some(ref img) = image { + use base64::Engine; + state.mitm_store.set_pending_image( + crate::mitm::store::PendingImage { + base64_data: base64::engine::general_purpose::STANDARD.encode(&img.data), + mime_type: img.mime_type.clone(), + } + ).await; + } match state .backend .send_message_with_image(&cascade_id, &user_text, model.model_enum, image.as_ref()) diff --git a/src/api/responses.rs b/src/api/responses.rs index 7fe8f90..462b9b5 100644 --- a/src/api/responses.rs +++ b/src/api/responses.rs @@ -352,6 +352,16 @@ pub(crate) async fn handle_responses( // Send message state.mitm_store.set_active_cascade(&cascade_id).await; + // Store image for MITM injection (LS doesn't forward images to Google API) + if let Some(ref img) = image { + use base64::Engine; + state.mitm_store.set_pending_image( + crate::mitm::store::PendingImage { + base64_data: base64::engine::general_purpose::STANDARD.encode(&img.data), + mime_type: img.mime_type.clone(), + } + ).await; + } match state .backend .send_message_with_image(&cascade_id, &user_text, model.model_enum, image.as_ref()) diff --git a/src/backend.rs b/src/backend.rs index dfcadc3..8c232c3 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -366,6 +366,12 @@ impl Backend { return Err("No OAuth token available".to_string()); } let proto = crate::proto::build_request_with_image(cascade_id, text, &token, model_enum, image); + if image.is_some() { + tracing::info!( + proto_size = proto.len(), + "SendUserCascadeMessage proto built with image" + ); + } self.call_proto("SendUserCascadeMessage", proto).await } diff --git a/src/mitm/modify.rs b/src/mitm/modify.rs index 2cf3191..67f525d 100644 --- a/src/mitm/modify.rs +++ b/src/mitm/modify.rs @@ -8,7 +8,7 @@ use regex::Regex; use serde_json::Value; use tracing::info; -use super::store::{CapturedFunctionCall, PendingToolResult}; +use super::store::{CapturedFunctionCall, PendingImage, PendingToolResult}; /// Strip ALL tool definitions. /// Must be true: with tools present, the LS enters full agentic mode @@ -28,6 +28,8 @@ pub struct ToolContext { pub last_calls: Vec, /// Client-specified generation parameters (temperature, top_p, etc.). pub generation_params: Option, + /// Pending image to inject as inlineData in the user message. + pub pending_image: Option, } /// Modify a streamGenerateContent request body in-place. @@ -451,6 +453,44 @@ pub fn modify_request(body: &[u8], tool_ctx: Option<&ToolContext>) -> Option>>, + + // ── Pending image for MITM injection ───────────────────────────────── + /// Image to inject into the next Google API request via MITM. + pending_image: Arc>>, } /// Aggregate statistics across all intercepted traffic. @@ -181,6 +196,7 @@ impl MitmStore { response_complete: Arc::new(AtomicBool::new(false)), generation_params: Arc::new(RwLock::new(None)), captured_grounding: Arc::new(RwLock::new(None)), + pending_image: Arc::new(RwLock::new(None)), } } @@ -506,4 +522,16 @@ impl MitmStore { pub async fn peek_grounding(&self) -> Option { self.captured_grounding.read().await.clone() } + + // ── Pending image for MITM injection ───────────────────────────────── + + /// Store a pending image for MITM injection. + pub async fn set_pending_image(&self, image: PendingImage) { + *self.pending_image.write().await = Some(image); + } + + /// Take (consume) pending image for injection. + pub async fn take_pending_image(&self) -> Option { + self.pending_image.write().await.take() + } }