chore: code cleanup and documentation overhaul

- Remove debug header dump from MITM proxy (was temp debugging code)
- Suppress dead_code warnings for intentional OpenAI compat fields
- Rewrite README with styled mermaid architecture diagrams, full feature listing, usage examples, and CLI reference
- Update endpoint-gap-analysis: images implemented, audio only stretch
- Update mitm-interception-status: add request modification and error capture components
- Update standalone-ls-todo: add new endpoints to test results
- Zero compiler warnings
2026-02-15 18:27:53 -06:00
parent 2882f7cce2
commit 4e4d8e9474
7 changed files with 354 additions and 152 deletions
--- a/README.md
+++ b/README.md
@@ -1,18 +1,78 @@
 # Antigravity Proxy
-OpenAI-compatible proxy that intercepts and relays requests to Google's Antigravity language server, impersonating the real Electron webview.
+OpenAI-compatible proxy that intercepts and relays requests to Google's Antigravity language server, impersonating the real Electron webview. Supports the Responses API, Chat Completions API, and a native Gemini endpoint with full streaming, multi-turn conversations, tool calling, image uploads, web search grounding, and real token usage capture via MITM interception.
 ## Architecture
 ```mermaid
 %%{init: {'theme': 'dark', 'themeVariables': {'primaryColor': '#1a1a2e', 'primaryTextColor': '#e0e0e0', 'primaryBorderColor': '#7c3aed', 'lineColor': '#7c3aed', 'secondaryColor': '#16213e', 'tertiaryColor': '#0f3460', 'edgeLabelBackground': '#1a1a2e', 'nodeTextColor': '#e0e0e0'}}}%%
 graph TB
    subgraph client["Client Layer"]
        style client fill:#1a1a2e,stroke:#7c3aed,stroke-width:2px,color:#e0e0e0
        APP["OpenAI SDK / curl / Any HTTP Client"]
    end
    subgraph proxy["Proxy Layer :8741"]
        style proxy fill:#16213e,stroke:#7c3aed,stroke-width:2px,color:#e0e0e0
        API["API Router<br/>responses | completions | gemini | search"]
        STORE["MitmStore<br/>tools | images | errors | usage"]
        PROTO["Protobuf Encoder<br/>byte-exact webview match"]
    end
    subgraph ls["Language Server"]
        style ls fill:#0f3460,stroke:#7c3aed,stroke-width:2px,color:#e0e0e0
        STANDALONE["Standalone LS<br/>isolated process, UID: antigravity-ls"]
    end
    subgraph mitm["MITM Layer :8742"]
        style mitm fill:#1a1a2e,stroke:#e94560,stroke-width:2px,color:#e0e0e0
        INTERCEPT["TLS Intercept<br/>decrypt + modify + re-encrypt"]
        MODIFY["Request Modifier<br/>inject tools, images, params"]
        PARSE["Response Parser<br/>usage, errors, function calls"]
    end
    subgraph google["Google API"]
        style google fill:#0f3460,stroke:#7c3aed,stroke-width:2px,color:#e0e0e0
        GAPI["daily-cloudcode-pa.googleapis.com<br/>v1internal:streamGenerateContent"]
    end
    APP -->|"HTTP POST"| API
    API --> STORE
    API --> PROTO
    PROTO -->|"gRPC"| STANDALONE
    STANDALONE -->|"HTTPS :443"| INTERCEPT
    INTERCEPT --> MODIFY
    MODIFY -->|"inject tools, images,<br/>generation params"| GAPI
    GAPI -->|"SSE response"| PARSE
    PARSE -->|"usage, errors,<br/>function calls"| STORE
    INTERCEPT -.->|"iptables REDIRECT<br/>UID-scoped"| STANDALONE
    classDef highlight fill:#7c3aed,stroke:#e94560,stroke-width:2px,color:#fff
 ```
 ### Request Flow
 1. Client sends an OpenAI-compatible request to the proxy
 2. Proxy encodes the message as a protobuf matching the real webview format
 3. Proxy sends it to the standalone Language Server via gRPC
 4. LS makes an HTTPS request to Google's API
 5. iptables redirects the LS's traffic (UID-scoped) to the MITM proxy
 6. MITM decrypts TLS, modifies the request (injects tools, images, params), re-encrypts and forwards to Google
 7. Google's SSE response flows back through MITM, which captures usage, errors, and function calls
 8. Proxy polls the LS for cascade state, supplementing with MITM-captured data
 9. Client receives the response in OpenAI-compatible format
 ## Quick Start
 ```bash
-# Build
+# First-time setup (creates user + iptables for MITM)
-cargo build --release
+sudo ./scripts/mitm-redirect.sh install
-# Run (language server must be running)
+# Start as daemon (builds if needed)
 proxyctl start
 # Or run directly
 RUST_LOG=info ./target/release/antigravity-proxy
 # Custom port
 RUST_LOG=info ./target/release/antigravity-proxy --port 9000
 ```
 Default port: **8741**
@@ -20,30 +80,99 @@ Default port: **8741**
 ## Endpoints
 | Method     | Path                   | Description                                                  |
-| -------- | ---------------------- | ----------------------------------------------------------- |
+| ---------- | ---------------------- | ------------------------------------------------------------ |
-| `POST`   | `/v1/responses`        | **Responses API** (primary) — supports `stream: true/false` |
+| `POST`     | `/v1/responses`        | **Responses API** (primary) -- supports `stream: true/false` |
-| `POST`   | `/v1/chat/completions` | Chat Completions API (OpenAI compat shim)                   |
+| `POST`     | `/v1/chat/completions` | Chat Completions API (OpenAI compat)                         |
 | `POST`     | `/v1/gemini`           | Native Gemini API                                            |
 | `GET/POST` | `/v1/search`           | Web Search via Google Search grounding                       |
 | `GET`      | `/v1/models`           | List available models                                        |
 | `GET`      | `/v1/sessions`         | List active sessions                                         |
 | `DELETE`   | `/v1/sessions/:id`     | Delete a session                                             |
 | `POST`     | `/v1/token`            | Set OAuth token at runtime                                   |
 | `GET`      | `/v1/usage`            | MITM-intercepted token usage stats                           |
-| `GET`    | `/v1/quota`            | LS quota — credits, per-model rate limits, reset timers     |
+| `GET`      | `/v1/quota`            | LS quota -- credits, per-model rate limits, reset timers     |
 | `GET`      | `/health`              | Health check                                                 |
 ## Available Models
 | Name                | Label                                     |
-| ------------------- | ---------------------------------------- |
+| ------------------- | ----------------------------------------- |
-| `opus-4.6`          | Claude Opus 4.6 (Thinking) — **default** |
+| `opus-4.6`          | Claude Opus 4.6 (Thinking) -- **default** |
 | `opus-4.5`          | Claude Opus 4.5 (Thinking)                |
 | `gemini-3-pro-high` | Gemini 3 Pro (High)                       |
 | `gemini-3-pro`      | Gemini 3 Pro (Low)                        |
 | `gemini-3-flash`    | Gemini 3 Flash                            |
-## Example: Responses API
+## Features
-### Sync
+### Core
 - **Sync and streaming** on all endpoints
 - **Multi-turn conversations** via `conversation` session ID (cascade reuse)
 - **Full message history** forwarded for Chat Completions
 - **Thinking/reasoning** exposed in both sync and streaming modes
 - **Thinking signatures** preserved for multi-turn thinking model chains
 ### Tool Calling
 - **OpenAI-format tools** auto-converted to Gemini format via MITM injection
 - **`tool_choice`** support (`auto`, `none`, `required`, named function)
 - **`max_tool_calls`** caps the number of tool calls per response
 - **Function call results** (`function_call_output`) routed back correctly
 - **Native Gemini tools** passed through on the `/v1/gemini` endpoint
 ### Image Uploads
 Images are injected directly into Google's API request via MITM (the LS does not forward images natively).
 Supported input formats:
 - Responses API: `{type: "input_image", image_url: "data:image/png;base64,..."}`
 - Chat Completions: `{type: "image_url", image_url: {url: "data:image/png;base64,..."}}`
 - Gemini API: `{type: "input_image", image_url: "data:image/png;base64,..."}`
 ### Web Search
 Google Search grounding can be enabled on any endpoint:
 - Completions: `"web_search": true`
 - Responses: `"tools": [{"type": "web_search_preview"}]`
 - Gemini: `"google_search": true`
 - Dedicated: `GET/POST /v1/search` returns structured results with citations
 ### Generation Parameters
 All parameters are forwarded to Google via MITM injection:
 | Parameter                | Endpoints                                             |
 | ------------------------ | ----------------------------------------------------- |
 | `temperature`            | All                                                   |
 | `top_p` / `topP`         | All                                                   |
 | `top_k` / `topK`         | Gemini                                                |
 | `max_output_tokens`      | All                                                   |
 | `stop` / `stopSequences` | All                                                   |
 | `frequency_penalty`      | Completions                                           |
 | `presence_penalty`       | Completions                                           |
 | `reasoning_effort`       | All (mapped to `thinkingLevel`)                       |
 | `response_format`        | Completions, Responses (`json_object`, `json_schema`) |
 ### Error Propagation
 When Google's API returns an error (400, 429, 500, etc.), the MITM proxy captures it and the API handler returns it immediately to the client instead of hanging until timeout.
 Error status mapping:
 | Google Status        | HTTP Code | OpenAI Error Type       |
 | -------------------- | --------- | ----------------------- |
 | `INVALID_ARGUMENT`   | 400       | `invalid_request_error` |
 | `RESOURCE_EXHAUSTED` | 429       | `rate_limit_error`      |
 | `PERMISSION_DENIED`  | 403       | `authentication_error`  |
 | `INTERNAL`           | 500       | `server_error`          |
 | `UNAVAILABLE`        | 503       | `server_error`          |
 ## Usage Examples
 ### Responses API (sync)
 ```bash
 curl -s http://localhost:8741/v1/responses \
@@ -56,7 +185,7 @@ curl -s http://localhost:8741/v1/responses \
  }' | jq .
 ```
-### Streaming
+### Responses API (streaming)
 ```bash
 curl -N http://localhost:8741/v1/responses \
@@ -69,10 +198,9 @@ curl -N http://localhost:8741/v1/responses \
  }'
 ```
-### Multi-turn (session reuse)
+### Multi-turn Conversation
 ```bash
 # First message
 curl -s http://localhost:8741/v1/responses \
  -H "Content-Type: application/json" \
  -d '{
@@ -93,6 +221,61 @@ curl -s http://localhost:8741/v1/responses \
  }' | jq .
 ```
 ### Image Upload
 ```bash
 curl -s http://localhost:8741/v1/responses \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gemini-3-flash",
    "input": [
      {"type": "input_image", "image_url": "data:image/png;base64,iVBORw0KGgo..."},
      {"type": "input_text", "text": "What is in this image?"}
    ],
    "stream": false
  }' | jq .
 ```
 ### Web Search
 ```bash
 # Dedicated search endpoint
 curl -s 'http://localhost:8741/v1/search?q=latest+rust+news' | jq .
 # Inline grounding on any endpoint
 curl -s http://localhost:8741/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gemini-3-flash",
    "messages": [{"role": "user", "content": "What happened in tech today?"}],
    "web_search": true
  }' | jq .
 ```
 ### Tool Calling
 ```bash
 curl -s http://localhost:8741/v1/responses \
  -H "Content-Type: application/json" \
  -d '{
    "model": "gemini-3-flash",
    "input": "What is the weather in Tokyo?",
    "tools": [{
      "type": "function",
      "function": {
        "name": "get_weather",
        "description": "Get weather for a location",
        "parameters": {
          "type": "object",
          "properties": {"location": {"type": "string"}},
          "required": ["location"]
        }
      }
    }],
    "stream": false
  }' | jq .
 ```
 ## Authentication
 The proxy needs an OAuth token. Three ways to provide it:
@@ -103,111 +286,112 @@ The proxy needs an OAuth token. Three ways to provide it:
 ## Stealth Features
- **TLS fingerprint**: BoringSSL with Chrome JA3/JA4 + H2 fingerprint via `wreq` (version auto-detected)
+- **TLS fingerprint** -- BoringSSL with Chrome JA3/JA4 + H2 fingerprint via `wreq` (version auto-detected)
- **Protobuf**: Hand-rolled encoder producing byte-exact match to real webview traffic
+- **Protobuf** -- Hand-rolled encoder producing byte-exact match to real webview traffic
- **Warmup**: Mimics real webview startup RPC calls
+- **Warmup** -- Mimics real webview startup RPC calls
- **Heartbeat**: Periodic keep-alive matching real webview lifecycle
+- **Heartbeat** -- Periodic keep-alive matching real webview lifecycle
- **Jitter**: Randomized polling intervals to avoid automation fingerprint
+- **Reactive streaming** -- `StreamCascadeReactiveUpdates` for real-time state diffs (polling fallback)
- **Session reuse**: Cascades are reused for multi-turn, matching real webview behavior
+- **Jitter** -- Randomized intervals to avoid automation fingerprint
- **Version detection**: Auto-detects Antigravity/Chrome/Electron versions from installed app
+- **Session reuse** -- Cascades reused for multi-turn, matching real webview behavior
 - **Version detection** -- Auto-detects Antigravity/Chrome/Electron versions from installed app
-## MITM Proxy
+## CLI Reference
-Built-in TLS-intercepting proxy captures real token usage from LS ↔ Google/Anthropic traffic. Disabled with `--no-mitm`.
+### `proxyctl` -- Daemon Manager
-### Setup
+Symlinked to `~/.local/bin/proxyctl` for global access.
 | Command               | Description                             |
 | --------------------- | --------------------------------------- |
 | `proxyctl start`      | Start the proxy daemon                  |
 | `proxyctl stop`       | Stop the proxy daemon                   |
 | `proxyctl restart`    | Rebuild + restart                       |
 | `proxyctl rebuild`    | Build release binary only               |
 | `proxyctl status`     | Service status + quota + usage          |
 | `proxyctl logs [N]`   | Tail last N lines (default 30) + follow |
 | `proxyctl logs-all`   | Full log dump (no follow)               |
 | `proxyctl test [msg]` | Quick test request (gemini-3-flash)     |
 | `proxyctl health`     | Health check                            |
 ### `mitm-redirect.sh` -- MITM Setup
 One-time setup script for UID-scoped iptables traffic redirection.
 ```bash
-# 1. Start proxy (generates CA cert automatically)
+sudo ./scripts/mitm-redirect.sh install    # create user + iptables rule
-RUST_LOG=info ./target/release/antigravity-proxy
+sudo ./scripts/mitm-redirect.sh uninstall  # remove user + iptables rule
-
+sudo ./scripts/mitm-redirect.sh status     # check current state
 # 2. Install wrapper (patches LS binary to route through MITM)
 sudo ./scripts/mitm-wrapper.sh install
 # 3. Restart Antigravity — done!
 # Check status
 ./scripts/mitm-wrapper.sh status
 # Uninstall
 sudo ./scripts/mitm-wrapper.sh uninstall
 ```
-### Usage Stats
+### Proxy Binary
 ```bash
 curl -s http://localhost:8741/v1/usage | jq .
 ```
 ## Standalone Language Server
 Launch an isolated LS instance for experimentation:
 ```bash
 # Basic test (starts, checks quota, exits)
 ./scripts/standalone-ls.sh
 # Foreground mode (stays alive)
 ./scripts/standalone-ls.sh --fg
 # With MITM traffic interception
 ./scripts/standalone-ls.sh --mitm
 # Capture a clean traffic snapshot
 ./scripts/standalone-ls.sh --snapshot
 # Snapshot with custom prompt
 ./scripts/standalone-ls.sh --snapshot --prompt "Explain quantum computing"
 ```
 The standalone LS shares the main Antigravity app's OAuth (via its extension server) but has its own port, data directory, and cascades.
 ### Traffic Snapshots
 The `--snapshot` flag captures all HTTP/2 traffic and formats it into a clean, color-coded report:
 ```
 ══════════════════════════════════════════════════════════════════════
  STANDALONE LS TRAFFIC SNAPSHOT
 ══════════════════════════════════════════════════════════════════════
 ▸ Outbound Connections
  → antigravity-unleash.goog    (Feature Flags)
  → play.googleapis.com         (Telemetry)
 ══════════════════════════════════════════════════════════════════════
  antigravity-unleash.goog — Feature Flags
 ══════════════════════════════════════════════════════════════════════
  → POST /api/client/register
    authorization: *:production.e4455...
    unleash-appname: codeium-language-server
    Body (561 bytes, JSON):
      {"appName":"codeium-language-server","instanceId":"..."}
 ```
 ## Architecture
 ```mermaid
 graph LR
    A[Your App<br/>OpenAI SDK] -->|HTTP| B[Proxy<br/>:8741]
    B -->|gRPC| C[Language<br/>Server]
    C -->|HTTPS| D[Google /<br/>Anthropic]
    E[MITM Proxy<br/>:8742] -.->|intercept| D
    C -.->|routed via| E
 ```
 ## CLI Flags
 ```
 antigravity-proxy [OPTIONS]
 Options:
  --port <PORT>          API server port (default: 8741)
-  --no-mitm              Disable MITM proxy
+  --no-standalone        Attach to existing LS instead of spawning standalone
-  --mitm-port <PORT>     Override MITM proxy port (default: auto)
+  --no-mitm              Disable MITM proxy entirely
  --mitm-port <PORT>     Override MITM proxy port (default: auto-assign)
 ```
 ## MITM Proxy
 ### How It Works
 ```mermaid
 %%{init: {'theme': 'dark', 'themeVariables': {'primaryColor': '#1a1a2e', 'primaryTextColor': '#e0e0e0', 'primaryBorderColor': '#e94560', 'lineColor': '#e94560', 'secondaryColor': '#16213e', 'tertiaryColor': '#0f3460'}}}%%
 graph LR
    subgraph proxy_layer["Proxy :8741"]
        style proxy_layer fill:#16213e,stroke:#7c3aed,stroke-width:2px,color:#e0e0e0
        P["API Handler"]
        S["MitmStore"]
    end
    subgraph ls_layer["Standalone LS"]
        style ls_layer fill:#0f3460,stroke:#7c3aed,stroke-width:2px,color:#e0e0e0
        LS["language_server<br/>UID: antigravity-ls"]
    end
    subgraph mitm_layer["MITM :8742"]
        style mitm_layer fill:#1a1a2e,stroke:#e94560,stroke-width:2px,color:#e0e0e0
        M["TLS Decrypt"]
        MOD["Modify Request<br/>tools | images | params"]
        CAP["Capture Response<br/>usage | errors | calls"]
    end
    subgraph google_layer["Google API"]
        style google_layer fill:#0f3460,stroke:#7c3aed,stroke-width:2px,color:#e0e0e0
        G["streamGenerateContent"]
    end
    P -->|"image, tools,<br/>params"| S
    P -->|"protobuf"| LS
    LS -->|":443 traffic"| M
    M --> MOD
    MOD -->|"modified request"| G
    G -->|"SSE response"| CAP
    CAP -->|"usage, errors"| S
    S -->|"error or result"| P
    linkStyle 2 stroke:#e94560,stroke-width:2px
 ```
 - **UID-scoped iptables** -- only the standalone LS's traffic is intercepted (zero side effects)
 - **Combined CA bundle** -- system CAs + MITM CA written to `/tmp/antigravity-mitm-combined-ca.pem`
 - **Google SSE parsing** -- extracts `promptTokenCount`, `candidatesTokenCount`, `thoughtsTokenCount`
 - **Request modification** -- strips LS bloat, injects client tools/images/params (97%+ size reduction typical)
 - **Error capture** -- upstream errors stored in MitmStore for instant client forwarding
 - **Init metadata** -- protobuf field 34 `detect_and_use_proxy` set to ENABLED (1)
 ## Development
 - **Dev/testing model**: `gemini-3-flash` -- use for all development and iterative testing
 - **Production model**: `opus-4.6` -- use sparingly (quota limited)
 - See `docs/ls-binary-analysis.md` for reverse-engineered model catalog and proto enum mappings
 - See `docs/endpoint-gap-analysis.md` for full API coverage audit
 - See `docs/mitm-interception-status.md` for MITM technical details
 ## License
 Private. Do not distribute.
--- a/docs/endpoint-gap-analysis.md
+++ b/docs/endpoint-gap-analysis.md
@@ -22,6 +22,8 @@
 - ✅ `response_format: {type: "json_object"}` — injected as `responseMimeType: "application/json"`
 - ✅ Google Search grounding — `web_search: true` (Completions), `tools: [{type: "web_search_preview"}]` (Responses), `google_search: true` (Gemini)
 - ✅ `/v1/search` endpoint — dedicated web search via Google Search grounding, returns structured results + citations
 - ✅ Image uploads — `input_image` / `image_url` with base64 data URIs, injected via MITM as `inlineData`
 - ✅ Upstream error propagation — Google API errors (400, 429, 500) returned to client instantly instead of hanging
 ### Reasoning Effort → Thinking Level Mapping
@@ -115,8 +117,8 @@ All structured output features have been implemented.
 ### Stretch (research needed)
 | #   | Gap             | API  | Notes                                                            |
-| --- | -------------------------- | ---- | ---------------------------------------------------------------------------------------------------------------------------- |
+| --- | --------------- | ---- | ---------------------------------------------------------------- |
-| 12  | **Image/audio modalities** | Both | LS `sendMessage` is text-only. Need to reverse-engineer proto format for binary payloads. Gemini 3 supports vision natively. |
+| 12  | **Audio input** | Both | Audio modalities not yet supported. Vision/images work via MITM. |
 ---
--- a/docs/mitm-interception-status.md
+++ b/docs/mitm-interception-status.md
@@ -45,6 +45,21 @@ Client → Proxy (8741) → Standalone LS (as antigravity-ls user)
   - Forwards HTTP/1.1 requests upstream with real DNS resolution (`dig @8.8.8.8`)
   - Chunked response detection for fast completion
 6. **Request modification** (`src/mitm/modify.rs`)
   - Strips LS system instructions down to `<identity>` block only
   - Removes stale conversation history (keeps only last user message)
   - Injects client tools, tool configs, generation params
   - Injects images as `inlineData` (base64) into user message parts
   - Injects tool results as `functionResponse` parts
   - Enables Google Search grounding when requested
   - Updates `Content-Length` header after body modification
 7. **Upstream error capture** (`src/mitm/store.rs`)
   - Captures Google API error responses (HTTP 400, 429, 500, etc.)
   - Parses error JSON for message and status fields
   - Stores in `MitmStore` for immediate forwarding to client
   - Prevents request hangs on upstream failures
 ## What We Tried (Historical)
 ### 1. Extension Patch — `detectAndUseProxy` ✅ Still Active
@@ -129,8 +144,8 @@ Last event includes `"finishReason": "STOP"` in the candidate.
 # One-time setup (creates user + iptables rule)
 sudo ./scripts/mitm-redirect.sh install
-# Run proxy with standalone LS + MITM
+# Run proxy (standalone + MITM are default)
-RUST_LOG=info ./target/release/antigravity-proxy --standalone
+RUST_LOG=info ./target/release/antigravity-proxy
 # Check usage
 curl -s http://localhost:8741/v1/usage | jq .
--- a/docs/standalone-ls-todo.md
+++ b/docs/standalone-ls-todo.md
@@ -60,19 +60,25 @@ byte-exact protobuf encoder.
 - API endpoint: `daily-cloudcode-pa.googleapis.com/v1internal:streamGenerateContent?alt=sse`
 - SSE response format: `{"response": {"usageMetadata": {"promptTokenCount", "candidatesTokenCount", "thoughtsTokenCount"}, "modelVersion": "..."}}`
-## Test Results (2026-02-14)
+## Test Results (2026-02-15)
 | Endpoint                          | Result                      |
-| --------------------------------- | ------------------------- |
+| --------------------------------- | --------------------------- |
-| `GET /health`                     | ✅                        |
+| `GET /health`                     | OK                          |
-| `GET /v1/models`                  | ✅ 5 models               |
+| `GET /v1/models`                  | OK, 5 models                |
-| `GET /v1/sessions`                | ✅                        |
+| `GET /v1/sessions`                | OK                          |
-| `GET /v1/quota`                   | ✅ real plan/credits      |
+| `GET /v1/quota`                   | OK, real plan/credits       |
-| `GET /v1/usage`                   | ✅ real MITM tokens       |
+| `GET /v1/usage`                   | OK, real MITM tokens        |
-| `POST /v1/responses` (sync)       | ✅                        |
+| `POST /v1/responses` (sync)       | OK                          |
-| `POST /v1/responses` (stream)     | ✅ SSE events             |
+| `POST /v1/responses` (stream)     | OK, full SSE event set      |
-| `POST /v1/responses` (multi-turn) | ✅ context preserved      |
+| `POST /v1/responses` (multi-turn) | OK, context preserved       |
-| `POST /v1/chat/completions`       | ✅                        |
+| `POST /v1/responses` (tools)      | OK, function calls captured |
-| MITM interception                 | ✅ TLS decrypt + parse    |
+| `POST /v1/responses` (images)     | OK, MITM injection          |
-| MITM usage capture                | ✅ per-model token counts |
+| `POST /v1/chat/completions`       | OK                          |
-| UID isolation                     | ✅ no side effects        |
+| `POST /v1/gemini`                 | OK                          |
 | `GET/POST /v1/search`             | OK, grounding + citations   |
 | MITM interception                 | OK, TLS decrypt + parse     |
 | MITM request modification         | OK, tools/images/params     |
 | MITM usage capture                | OK, per-model token counts  |
 | MITM error capture                | OK, instant client feedback |
 | UID isolation                     | OK, no side effects         |
--- a/src/api/types.rs
+++ b/src/api/types.rs
@@ -87,8 +87,9 @@ pub(crate) struct CompletionRequest {
    /// Max completion tokens — forwarded to Google as maxOutputTokens via MITM.
    #[serde(default)]
    pub max_completion_tokens: Option<u64>,
-    /// User identifier — accepted, not used.
+    /// User identifier -- accepted, not used.
    #[serde(default)]
    #[allow(dead_code)]
    pub user: Option<String>,
    /// Frequency penalty — forwarded to Google via MITM.
    #[serde(default)]
@@ -111,8 +112,9 @@ pub(crate) struct CompletionRequest {
    /// Session/conversation ID for multi-turn reuse (custom extension).
    #[serde(default)]
    pub conversation: Option<serde_json::Value>,
-    /// Metadata — accepted and ignored (no upstream equivalent).
+    /// Metadata -- accepted and ignored (no upstream equivalent).
    #[serde(default)]
    #[allow(dead_code)]
    pub metadata: Option<serde_json::Value>,
    /// Number of completions to generate. Each uses a separate cascade (costs N× quota).
    /// Defaults to 1. Only supported in sync mode; streaming always uses n=1.
@@ -163,12 +165,14 @@ pub(crate) struct ResponseFormat {
 pub(crate) struct JsonSchemaFormat {
    /// Schema name (for client identification).
    ///
    /// Optional: a missing key deserializes to `None`.
    #[serde(default)]
    // Kept so OpenAI-style requests deserialize unchanged; `dead_code` is
    // allowed because this compatibility field is intentionally never read.
    #[allow(dead_code)]
    pub name: Option<String>,
    /// The actual JSON schema object — forwarded as Gemini's responseSchema.
    ///
    /// Optional: a missing key deserializes to `None`.
    #[serde(default)]
    pub schema: Option<serde_json::Value>,
    /// Whether to enable strict schema adherence.
    ///
    /// Optional: a missing key deserializes to `None`.
    #[serde(default)]
    // Accepted for OpenAI compatibility; `dead_code` is allowed because the
    // value is parsed but intentionally not acted upon.
    #[allow(dead_code)]
    pub strict: Option<bool>,
 }
--- a/src/mitm/proxy.rs
+++ b/src/mitm/proxy.rs
@@ -633,16 +633,6 @@ async fn handle_http_over_tls(
        };
        // Forward the request — if write fails, reconnect and retry once
        // DEBUG: dump headers and total size
        if req_path.contains("streamGenerateContent") {
            let hdr_end = find_headers_end(&request_buf).unwrap_or(request_buf.len());
            let hdr_str = String::from_utf8_lossy(&request_buf[..hdr_end.min(request_buf.len())]);
            info!(
                total_buf_len = request_buf.len(),
                body_len = request_buf.len() - hdr_end,
                "MITM: sending request to upstream\n{hdr_str}"
            );
        }
        if let Err(e) = conn.write_all(&request_buf).await {
            debug!(domain, error = %e, "MITM: upstream write failed, reconnecting");
            let c = connect_upstream(domain, &upstream_config).await?;
--- a/src/mitm/store.rs
+++ b/src/mitm/store.rs
@@ -68,6 +68,7 @@ pub struct UpstreamError {
    /// HTTP status code from Google (e.g. 400, 429, 500).
    pub status: u16,
    /// Raw error body from Google (usually JSON).
    #[allow(dead_code)]
    pub body: String,
    /// Parsed error message, if available.
    pub message: Option<String>,