From d5e7f09225b968b7d39e9d20306140abe0b093bc Mon Sep 17 00:00:00 2001
From: Nikketryhard <louisnewmobile@gmail.com>
Date: Sat, 14 Feb 2026 02:24:35 -0600
Subject: [PATCH] =?UTF-8?q?feat:=20initial=20commit=20=E2=80=94=20antigrav?=
 =?UTF-8?q?ity=20proxy=20with=20MITM,=20standalone=20LS,=20and=20snapshot?=
 =?UTF-8?q?=20tooling?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore                |    8 +
 Cargo.lock                | 2470 +++++++++++++++++++++++++++++++++++++
 Cargo.toml                |   46 +
 GEMINI.md                 |  155 +++
 KNOWN_ISSUES.md           |  109 ++
 README.md                 |  239 ++++
 scripts/mitm-wrapper.sh   |  331 +++++
 scripts/parse-snapshot.py |  475 +++++++
 scripts/standalone-ls.sh  |  277 +++++
 src/api/completions.rs    |  343 +++++
 src/api/mod.rs            |  176 +++
 src/api/models.rs         |   49 +
 src/api/polling.rs        |  298 +++++
 src/api/responses.rs      |  686 ++++++++++
 src/api/types.rs          |  241 ++++
 src/api/util.rs           |   36 +
 src/backend.rs            |  462 +++++++
 src/constants.rs          |  217 ++++
 src/main.rs               |  332 +++++
 src/mitm/ca.rs            |  218 ++++
 src/mitm/h2_handler.rs    |  512 ++++++++
 src/mitm/intercept.rs     |  271 ++++
 src/mitm/mod.rs           |   19 +
 src/mitm/proto.rs         |  584 +++++++++
 src/mitm/proxy.rs         |  591 +++++++++
 src/mitm/store.rs         |  163 +++
 src/proto.rs              |  233 ++++
 src/quota.rs              |  218 ++++
 src/session.rs            |  152 +++
 src/warmup.rs             |   69 ++
 30 files changed, 9980 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.lock
 create mode 100644 Cargo.toml
 create mode 100644 GEMINI.md
 create mode 100644 KNOWN_ISSUES.md
 create mode 100644 README.md
 create mode 100755 scripts/mitm-wrapper.sh
 create mode 100644 scripts/parse-snapshot.py
 create mode 100755 scripts/standalone-ls.sh
 create mode 100644 src/api/completions.rs
 create mode 100644 src/api/mod.rs
 create mode 100644 src/api/models.rs
 create mode 100644 src/api/polling.rs
 create mode 100644 src/api/responses.rs
 create mode 100644 src/api/types.rs
 create mode 100644 src/api/util.rs
 create mode 100644 src/backend.rs
 create mode 100644 src/constants.rs
 create mode 100644 src/main.rs
 create mode 100644 src/mitm/ca.rs
 create mode 100644 src/mitm/h2_handler.rs
 create mode 100644 src/mitm/intercept.rs
 create mode 100644 src/mitm/mod.rs
 create mode 100644 src/mitm/proto.rs
 create mode 100644 src/mitm/proxy.rs
 create mode 100644 src/mitm/store.rs
 create mode 100644 src/proto.rs
 create mode 100644 src/quota.rs
 create mode 100644 src/session.rs
 create mode 100644 src/warmup.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..85a758c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+# Build
+/target/
+
+# Debug artifacts
+*.log
+*.txt
+!README.txt
+test_output.json
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..06d34bc
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,2470 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "adler2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
+
+[[package]]
+name = "ahash"
+version = "0.8.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "alloc-no-stdlib"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
+
+[[package]]
+name = "alloc-stdlib"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
+dependencies = [
+ "alloc-no-stdlib",
+]
+
+[[package]]
+name = "android_system_properties"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "anstream"
+version = "0.6.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
+dependencies = [
+ "anstyle",
+ "anstyle-parse",
+ "anstyle-query",
+ "anstyle-wincon",
+ "colorchoice",
+ "is_terminal_polyfill",
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle"
+version = "1.0.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
+
+[[package]]
+name = "anstyle-parse"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
+dependencies = [
+ "utf8parse",
+]
+
+[[package]]
+name = "anstyle-query"
+version = "1.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "anstyle-wincon"
+version = "3.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
+dependencies = [
+ "anstyle",
+ "once_cell_polyfill",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "antigravity-proxy"
+version = "3.0.0"
+dependencies = [
+ "async-stream",
+ "axum",
+ "base64",
+ "brotli 7.0.0",
+ "bytes",
+ "chrono",
+ "clap",
+ "flate2",
+ "http",
+ "http-body-util",
+ "httparse",
+ "hyper",
+ "hyper-util",
+ "rand",
+ "rcgen",
+ "regex",
+ "rustls",
+ "rustls-native-certs",
+ "rustls-pemfile",
+ "serde",
+ "serde_json",
+ "time",
+ "tokio",
+ "tokio-rustls",
+ "tokio-stream",
+ "tower-http",
+ "tracing",
+ "tracing-subscriber",
+ "uuid",
+ "wreq",
+ "wreq-util",
+]
+
+[[package]]
+name = "async-stream"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476"
+dependencies = [
+ "async-stream-impl",
+ "futures-core",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "async-stream-impl"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "atomic-waker"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
+
+[[package]]
+name = "autocfg"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
+
+[[package]]
+name = "aws-lc-rs"
+version = "1.15.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b7b6141e96a8c160799cc2d5adecd5cbbe5054cb8c7c4af53da0f83bb7ad256"
+dependencies = [
+ "aws-lc-sys",
+ "zeroize",
+]
+
+[[package]]
+name = "aws-lc-sys"
+version = "0.37.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b092fe214090261288111db7a2b2c2118e5a7f30dc2569f1732c4069a6840549"
+dependencies = [
+ "cc",
+ "cmake",
+ "dunce",
+ "fs_extra",
+]
+
+[[package]]
+name = "axum"
+version = "0.8.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8"
+dependencies = [
+ "axum-core",
+ "bytes",
+ "form_urlencoded",
+ "futures-util",
+ "http",
+ "http-body",
+ "http-body-util",
+ "hyper",
+ "hyper-util",
+ "itoa",
+ "matchit",
+ "memchr",
+ "mime",
+ "percent-encoding",
+ "pin-project-lite",
+ "serde_core",
+ "serde_json",
+ "serde_path_to_error",
+ "serde_urlencoded",
+ "sync_wrapper",
+ "tokio",
+ "tower",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "axum-core"
+version = "0.5.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "http",
+ "http-body",
+ "http-body-util",
+ "mime",
+ "pin-project-lite",
+ "sync_wrapper",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "base64"
+version = "0.22.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
+
+[[package]]
+name = "bindgen"
+version = "0.72.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
+dependencies = [
+ "bitflags",
+ "cexpr",
+ "clang-sys",
+ "itertools",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash",
+ "shlex",
+ "syn",
+]
+
+[[package]]
+name = "bitflags"
+version = "2.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
+
+[[package]]
+name = "boring-sys2"
+version = "5.0.0-alpha.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6708f31d293423b48662069e699b8a79ac1c2a5d8de2c560c2cc0958e480d377"
+dependencies = [
+ "bindgen",
+ "cmake",
+ "fs_extra",
+ "fslock",
+]
+
+[[package]]
+name = "boring2"
+version = "5.0.0-alpha.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d190e62fc07d3433265641a2df3109baab591175c42a39a5f2379b584f3f0768"
+dependencies = [
+ "bitflags",
+ "boring-sys2",
+ "foreign-types",
+ "libc",
+ "openssl-macros",
+]
+
+[[package]]
+name = "brotli"
+version = "7.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+ "brotli-decompressor 4.0.3",
+]
+
+[[package]]
+name = "brotli"
+version = "8.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+ "brotli-decompressor 5.0.0",
+]
+
+[[package]]
+name = "brotli-decompressor"
+version = "4.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+]
+
+[[package]]
+name = "brotli-decompressor"
+version = "5.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+]
+
+[[package]]
+name = "bumpalo"
+version = "3.19.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510"
+
+[[package]]
+name = "bytes"
+version = "1.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
+
+[[package]]
+name = "cc"
+version = "1.2.55"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29"
+dependencies = [
+ "find-msvc-tools",
+ "jobserver",
+ "libc",
+ "shlex",
+]
+
+[[package]]
+name = "cexpr"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
+dependencies = [
+ "nom",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
+name = "chrono"
+version = "0.4.43"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118"
+dependencies = [
+ "iana-time-zone",
+ "js-sys",
+ "num-traits",
+ "wasm-bindgen",
+ "windows-link",
+]
+
+[[package]]
+name = "clang-sys"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
+dependencies = [
+ "glob",
+ "libc",
+ "libloading",
+]
+
+[[package]]
+name = "clap"
+version = "4.5.58"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806"
+dependencies = [
+ "clap_builder",
+ "clap_derive",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.5.58"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2"
+dependencies = [
+ "anstream",
+ "anstyle",
+ "clap_lex",
+ "strsim",
+]
+
+[[package]]
+name = "clap_derive"
+version = "4.5.55"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "clap_lex"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831"
+
+[[package]]
+name = "cmake"
+version = "0.1.57"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75443c44cd6b379beb8c5b45d85d0773baf31cce901fe7bb252f4eff3008ef7d"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "colorchoice"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
+
+[[package]]
+name = "core-foundation"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
+[[package]]
+name = "core-foundation-sys"
+version = "0.8.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
+
+[[package]]
+name = "crc32fast"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "deranged"
+version = "0.5.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4"
+dependencies = [
+ "powerfmt",
+]
+
+[[package]]
+name = "displaydoc"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "dunce"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
+
+[[package]]
+name = "either"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+
+[[package]]
+name = "equivalent"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
+
+[[package]]
+name = "errno"
+version = "0.3.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
+dependencies = [
+ "libc",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "find-msvc-tools"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
+
+[[package]]
+name = "flate2"
+version = "1.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
+[[package]]
+name = "foreign-types"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965"
+dependencies = [
+ "foreign-types-macros",
+ "foreign-types-shared",
+]
+
+[[package]]
+name = "foreign-types-macros"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "foreign-types-shared"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b"
+
+[[package]]
+name = "form_urlencoded"
+version = "1.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf"
+dependencies = [
+ "percent-encoding",
+]
+
+[[package]]
+name = "fs_extra"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
+
+[[package]]
+name = "fslock"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04412b8935272e3a9bae6f48c7bfff74c2911f60525404edfdd28e49884c3bfb"
+dependencies = [
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "futures-channel"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10"
+dependencies = [
+ "futures-core",
+]
+
+[[package]]
+name = "futures-core"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e"
+
+[[package]]
+name = "futures-sink"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7"
+
+[[package]]
+name = "futures-task"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
+
+[[package]]
+name = "futures-util"
+version = "0.3.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "pin-project-lite",
+ "pin-utils",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasip2",
+]
+
+[[package]]
+name = "glob"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
+
+[[package]]
+name = "h2"
+version = "0.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54"
+dependencies = [
+ "atomic-waker",
+ "bytes",
+ "fnv",
+ "futures-core",
+ "futures-sink",
+ "http",
+ "indexmap",
+ "slab",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e"
+
+[[package]]
+name = "hashbrown"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "http"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a"
+dependencies = [
+ "bytes",
+ "itoa",
+]
+
+[[package]]
+name = "http-body"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
+dependencies = [
+ "bytes",
+ "http",
+]
+
+[[package]]
+name = "http-body-util"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "http",
+ "http-body",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "http2"
+version = "0.5.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "faafa0f89e637a3524a9c081bcf825996e841afac53e691943e72049cea56f0a"
+dependencies = [
+ "atomic-waker",
+ "bytes",
+ "fnv",
+ "futures-core",
+ "futures-sink",
+ "http",
+ "indexmap",
+ "parking_lot",
+ "slab",
+ "smallvec",
+ "tokio",
+ "tokio-util",
+]
+
+[[package]]
+name = "httparse"
+version = "1.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
+
+[[package]]
+name = "httpdate"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
+
+[[package]]
+name = "hyper"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11"
+dependencies = [
+ "atomic-waker",
+ "bytes",
+ "futures-channel",
+ "futures-core",
+ "h2",
+ "http",
+ "http-body",
+ "httparse",
+ "httpdate",
+ "itoa",
+ "pin-project-lite",
+ "pin-utils",
+ "smallvec",
+ "tokio",
+ "want",
+]
+
+[[package]]
+name = "hyper-util"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0"
+dependencies = [
+ "bytes",
+ "http",
+ "http-body",
+ "hyper",
+ "pin-project-lite",
+ "tokio",
+ "tower-service",
+]
+
+[[package]]
+name = "iana-time-zone"
+version = "0.1.65"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470"
+dependencies = [
+ "android_system_properties",
+ "core-foundation-sys",
+ "iana-time-zone-haiku",
+ "js-sys",
+ "log",
+ "wasm-bindgen",
+ "windows-core",
+]
+
+[[package]]
+name = "iana-time-zone-haiku"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "icu_collections"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43"
+dependencies = [
+ "displaydoc",
+ "potential_utf",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locale_core"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6"
+dependencies = [
+ "displaydoc",
+ "litemap",
+ "tinystr",
+ "writeable",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_normalizer"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599"
+dependencies = [
+ "icu_collections",
+ "icu_normalizer_data",
+ "icu_properties",
+ "icu_provider",
+ "smallvec",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_normalizer_data"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a"
+
+[[package]]
+name = "icu_properties"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec"
+dependencies = [
+ "icu_collections",
+ "icu_locale_core",
+ "icu_properties_data",
+ "icu_provider",
+ "zerotrie",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_properties_data"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af"
+
+[[package]]
+name = "icu_provider"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614"
+dependencies = [
+ "displaydoc",
+ "icu_locale_core",
+ "writeable",
+ "yoke",
+ "zerofrom",
+ "zerotrie",
+ "zerovec",
+]
+
+[[package]]
+name = "idna"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
+dependencies = [
+ "idna_adapter",
+ "smallvec",
+ "utf8_iter",
+]
+
+[[package]]
+name = "idna_adapter"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344"
+dependencies = [
+ "icu_normalizer",
+ "icu_properties",
+]
+
+[[package]]
+name = "indexmap"
+version = "2.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017"
+dependencies = [
+ "equivalent",
+ "hashbrown 0.16.1",
+]
+
+[[package]]
+name = "ipnet"
+version = "2.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
+
+[[package]]
+name = "is_terminal_polyfill"
+version = "1.70.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
+
+[[package]]
+name = "itertools"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
+
+[[package]]
+name = "jobserver"
+version = "0.1.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
+dependencies = [
+ "getrandom 0.3.4",
+ "libc",
+]
+
+[[package]]
+name = "js-sys"
+version = "0.3.85"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3"
+dependencies = [
+ "once_cell",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
+[[package]]
+name = "libc"
+version = "0.2.181"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "459427e2af2b9c839b132acb702a1c654d95e10f8c326bfc2ad11310e458b1c5"
+
+[[package]]
+name = "libloading"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
+dependencies = [
+ "cfg-if",
+ "windows-link",
+]
+
+[[package]]
+name = "litemap"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77"
+
+[[package]]
+name = "lock_api"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
+dependencies = [
+ "scopeguard",
+]
+
+[[package]]
+name = "log"
+version = "0.4.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
+
+[[package]]
+name = "matchers"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9"
+dependencies = [
+ "regex-automata",
+]
+
+[[package]]
+name = "matchit"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
+
+[[package]]
+name = "memchr"
+version = "2.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
+
+[[package]]
+name = "mime"
+version = "0.3.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "miniz_oxide"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
+dependencies = [
+ "adler2",
+ "simd-adler32",
+]
+
+[[package]]
+name = "mio"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc"
+dependencies = [
+ "libc",
+ "wasi",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
+[[package]]
+name = "nu-ansi-term"
+version = "0.50.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "num-conv"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050"
+
+[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.21.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
+
+[[package]]
+name = "once_cell_polyfill"
+version = "1.70.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
+
+[[package]]
+name = "openssl-macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "openssl-probe"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe"
+
+[[package]]
+name = "parking_lot"
+version = "0.12.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
+dependencies = [
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-link",
+]
+
+[[package]]
+name = "pem"
+version = "3.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be"
+dependencies = [
+ "base64",
+ "serde_core",
+]
+
+[[package]]
+name = "percent-encoding"
+version = "2.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
+
+[[package]]
+name = "pin-project-lite"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
+
+[[package]]
+name = "pin-utils"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+
+[[package]]
+name = "pkg-config"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
+
+[[package]]
+name = "potential_utf"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77"
+dependencies = [
+ "zerovec",
+]
+
+[[package]]
+name = "powerfmt"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
+dependencies = [
+ "zerocopy",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "r-efi"
+version = "5.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
+[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom 0.2.17",
+]
+
+[[package]]
+name = "rcgen"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75e669e5202259b5314d1ea5397316ad400819437857b90861765f24c4cf80a2"
+dependencies = [
+ "pem",
+ "ring",
+ "rustls-pki-types",
+ "time",
+ "yasna",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.5.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "regex"
+version = "1.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c"
+
+[[package]]
+name = "ring"
+version = "0.17.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
+dependencies = [
+ "cc",
+ "cfg-if",
+ "getrandom 0.2.17",
+ "libc",
+ "untrusted",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "rustc-hash"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
+
+[[package]]
+name = "rustls"
+version = "0.23.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b"
+dependencies = [
+ "aws-lc-rs",
+ "log",
+ "once_cell",
+ "ring",
+ "rustls-pki-types",
+ "rustls-webpki",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "rustls-native-certs"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63"
+dependencies = [
+ "openssl-probe",
+ "rustls-pki-types",
+ "schannel",
+ "security-framework",
+]
+
+[[package]]
+name = "rustls-pemfile"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50"
+dependencies = [
+ "rustls-pki-types",
+]
+
+[[package]]
+name = "rustls-pki-types"
+version = "1.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd"
+dependencies = [
+ "zeroize",
+]
+
+[[package]]
+name = "rustls-webpki"
+version = "0.103.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53"
+dependencies = [
+ "aws-lc-rs",
+ "ring",
+ "rustls-pki-types",
+ "untrusted",
+]
+
+[[package]]
+name = "rustversion"
+version = "1.0.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+
+[[package]]
+name = "ryu"
+version = "1.0.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
+
+[[package]]
+name = "schannel"
+version = "0.1.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "schnellru"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "356285bbf17bea63d9e52e96bd18f039672ac92b55b8cb997d6162a2a37d1649"
+dependencies = [
+ "ahash",
+ "cfg-if",
+ "hashbrown 0.13.2",
+]
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "security-framework"
+version = "3.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38"
+dependencies = [
+ "bitflags",
+ "core-foundation",
+ "core-foundation-sys",
+ "libc",
+ "security-framework-sys",
+]
+
+[[package]]
+name = "security-framework-sys"
+version = "2.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "321c8673b092a9a42605034a9879d73cb79101ed5fd117bc9a597b89b4e9e61a"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.149"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
+dependencies = [
+ "itoa",
+ "memchr",
+ "serde",
+ "serde_core",
+ "zmij",
+]
+
+[[package]]
+name = "serde_path_to_error"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457"
+dependencies = [
+ "itoa",
+ "serde",
+ "serde_core",
+]
+
+[[package]]
+name = "serde_urlencoded"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
+dependencies = [
+ "form_urlencoded",
+ "itoa",
+ "ryu",
+ "serde",
+]
+
+[[package]]
+name = "sharded-slab"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
+dependencies = [
+ "lazy_static",
+]
+
+[[package]]
+name = "shlex"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+
+[[package]]
+name = "signal-hook-registry"
+version = "1.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b"
+dependencies = [
+ "errno",
+ "libc",
+]
+
+[[package]]
+name = "simd-adler32"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
+
+[[package]]
+name = "slab"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
+
+[[package]]
+name = "smallvec"
+version = "1.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
+
+[[package]]
+name = "socket2"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0"
+dependencies = [
+ "libc",
+ "windows-sys 0.60.2",
+]
+
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
+
+[[package]]
+name = "strsim"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
+
+[[package]]
+name = "subtle"
+version = "2.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
+
+[[package]]
+name = "syn"
+version = "2.0.115"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e614ed320ac28113fa64972c4262d5dbc89deacdfd00c34a3e4cea073243c12"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "sync_wrapper"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
+
+[[package]]
+name = "synstructure"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "thread_local"
+version = "1.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "time"
+version = "0.3.47"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c"
+dependencies = [
+ "deranged",
+ "num-conv",
+ "powerfmt",
+ "serde_core",
+ "time-core",
+]
+
+[[package]]
+name = "time-core"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca"
+
+[[package]]
+name = "tinystr"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869"
+dependencies = [
+ "displaydoc",
+ "zerovec",
+]
+
+[[package]]
+name = "tokio"
+version = "1.49.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86"
+dependencies = [
+ "bytes",
+ "libc",
+ "mio",
+ "parking_lot",
+ "pin-project-lite",
+ "signal-hook-registry",
+ "socket2",
+ "tokio-macros",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "tokio-boring2"
+version = "5.0.0-alpha.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8332a3a493b27722984a621fc00dc6e46211ac034083f4aab4305fc1bbae2b85"
+dependencies = [
+ "boring2",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-macros"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "tokio-rustls"
+version = "0.26.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61"
+dependencies = [
+ "rustls",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-stream"
+version = "0.1.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70"
+dependencies = [
+ "futures-core",
+ "pin-project-lite",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-util"
+version = "0.7.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "futures-sink",
+ "pin-project-lite",
+ "tokio",
+]
+
+[[package]]
+name = "tower"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
+dependencies = [
+ "futures-core",
+ "futures-util",
+ "pin-project-lite",
+ "sync_wrapper",
+ "tokio",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "tower-http"
+version = "0.6.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
+dependencies = [
+ "bitflags",
+ "bytes",
+ "http",
+ "pin-project-lite",
+ "tower-layer",
+ "tower-service",
+]
+
+[[package]]
+name = "tower-layer"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
+
+[[package]]
+name = "tower-service"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
+
+[[package]]
+name = "tracing"
+version = "0.1.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
+dependencies = [
+ "log",
+ "pin-project-lite",
+ "tracing-attributes",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-attributes"
+version = "0.1.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "tracing-core"
+version = "0.1.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
+dependencies = [
+ "once_cell",
+ "valuable",
+]
+
+[[package]]
+name = "tracing-log"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
+dependencies = [
+ "log",
+ "once_cell",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-subscriber"
+version = "0.3.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e"
+dependencies = [
+ "matchers",
+ "nu-ansi-term",
+ "once_cell",
+ "regex-automata",
+ "sharded-slab",
+ "smallvec",
+ "thread_local",
+ "tracing",
+ "tracing-core",
+ "tracing-log",
+]
+
+[[package]]
+name = "try-lock"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
+
+[[package]]
+name = "typed-builder"
+version = "0.23.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "31aa81521b70f94402501d848ccc0ecaa8f93c8eb6999eb9747e72287757ffda"
+dependencies = [
+ "typed-builder-macro",
+]
+
+[[package]]
+name = "typed-builder-macro"
+version = "0.23.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "076a02dc54dd46795c2e9c8282ed40bcfb1e22747e955de9389a1de28190fb26"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e"
+
+[[package]]
+name = "untrusted"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
+
+[[package]]
+name = "url"
+version = "2.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed"
+dependencies = [
+ "form_urlencoded",
+ "idna",
+ "percent-encoding",
+ "serde",
+]
+
+[[package]]
+name = "utf8_iter"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
+
+[[package]]
+name = "utf8parse"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
+
+[[package]]
+name = "uuid"
+version = "1.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee48d38b119b0cd71fe4141b30f5ba9c7c5d9f4e7a3a8b4a674e4b6ef789976f"
+dependencies = [
+ "getrandom 0.3.4",
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "valuable"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
+
+[[package]]
+name = "version_check"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+
+[[package]]
+name = "want"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
+dependencies = [
+ "try-lock",
+]
+
+[[package]]
+name = "wasi"
+version = "0.11.1+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
+
+[[package]]
+name = "wasip2"
+version = "1.0.2+wasi-0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5"
+dependencies = [
+ "wit-bindgen",
+]
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.108"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "rustversion",
+ "wasm-bindgen-macro",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.108"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.108"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55"
+dependencies = [
+ "bumpalo",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.108"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "webpki-root-certs"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "804f18a4ac2676ffb4e8b5b5fa9ae38af06df08162314f96a68d2a363e21a8ca"
+dependencies = [
+ "rustls-pki-types",
+]
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "windows-core"
+version = "0.62.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
+dependencies = [
+ "windows-implement",
+ "windows-interface",
+ "windows-link",
+ "windows-result",
+ "windows-strings",
+]
+
+[[package]]
+name = "windows-implement"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "windows-interface"
+version = "0.59.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
+[[package]]
+name = "windows-result"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-strings"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
+dependencies = [
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
+dependencies = [
+ "windows-targets 0.53.5",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.61.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm 0.52.6",
+ "windows_aarch64_msvc 0.52.6",
+ "windows_i686_gnu 0.52.6",
+ "windows_i686_gnullvm 0.52.6",
+ "windows_i686_msvc 0.52.6",
+ "windows_x86_64_gnu 0.52.6",
+ "windows_x86_64_gnullvm 0.52.6",
+ "windows_x86_64_msvc 0.52.6",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.53.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
+dependencies = [
+ "windows-link",
+ "windows_aarch64_gnullvm 0.53.1",
+ "windows_aarch64_msvc 0.53.1",
+ "windows_i686_gnu 0.53.1",
+ "windows_i686_gnullvm 0.53.1",
+ "windows_i686_msvc 0.53.1",
+ "windows_x86_64_gnu 0.53.1",
+ "windows_x86_64_gnullvm 0.53.1",
+ "windows_x86_64_msvc 0.53.1",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
+
+[[package]]
+name = "wit-bindgen"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
+
+[[package]]
+name = "wreq"
+version = "6.0.0-rc.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f79937f6c4df65b3f6f78715b9de2977afe9ee3b3436483c7949a24511e25935"
+dependencies = [
+ "ahash",
+ "boring2",
+ "brotli 8.0.2",
+ "bytes",
+ "flate2",
+ "futures-channel",
+ "futures-util",
+ "http",
+ "http-body",
+ "http-body-util",
+ "http2",
+ "httparse",
+ "ipnet",
+ "libc",
+ "percent-encoding",
+ "pin-project-lite",
+ "schnellru",
+ "serde",
+ "serde_json",
+ "smallvec",
+ "socket2",
+ "tokio",
+ "tokio-boring2",
+ "tower",
+ "url",
+ "want",
+ "webpki-root-certs",
+ "zstd",
+]
+
+[[package]]
+name = "wreq-util"
+version = "3.0.0-rc.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c6bbe24d28beb9ceb58b514bd6a613c759d3b706f768b9d2950d5d35b543c04"
+dependencies = [
+ "typed-builder",
+ "wreq",
+]
+
+[[package]]
+name = "writeable"
+version = "0.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9"
+
+[[package]]
+name = "yasna"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e17bb3549cc1321ae1296b9cdc2698e2b6cb1992adfa19a8c72e5b7a738f44cd"
+dependencies = [
+ "time",
+]
+
+[[package]]
+name = "yoke"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954"
+dependencies = [
+ "stable_deref_trait",
+ "yoke-derive",
+ "zerofrom",
+]
+
+[[package]]
+name = "yoke-derive"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
+[[package]]
+name = "zerocopy"
+version = "0.8.39"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.39"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "zerofrom"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5"
+dependencies = [
+ "zerofrom-derive",
+]
+
+[[package]]
+name = "zerofrom-derive"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
+[[package]]
+name = "zeroize"
+version = "1.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
+
+[[package]]
+name = "zerotrie"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851"
+dependencies = [
+ "displaydoc",
+ "yoke",
+ "zerofrom",
+]
+
+[[package]]
+name = "zerovec"
+version = "0.11.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002"
+dependencies = [
+ "yoke",
+ "zerofrom",
+ "zerovec-derive",
+]
+
+[[package]]
+name = "zerovec-derive"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "zmij"
+version = "1.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
+
+[[package]]
+name = "zstd"
+version = "0.13.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
+dependencies = [
+ "zstd-safe",
+]
+
+[[package]]
+name = "zstd-safe"
+version = "7.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
+dependencies = [
+ "zstd-sys",
+]
+
+[[package]]
+name = "zstd-sys"
+version = "2.0.16+zstd.1.5.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
+dependencies = [
+ "cc",
+ "pkg-config",
+]
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..1305e3f
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,46 @@
+[package]
+name = "antigravity-proxy"
+version = "3.0.0"
+edition = "2021"
+
+[dependencies]
+axum = { version = "0.8", features = ["json"] }
+tokio = { version = "1", features = ["full"] }
+wreq = { version = "6.0.0-rc.28", features = ["json"] }
+wreq-util = "3.0.0-rc.10"
+serde = { version = "1", features = ["derive"] }
+serde_json = "1"
+uuid = { version = "1", features = ["v4"] }
+regex = "1"
+async-stream = "0.3"
+tower-http = { version = "0.6", features = ["cors"] }
+tracing = "0.1"
+tracing-subscriber = { version = "0.3", features = ["env-filter"] }
+clap = { version = "4", features = ["derive"] }
+rand = "0.8"
+flate2 = "1"
+brotli = "7"
+chrono = "0.4"
+
+# MITM proxy dependencies
+rcgen = "0.13"
+rustls = { version = "0.23", features = ["ring"] }
+tokio-rustls = "0.26"
+rustls-native-certs = "0.8"
+rustls-pemfile = "2"
+time = "0.3"
+base64 = "0.22"
+httparse = "1"
+
+# HTTP/2 + gRPC interception
+hyper = { version = "1", features = ["http2", "client", "server"] }
+hyper-util = { version = "0.1", features = ["tokio"] }
+http-body-util = "0.1"
+http = "1"
+bytes = "1"
+tokio-stream = "0.1"
+
+[profile.release]
+opt-level = "z"
+lto = true
+strip = true
diff --git a/GEMINI.md b/GEMINI.md
new file mode 100644
index 0000000..fd87d74
--- /dev/null
+++ b/GEMINI.md
@@ -0,0 +1,155 @@
+# Antigravity Rust Proxy
+
+OpenAI-compatible proxy that intercepts and relays requests to Google's Antigravity language server, impersonating the real Electron webview.
+
+## Quick Start
+
+```bash
+# Build
+cargo build --release
+
+# Run (language server must be running)
+RUST_LOG=info ./target/release/antigravity-proxy
+
+# Custom port
+RUST_LOG=info ./target/release/antigravity-proxy --port 9000
+```
+
+Default port: **8741**
+
+## Endpoints
+
+| Method   | Path                   | Description                                                 |
+| -------- | ---------------------- | ----------------------------------------------------------- |
+| `POST`   | `/v1/responses`        | **Responses API** (primary) — supports `stream: true/false` |
+| `POST`   | `/v1/chat/completions` | Chat Completions API (OpenAI compat shim)                   |
+| `GET`    | `/v1/models`           | List available models                                       |
+| `GET`    | `/v1/sessions`         | List active sessions                                        |
+| `DELETE` | `/v1/sessions/:id`     | Delete a session                                            |
+| `POST`   | `/v1/token`            | Set OAuth token at runtime                                  |
+| `GET`    | `/v1/usage`            | MITM-intercepted token usage stats                          |
+| `GET`    | `/v1/quota`            | LS quota — credits, per-model rate limits, reset timers     |
+| `GET`    | `/health`              | Health check                                                |
+
+## Available Models
+
+| Name                | Label                                    |
+| ------------------- | ---------------------------------------- |
+| `opus-4.6`          | Claude Opus 4.6 (Thinking) — **default** |
+| `opus-4.5`          | Claude Opus 4.5 (Thinking)               |
+| `gemini-3-pro-high` | Gemini 3 Pro (High)                      |
+| `gemini-3-pro`      | Gemini 3 Pro (Low)                       |
+| `gemini-3-flash`    | Gemini 3 Flash                           |
+
+## Example: Responses API
+
+### Sync
+
+```bash
+curl -s http://localhost:8741/v1/responses \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gemini-3-flash",
+    "input": "Say hello in exactly 3 words",
+    "stream": false,
+    "timeout": 60
+  }' | jq .
+```
+
+### Streaming
+
+```bash
+curl -N http://localhost:8741/v1/responses \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gemini-3-flash",
+    "input": "Say hello in exactly 3 words",
+    "stream": true,
+    "timeout": 60
+  }'
+```
+
+### Multi-turn (session reuse)
+
+```bash
+curl -s http://localhost:8741/v1/responses \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gemini-3-flash",
+    "input": "What is 2+2?",
+    "conversation": "my-session-1",
+    "stream": false
+  }' | jq .
+
+# Follow-up in same cascade:
+curl -s http://localhost:8741/v1/responses \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gemini-3-flash",
+    "input": "Now multiply that by 10",
+    "conversation": "my-session-1",
+    "stream": false
+  }' | jq .
+```
+
+## Authentication
+
+The proxy needs an OAuth token. Three ways to provide it:
+
+1. **Environment variable**: `export ANTIGRAVITY_OAUTH_TOKEN=ya29.xxx`
+2. **Token file**: `echo 'ya29.xxx' > ~/.config/antigravity-proxy-token`
+3. **Runtime API**: `curl -X POST http://localhost:8741/v1/token -H "Content-Type: application/json" -d '{"token":"ya29.xxx"}'`
+
+## Version Detection
+
+Version strings (Antigravity, Chrome, Electron, Client) are **auto-detected** at startup from the installed Antigravity app:
+
+- `product.json` → app version + client/IDE version
+- Binary → Chrome + Electron versions via `strings`
+
+Falls back to hardcoded values if the app isn't installed. No manual updates needed when Antigravity updates.
+
+## Stealth Features
+
+- **TLS fingerprint**: BoringSSL with Chrome 142 JA3/JA4 + H2 fingerprint via `wreq`
+- **Protobuf**: Hand-rolled encoder producing byte-exact match to real webview traffic
+- **Warmup**: Mimics real webview startup RPC calls
+- **Heartbeat**: Periodic keep-alive matching real webview lifecycle
+- **Jitter**: Randomized polling intervals to avoid automation fingerprint
+- **Session reuse**: Cascades are reused for multi-turn, matching real webview behavior
+- **MITM proxy**: TLS-intercepting proxy for real token usage capture (runs by default; traffic is only captured once the LS wrapper is installed)
+
+## MITM Proxy
+
+Built-in MITM proxy intercepts LS ↔ Google/Anthropic traffic to capture **real** token usage (input, output, cache read, cache creation). Enabled by default; pass `--no-mitm` to disable it.
+
+### Setup
+
+```bash
+# 1. Start proxy (generates CA cert automatically)
+RUST_LOG=info ./target/release/antigravity-proxy
+
+# 2. Install wrapper (patches LS binary to route through MITM)
+./scripts/mitm-wrapper.sh install
+
+# 3. Restart Antigravity — done!
+
+# Check status
+./scripts/mitm-wrapper.sh status
+
+# Uninstall
+./scripts/mitm-wrapper.sh uninstall
+```
+
+### Usage Stats
+
+```bash
+curl -s http://localhost:8741/v1/usage | jq .
+```
+
+Returns aggregate token counts from all intercepted API calls.
+
+### CLI Flags
+
+- `--no-mitm`: Disable MITM proxy entirely
+- `--mitm-port <PORT>`: Override MITM proxy port (default: auto-assign)
diff --git a/KNOWN_ISSUES.md b/KNOWN_ISSUES.md
new file mode 100644
index 0000000..4d0f1e6
--- /dev/null
+++ b/KNOWN_ISSUES.md
@@ -0,0 +1,109 @@
+# Known Issues & Future Work
+
+---
+
+## 🟡 Medium
+
+### 1. Cascade Correlation Is Heuristic
+
+**File:** `src/mitm/intercept.rs` — `extract_cascade_hint()`
+
+The MITM proxy matches intercepted API traffic to cascade IDs by scanning for `metadata.user_id` or `workspace_id` in the request body. If neither is found, it stores under `_latest`. Since `take_usage()` no longer falls back to `_latest`, unidentified requests will have **no MITM usage data at all**.
+
+**Fix:** Investigate the actual request body format the LS sends for better correlation keys. Alternatively, use timing-based correlation (match MITM capture timestamp to cascade polling window).
+
+---
+
+### 2. Domain Certificate Cache Is Unbounded
+
+**File:** `src/mitm/ca.rs` — `domain_cache`
+
+The `domain_cache` (`HashMap<String, Arc<ServerConfig>>`) grows without bound. Each unique domain gets a cached entry containing a full `ServerConfig` with an RSA key pair. In practice, only ~5–10 domains are intercepted so this is unlikely to matter, but there's no eviction.
+
+**Fix:** Set a max cache size (e.g., 100 entries) with LRU eviction, or use a TTL since leaf certs are generated with a 1-year validity.
+
+---
+
+### 3. Request Modification Not Implemented
+
+**File:** `src/mitm/proxy.rs` — `modify_requests: false`
+
+The `MitmConfig.modify_requests` flag exists and is plumbed through, but no actual modification logic is implemented. The flag is hardcoded to `false`.
+
+**Fix:** When needed, implement request body mutation in `handle_http_over_tls()` — parse JSON, modify, reserialize, update `Content-Length`.
+
+---
+
+### 4. `total_cost_usd` Is Dead
+
+**File:** `src/mitm/store.rs` (line 28)
+
+`ApiUsage.total_cost_usd` is `Option<f64>` but is **always `None`** — set to `None` in all 4 construction sites (`h2_handler.rs` ×2, `intercept.rs` ×2). Neither Anthropic nor Google include cost in API responses.
+
+**Fix:** Either remove the field (simpler), or populate it via a pricing table lookup (model → $/1K tokens) at `record_usage()` time.
+
+---
+
+## 🟢 Low
+
+### 5. Wrapper Script Fallback Paths May Be Stale
+
+**File:** `scripts/mitm-wrapper.sh` — `LS_FALLBACK_DIRS`
+
+The fallback glob patterns (e.g., `~/.cursor/extensions/antigravity.antigravity-*/...`) assume a specific extension naming convention. These are only used when no running LS process is found via `/proc` scanning (Method 1), which is the primary and robust discovery mechanism.
+
+**Impact:** Only affects `install` when the LS isn't running. Low priority.
+
+---
+
+### 6. No Integration Tests for MITM Module
+
+The MITM module has unit tests for protobuf decoding and intercept parsing, but no integration tests that verify:
+
+- TLS interception end-to-end with the generated CA
+- Full HTTP/1.1 request/response cycle through the proxy
+- gRPC (HTTP/2) request/response cycle through `h2_handler`
+- Store recording and retrieval under concurrency
+- Wrapper script install/uninstall lifecycle
+
+---
+
+## 🔍 Investigation
+
+### 7. LS Exposes Credit/Quota Data via `GetUserStatus`
+
+**Confirmed via live probing.** The LS's `GetUserStatus` RPC already returns structured cost/quota data:
+
+```json
+"planStatus": {
+    "planInfo": {
+        "planName": "Pro",
+        "monthlyPromptCredits": 50000,
+        "monthlyFlowCredits": 150000,
+        "monthlyFlexCreditPurchaseAmount": 25000,
+        "canBuyMoreCredits": true
+    },
+    "availablePromptCredits": 500,
+    "availableFlowCredits": 100
+}
+```
+
+Each model also includes **per-model quota info**:
+
+```json
+"quotaInfo": {
+    "remainingFraction": 0.2,
+    "resetTime": "2026-02-14T07:41:37Z"
+}
+```
+
+**Key findings:**
+
+- `GetUserStatus` is the single source for credit/quota data (exposed via `LanguageServerService`)
+- `SeatManagementService` methods (`GetPlanStatus`, `GetTeamCreditEntries`, `GetCascadeAnalytics`, `GetUserSubscription`) are **not routed through the LS** — they're backend-only
+- `PredictionService/RetrieveUserQuota` is also backend-only (not proxied by LS)
+- `GetUserAnalyticsSummary` returns empty `{}` (may not be implemented or requires different context)
+- `GetModelStatuses` returns empty `{}` (separate from the model configs in `GetUserStatus`)
+- `userTier` field shows subscription level: `{"id": "g1-ultra-tier", "name": "Google AI Ultra"}`
+
+**Potential use:** We could poll `GetUserStatus` periodically and expose `availablePromptCredits`, `availableFlowCredits`, and per-model `remainingFraction` via the `/v1/usage` endpoint — giving users real-time credit burn visibility without needing MITM token counting.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..52d5b30
--- /dev/null
+++ b/README.md
@@ -0,0 +1,239 @@
+# Antigravity Proxy
+
+OpenAI-compatible proxy that intercepts and relays requests to Google's Antigravity language server, impersonating the real Electron webview.
+
+## Quick Start
+
+```bash
+# Build
+cargo build --release
+
+# Run (language server must be running)
+RUST_LOG=info ./target/release/antigravity-proxy
+
+# Custom port
+RUST_LOG=info ./target/release/antigravity-proxy --port 9000
+```
+
+Default port: **8741**
+
+## Endpoints
+
+| Method   | Path                   | Description                                                 |
+| -------- | ---------------------- | ----------------------------------------------------------- |
+| `POST`   | `/v1/responses`        | **Responses API** (primary) — supports `stream: true/false` |
+| `POST`   | `/v1/chat/completions` | Chat Completions API (OpenAI compat shim)                   |
+| `GET`    | `/v1/models`           | List available models                                       |
+| `GET`    | `/v1/sessions`         | List active sessions                                        |
+| `DELETE` | `/v1/sessions/:id`     | Delete a session                                            |
+| `POST`   | `/v1/token`            | Set OAuth token at runtime                                  |
+| `GET`    | `/v1/usage`            | MITM-intercepted token usage stats                          |
+| `GET`    | `/v1/quota`            | LS quota — credits, per-model rate limits, reset timers     |
+| `GET`    | `/health`              | Health check                                                |
+
+## Available Models
+
+| Name                | Label                                    |
+| ------------------- | ---------------------------------------- |
+| `opus-4.6`          | Claude Opus 4.6 (Thinking) — **default** |
+| `opus-4.5`          | Claude Opus 4.5 (Thinking)               |
+| `gemini-3-pro-high` | Gemini 3 Pro (High)                      |
+| `gemini-3-pro`      | Gemini 3 Pro (Low)                       |
+| `gemini-3-flash`    | Gemini 3 Flash                           |
+
+## Example: Responses API
+
+### Sync
+
+```bash
+curl -s http://localhost:8741/v1/responses \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gemini-3-flash",
+    "input": "Say hello in exactly 3 words",
+    "stream": false,
+    "timeout": 60
+  }' | jq .
+```
+
+### Streaming
+
+```bash
+curl -N http://localhost:8741/v1/responses \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gemini-3-flash",
+    "input": "Say hello in exactly 3 words",
+    "stream": true,
+    "timeout": 60
+  }'
+```
+
+### Multi-turn (session reuse)
+
+```bash
+# First message
+curl -s http://localhost:8741/v1/responses \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gemini-3-flash",
+    "input": "What is 2+2?",
+    "conversation": "my-session-1",
+    "stream": false
+  }' | jq .
+
+# Follow-up in same cascade
+curl -s http://localhost:8741/v1/responses \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gemini-3-flash",
+    "input": "Now multiply that by 10",
+    "conversation": "my-session-1",
+    "stream": false
+  }' | jq .
+```
+
+## Authentication
+
+The proxy needs an OAuth token. Three ways to provide it:
+
+1. **Environment variable**: `export ANTIGRAVITY_OAUTH_TOKEN=ya29.xxx`
+2. **Token file**: `echo 'ya29.xxx' > ~/.config/antigravity-proxy-token`
+3. **Runtime API**: `curl -X POST http://localhost:8741/v1/token -H "Content-Type: application/json" -d '{"token":"ya29.xxx"}'`
+
+## Stealth Features
+
+- **TLS fingerprint**: BoringSSL with Chrome 142 JA3/JA4 + H2 fingerprint via `wreq`
+- **Protobuf**: Hand-rolled encoder producing byte-exact match to real webview traffic
+- **Warmup**: Mimics real webview startup RPC calls
+- **Heartbeat**: Periodic keep-alive matching real webview lifecycle
+- **Jitter**: Randomized polling intervals to avoid automation fingerprint
+- **Session reuse**: Cascades are reused for multi-turn, matching real webview behavior
+- **Version detection**: Auto-detects Antigravity/Chrome/Electron versions from installed app
+
+## MITM Proxy
+
+Built-in TLS-intercepting proxy captures real token usage from LS ↔ Google/Anthropic traffic. Enabled by default; pass `--no-mitm` to disable it.
+
+### Setup
+
+```bash
+# 1. Start proxy (generates CA cert automatically)
+RUST_LOG=info ./target/release/antigravity-proxy
+
+# 2. Install wrapper (patches LS binary to route through MITM)
+sudo ./scripts/mitm-wrapper.sh install
+
+# 3. Restart Antigravity — done!
+
+# Check status
+./scripts/mitm-wrapper.sh status
+
+# Uninstall
+sudo ./scripts/mitm-wrapper.sh uninstall
+```
+
+### Usage Stats
+
+```bash
+curl -s http://localhost:8741/v1/usage | jq .
+```
+
+## Standalone Language Server
+
+Launch an isolated LS instance for experimentation:
+
+```bash
+# Basic test (starts, checks quota, exits)
+./scripts/standalone-ls.sh
+
+# Foreground mode (stays alive)
+./scripts/standalone-ls.sh --fg
+
+# With MITM traffic interception
+./scripts/standalone-ls.sh --mitm
+
+# Capture a clean traffic snapshot
+./scripts/standalone-ls.sh --snapshot
+
+# Snapshot with custom prompt
+./scripts/standalone-ls.sh --snapshot --prompt "Explain quantum computing"
+```
+
+The standalone LS shares the main Antigravity app's OAuth (via its extension server) but has its own port, data directory, and cascades.
+
+### Traffic Snapshots
+
+The `--snapshot` flag captures all HTTP/2 traffic and formats it into a clean, color-coded report:
+
+```
+══════════════════════════════════════════════════════════════════════
+  STANDALONE LS TRAFFIC SNAPSHOT
+══════════════════════════════════════════════════════════════════════
+
+▸ Outbound Connections
+  → antigravity-unleash.goog    (Feature Flags)
+  → play.googleapis.com         (Telemetry)
+
+══════════════════════════════════════════════════════════════════════
+  antigravity-unleash.goog — Feature Flags
+══════════════════════════════════════════════════════════════════════
+
+  → POST /api/client/register
+    authorization: *:production.e4455...
+    unleash-appname: codeium-language-server
+    Body (561 bytes, JSON):
+      {"appName":"codeium-language-server","instanceId":"..."}
+```
+
+## Architecture
+
+```mermaid
+graph LR
+    A[Your App<br/>OpenAI SDK] -->|HTTP| B[Proxy<br/>:8741]
+    B -->|gRPC| C[Language<br/>Server]
+    C -->|HTTPS| D[Google /<br/>Anthropic]
+    E[MITM Proxy<br/>:8742] -.->|intercept| D
+    C -.->|routed via| E
+```
+
+## Project Structure
+
+```
+src/
+├── main.rs           # Entry point, CLI args, lifecycle
+├── backend.rs        # LS discovery and RPC communication
+├── constants.rs      # Version detection + stealth constants
+├── proto.rs          # Hand-rolled protobuf encoder
+├── quota.rs          # LS quota polling and caching
+├── session.rs        # Multi-turn session management
+├── warmup.rs         # Startup warmup (mimics real webview)
+├── api/
+│   └── mod.rs        # Axum API server + route handlers
+└── mitm/
+    ├── mod.rs         # MITM module root
+    ├── ca.rs          # Dynamic CA cert generation
+    ├── proxy.rs       # TLS-intercepting proxy server
+    ├── intercept.rs   # API response parser (usage extraction)
+    └── store.rs       # Token usage aggregation store
+
+scripts/
+├── mitm-wrapper.sh     # Install/uninstall MITM wrapper on LS binary
+├── standalone-ls.sh    # Launch isolated LS instance
+└── parse-snapshot.py   # HTTP/2 traffic snapshot parser
+```
+
+## CLI Flags
+
+```
+antigravity-proxy [OPTIONS]
+
+Options:
+  --port <PORT>          API server port (default: 8741)
+  --no-mitm              Disable MITM proxy
+  --mitm-port <PORT>     Override MITM proxy port (default: auto)
+```
+
+## License
+
+Private. Do not distribute.
diff --git a/scripts/mitm-wrapper.sh b/scripts/mitm-wrapper.sh
new file mode 100755
index 0000000..e158ffe
--- /dev/null
+++ b/scripts/mitm-wrapper.sh
@@ -0,0 +1,331 @@
+#!/usr/bin/env bash
+# ╔═══════════════════════════════════════════════════════════════════════════╗
+# ║  Antigravity MITM LS Wrapper                                            ║
+# ║                                                                         ║
+# ║  This script replaces the real Antigravity language server binary.       ║
+# ║  It injects HTTPS_PROXY and SSL_CERT_FILE environment variables         ║
+# ║  so the MITM proxy can intercept LS<->API traffic.                      ║
+# ║                                                                         ║
+# ║  Install:   ./mitm-wrapper.sh install                                   ║
+# ║  Uninstall: ./mitm-wrapper.sh uninstall                                 ║
+# ║  (No args = act as wrapper, exec the real binary with injected env)     ║
+# ╚═══════════════════════════════════════════════════════════════════════════╝
+set -euo pipefail
+
+# ── Config ────────────────────────────────────────────────────────────────────
+# Resolve the real user's home (not /root when running under sudo)
+if [[ -n "${SUDO_USER:-}" ]]; then
+    REAL_HOME="$(getent passwd "$SUDO_USER" | cut -d: -f6)"  # 6th passwd field = home directory
+else
+    REAL_HOME="$HOME"
+fi
+MITM_PORT_FILE="${REAL_HOME}/.config/antigravity-proxy/mitm-port"  # presumably written by the proxy — TODO confirm
+if [[ -n "${ANTIGRAVITY_MITM_PORT:-}" ]]; then  # port precedence: env var > port file > default 8742
+    MITM_PORT="$ANTIGRAVITY_MITM_PORT"
+elif [[ -f "$MITM_PORT_FILE" ]]; then
+    MITM_PORT="$(cat "$MITM_PORT_FILE" 2>/dev/null || echo 8742)"
+else
+    MITM_PORT="8742"
+fi
+CA_PATH="${REAL_HOME}/.config/antigravity-proxy/mitm-ca.pem"  # MITM CA cert; install checks it and bakes the path into the wrapper
+
+# Antigravity LS — discovered dynamically from running processes.
+# Hardcoded paths are only used as a fallback if no LS process is running.
+LS_FALLBACK_DIRS=(  # entries may contain globs; find_ls_binary expands them unquoted
+    "/usr/share/antigravity/resources/app/extensions/antigravity/bin"
+    "${REAL_HOME}/.antigravity/extensions/antigravity.antigravity-*/dist/bundled/language-server/bin"
+    "${REAL_HOME}/.cursor/extensions/antigravity.antigravity-*/dist/bundled/language-server/bin"
+    "${REAL_HOME}/.vscode/extensions/antigravity.antigravity-*/dist/bundled/language-server/bin"
+    "/opt/antigravity/language-server/bin"
+)
+
+BACKUP_SUFFIX=".real"  # appended to the LS path to name the backup copy of the real binary
+
+# ── Colors ────────────────────────────────────────────────────────────────────
+RED='\033[0;31m'  # ANSI escape sequences, rendered by `echo -e`
+GREEN='\033[0;32m'
+YELLOW='\033[0;33m'
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+NC='\033[0m'  # reset all attributes
+
+# ── Find LS binary ───────────────────────────────────────────────────────────
+find_ls_binary() {
+    # Prints the LS binary path (backup suffix stripped) on stdout; returns 1 if none is found.
+    # Method 1: Find from running process via /proc
+    if [[ -d /proc ]]; then
+        for pid_dir in /proc/[0-9]*; do
+            local exe_target
+            exe_target="$(readlink "${pid_dir}/exe" 2>/dev/null)" || continue
+            # Strip " (deleted)" suffix that appears when the binary was unlinked
+            exe_target="${exe_target% (deleted)}"
+            if [[ "$exe_target" == *language_server_linux* || "$exe_target" == *antigravity-language-server* ]]; then
+                # A wrapped LS runs from the backup (.real) path; strip the suffix so callers
+                # get the base binary path. The suffix is quoted so it is matched literally.
+                echo "${exe_target%"$BACKUP_SUFFIX"}"
+                return 0
+            fi
+        done
+    fi
+
+    # Method 2: Fallback — scan known directories for common binary names
+    local bin_names=("language_server_linux_x64" "language_server_linux_arm64" "antigravity-language-server")
+    for dir_pattern in "${LS_FALLBACK_DIRS[@]}"; do
+        for dir in $dir_pattern; do
+            [[ -d "$dir" ]] || continue
+            for name in "${bin_names[@]}"; do
+                local path="${dir}/${name}"
+                if [[ -f "$path" || -f "${path}${BACKUP_SUFFIX}" ]]; then
+                    echo "$path"
+                    return 0
+                fi
+            done
+        done
+    done
+    return 1
+}
+
+# ── Install ──────────────────────────────────────────────────────────────────
+cmd_install() {
+    # Usage: cmd_install [LS_PATH]. Backs up the real LS binary as <path>.real and
+    # writes a wrapper script in its place that injects the proxy/CA environment.
+    local ls_path
+    ls_path=$(find_ls_binary) || ls_path="${1:-}"
+    # An explicit path argument overrides auto-discovery
+    if [[ -n "${1:-}" ]]; then
+        ls_path="$1"
+    fi
+
+    # Check permissions upfront — bail out with a sudo hint before touching anything
+    if [[ -n "$ls_path" ]]; then
+        local ls_dir
+        ls_dir="$(dirname "$ls_path")"
+        if [[ ! -w "$ls_dir" ]] && [[ "$EUID" -ne 0 ]]; then
+            echo -e "  ${RED}✗${NC} ${ls_dir} requires elevated permissions"
+            echo -e "  run: sudo $0 install ${1:-}"
+            exit 1
+        fi
+    fi
+
+    echo -e "${BOLD}${CYAN}Antigravity MITM Wrapper Installer${NC}"
+    echo -e "───────────────────────────────────"
+    echo ""
+
+    # Report the discovery result; not finding the LS at all is fatal
+    if [[ -z "$ls_path" ]]; then
+        echo -e "  ${RED}✗${NC} Could not find Antigravity language server binary."
+        echo -e "    No LS process found in /proc, and fallback paths didn't match."
+        echo ""
+        echo -e "  Set the path manually:"
+        echo -e "    $0 install /path/to/language_server_linux_x64"
+        exit 1
+    fi
+    echo -e "  ${GREEN}✓${NC} Found LS: ${ls_path}"
+
+    local real_path="${ls_path}${BACKUP_SUFFIX}"
+    local wrapper_dir
+    wrapper_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+    local wrapper_src="${wrapper_dir}/mitm-wrapper.sh"
+
+    # Verify the binary exists and is not already wrapped
+    if [[ -f "$real_path" ]]; then
+        echo -e "  ${YELLOW}!${NC} Already installed (backup exists at ${real_path})"
+        echo -e "  Run '$0 uninstall' first to reinstall."
+        exit 0
+    fi
+
+    if [[ ! -f "$ls_path" ]]; then
+        echo -e "  ${RED}✗${NC} Binary not found: ${ls_path}"
+        exit 1
+    fi
+
+    # Refuse to back up our own wrapper: match the marker on the generated script's 2nd line
+    if head -c 100 "$ls_path" | grep -q 'Antigravity MITM LS Wrapper'; then
+        echo -e "  ${YELLOW}!${NC} Already wrapped (script detected). Run '$0 uninstall' first."
+        exit 0
+    fi
+
+    # Check CA cert
+    if [[ ! -f "$CA_PATH" ]]; then
+        echo -e "  ${YELLOW}!${NC}  CA cert not found at ${CA_PATH}"
+        echo -e "    Start the proxy first to generate it."
+        echo -e "    Continuing install anyway..."
+    else
+        echo -e "  ${GREEN}✓${NC} CA cert: ${CA_PATH}"
+    fi
+
+    # Back up the real binary
+    cp -p "$ls_path" "$real_path"
+    echo -e "  ${GREEN}✓${NC} Backed up real binary to ${real_path}"
+
+    # Remove the original before writing (avoids "Text file busy" if LS is running)
+    rm -f "$ls_path"
+
+    # Create the wrapper in-place. NOTE(review): it writes the CA bundle to a fixed /tmp path.
+    tee "$ls_path" > /dev/null << 'WRAPPER_EOF'
+#!/usr/bin/env bash
+# Antigravity MITM LS Wrapper — auto-generated, do not edit.
+# The LS is a Go binary — it reads HTTPS_PROXY and SSL_CERT_FILE (not NODE_EXTRA_CA_CERTS).
+# Go's gRPC library also reads GRPC_DEFAULT_SSL_ROOTS_FILE_PATH for root certs.
+# We build a combined CA bundle (system CAs + MITM CA) and inject it.
+
+REAL_BINARY="${BASH_SOURCE[0]}.real"
+
+if [[ ! -f "$REAL_BINARY" ]]; then
+    echo "ERROR: Real LS binary not found at $REAL_BINARY" >&2
+    echo "Run 'mitm-wrapper.sh uninstall' and reinstall." >&2
+    exit 1
+fi
+
+# Inject MITM proxy (don't override if already set)
+export HTTPS_PROXY="${HTTPS_PROXY:-http://127.0.0.1:__MITM_PORT__}"
+
+# Build combined CA bundle: system CAs + MITM CA
+MITM_CA="__CA_PATH__"
+COMBINED_CA="/tmp/antigravity-mitm-combined-ca.pem"
+if [[ -f "$MITM_CA" ]]; then
+    # Find system CA bundle
+    SYS_CA=""
+    for candidate in /etc/ssl/certs/ca-certificates.crt /etc/pki/tls/certs/ca-bundle.crt /etc/ssl/cert.pem; do
+        if [[ -f "$candidate" ]]; then
+            SYS_CA="$candidate"
+            break
+        fi
+    done
+    if [[ -n "$SYS_CA" ]]; then
+        cat "$SYS_CA" "$MITM_CA" > "$COMBINED_CA" 2>/dev/null
+        export SSL_CERT_FILE="$COMBINED_CA"
+        # Go's gRPC library may use this instead of SSL_CERT_FILE
+        export GRPC_DEFAULT_SSL_ROOTS_FILE_PATH="$COMBINED_CA"
+    fi
+fi
+
+exec "$REAL_BINARY" "$@"
+WRAPPER_EOF
+
+    # Substitute actual values
+    sed -i "s|__MITM_PORT__|${MITM_PORT}|g" "$ls_path"
+    sed -i "s|__CA_PATH__|${CA_PATH}|g" "$ls_path"
+
+    # Make executable
+    chmod +x "$ls_path"
+
+    echo -e "  ${GREEN}✓${NC} Wrapper installed at ${ls_path}"
+    echo ""
+    echo -e "  ${BOLD}How it works:${NC}"
+    echo -e "    When Antigravity starts the LS, the wrapper will:"
+    echo -e "    1. Set ${CYAN}HTTPS_PROXY${NC}=http://127.0.0.1:${MITM_PORT}"
+    echo -e "    2. Build combined CA bundle (system + MITM) at /tmp/antigravity-mitm-combined-ca.pem"
+    echo -e "    3. Set ${CYAN}SSL_CERT_FILE${NC} to the combined bundle"
+    echo -e "    4. Exec the real LS binary with all original args"
+    echo ""
+    echo -e "  ${YELLOW}Note:${NC} Restart Antigravity for the wrapper to take effect."
+    echo ""
+}
+
+# ── Uninstall ────────────────────────────────────────────────────────────────
+cmd_uninstall() {
+    # Restore the real LS binary from its backup; refuses to overwrite a file that lacks the wrapper marker (same check as cmd_status).
+    local ls_path
+    ls_path=$(find_ls_binary) || true  # empty when the binary cannot be located; reported below
+    if [[ -n "$ls_path" ]] && [[ ! -w "$(dirname "$ls_path")" ]] && [[ "$EUID" -ne 0 ]]; then
+        echo -e "  ${RED}✗${NC} $(dirname "$ls_path") requires elevated permissions"
+        echo -e "  run: sudo $0 uninstall"
+        exit 1
+    fi
+
+    echo -e "${BOLD}${CYAN}Antigravity MITM Wrapper Uninstaller${NC}"
+    echo -e "─────────────────────────────────────"
+    echo ""
+    if [[ -n "$ls_path" ]]; then
+        local real_path="${ls_path}${BACKUP_SUFFIX}"
+        if [[ ! -f "$real_path" ]]; then
+            echo -e "  ${YELLOW}!${NC} No backup found at ${real_path}"
+            echo -e "    The LS binary may not be wrapped."
+        elif [[ -f "$ls_path" ]] && ! head -c 200 "$ls_path" | grep -q 'MITM LS Wrapper'; then
+            echo -e "  ${RED}✗${NC} ${ls_path} is not the MITM wrapper (replaced by an update?); leaving it and ${real_path} untouched"
+        else
+            mv -f "$real_path" "$ls_path"
+            echo -e "  ${GREEN}✓${NC} Restored real binary at ${ls_path}"
+        fi
+    else
+        echo -e "  ${RED}✗${NC} Could not find Antigravity language server binary."
+    fi
+    echo ""
+    echo -e "  ${YELLOW}Note:${NC} Restart Antigravity for the change to take effect."
+    echo ""
+}
+
+# ── Status ───────────────────────────────────────────────────────────────────
+cmd_status() {
+    echo -e "${BOLD}${CYAN}Antigravity MITM Wrapper Status${NC}"
+    echo -e "────────────────────────────────"
+    echo ""
+    # Reports, in order: LS binary location, wrapper state, CA cert presence, proxy listener.
+    local ls_path
+    if ls_path=$(find_ls_binary); then
+        echo -e "  ${GREEN}✓${NC} LS binary: ${ls_path}"
+
+        local real_path="${ls_path}${BACKUP_SUFFIX}"  # a backup next to the binary is what marks the wrapper as installed
+        if [[ -f "$real_path" ]]; then
+            echo -e "  ${GREEN}✓${NC} Wrapper: ${BOLD}installed${NC}"
+            echo -e "  ${GREEN}✓${NC} Real binary: ${real_path}"
+
+            # The file at $ls_path must still carry the wrapper's marker text within its first 200 bytes
+            if head -c 200 "$ls_path" | grep -q 'MITM LS Wrapper'; then
+                echo -e "  ${GREEN}✓${NC} Wrapper script: valid"
+            else
+                echo -e "  ${RED}✗${NC} Wrapper script: ${BOLD}corrupted or replaced${NC}"
+            fi
+        else
+            echo -e "  ${YELLOW}○${NC} Wrapper: ${BOLD}not installed${NC}"
+        fi
+    else
+        echo -e "  ${RED}✗${NC} LS binary: not found"
+    fi
+
+    # CA cert expected at $CA_PATH (the failure message points the user at starting the proxy)
+    if [[ -f "$CA_PATH" ]]; then
+        echo -e "  ${GREEN}✓${NC} CA cert: ${CA_PATH}"
+    else
+        echo -e "  ${RED}✗${NC} CA cert: not found (start proxy first)"
+    fi
+
+    # Look for a TCP listener on $MITM_PORT via ss; ss errors are discarded, so a failing ss reads as "not running"
+    if ss -tlnp 2>/dev/null | grep -q ":${MITM_PORT} "; then
+        echo -e "  ${GREEN}✓${NC} MITM proxy: listening on :${MITM_PORT}"
+    else
+        echo -e "  ${YELLOW}○${NC} MITM proxy: not running on :${MITM_PORT}"
+    fi
+
+    echo ""
+}
+
+# ── Main ─────────────────────────────────────────────────────────────────────
+case "${1:-}" in  # the first CLI argument selects the sub-command; a missing one falls through to the usage branch
+    install)
+        shift  # drop "install" so that $1 becomes the optional LS path
+        cmd_install "${1:-}"
+        ;;
+    uninstall)
+        cmd_uninstall
+        ;;
+    status)
+        cmd_status
+        ;;
+    -h|--help)  # full help text; falls out of the case without an explicit exit
+        echo "Usage: $0 {install|uninstall|status}"
+        echo ""
+        echo "Commands:"
+        echo "  install [path]   Install MITM wrapper (auto-detect or specify path)"
+        echo "  uninstall        Restore original LS binary"
+        echo "  status           Show wrapper installation status"
+        echo ""
+        echo "Environment:"
+        echo "  ANTIGRAVITY_MITM_PORT  MITM proxy port (default: 8742)"
+        ;;
+    *)  # unknown or missing command: short usage and a failing exit status
+        echo "Usage: $0 {install|uninstall|status}"
+        exit 1
+        ;;
+esac
\ No newline at end of file
diff --git a/scripts/parse-snapshot.py b/scripts/parse-snapshot.py
new file mode 100644
index 0000000..017617e
--- /dev/null
+++ b/scripts/parse-snapshot.py
@@ -0,0 +1,475 @@
+#!/usr/bin/env python3
+"""
+Parse Go GODEBUG=http2debug=2 output into a clean, readable snapshot.
+
+Usage:
+    python3 parse-snapshot.py < raw-http2-dump.log
+    python3 parse-snapshot.py /path/to/logfile
+"""
+
+import sys
+import re
+import json
+import gzip
+from collections import defaultdict
+from io import BytesIO
+
+# ── Colors ────────────────────────────────────────────────────────────────────
+BOLD = "\033[1m"
+DIM = "\033[2m"
+RED = "\033[91m"
+GREEN = "\033[92m"
+YELLOW = "\033[93m"
+CYAN = "\033[96m"
+MAGENTA = "\033[95m"
+NC = "\033[0m"
+
+# ── Regexes (DATA payloads are Go %q strings, so a quote inside them appears as \") ──
+RE_ENCODING_HEADER = re.compile(
+    r'http2: Transport encoding header "([^"]+)" = "([^"]*)"'
+)
+RE_DECODED_HEADER = re.compile(
+    r'http2: decoded hpack field header field "([^"]+)" = "([^"]*)"'
+)
+RE_SERVER_ENCODING = re.compile(
+    r'http2: server encoding header "([^"]+)" = "([^"]*)"'
+)
+RE_WROTE_DATA = re.compile(  # payload group skips over backslash escapes instead of stopping at the first quote
+    r'http2: Framer [^:]+: wrote DATA flags=(\S+) stream=(\d+) len=(\d+) data="((?:[^"\\]|\\.)*)"'
+)
+RE_READ_DATA = re.compile(  # same escape-aware payload group as RE_WROTE_DATA
+    r'http2: Framer [^:]+: read DATA flags=(\S+) stream=(\d+) len=(\d+) data="((?:[^"\\]|\\.)*)"'
+)
+RE_TRANSPORT_CONN = re.compile(
+    r'http2: Transport creating client conn [^ ]+ to (.+)'
+)
+RE_SERVER_READ_DATA = re.compile(  # same escape-aware payload group as RE_WROTE_DATA
+    r'http2: server read frame DATA flags=(\S+) stream=(\d+) len=(\d+) data="((?:[^"\\]|\\.)*)"'
+)
+RE_WROTE_HEADERS = re.compile(
+    r'http2: Framer [^:]+: wrote HEADERS flags=(\S+) stream=(\d+)'
+)
+RE_TIMESTAMP = re.compile(r'^(\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2})')
+RE_LS_LOG = re.compile(r'^[IWE]\d{4} ')
+RE_MAXPROCS = re.compile(r'^.*maxprocs:')
+RE_BYTES_OMITTED = re.compile(r'\((\d+) bytes omitted\)$')
+
+# Known hosts → (short label, description); render() looks hosts up here and uses a generic label for unknown ones
+DOMAIN_INFO = {
+    "antigravity-unleash.goog": ("Feature Flags", "Unleash SDK — controls A/B tests, feature rollouts"),
+    "daily-cloudcode-pa.googleapis.com": ("LLM API (gRPC)", "Primary Gemini/Claude API endpoint"),
+    "cloudcode-pa.googleapis.com": ("LLM API (gRPC)", "Production Gemini/Claude API endpoint"),
+    "api.anthropic.com": ("Claude API", "Direct Anthropic API calls"),
+    "lh3.googleusercontent.com": ("Profile Picture", "User avatar image"),
+    "play.googleapis.com": ("Telemetry", "Google Play telemetry/logging"),
+    "firebaseinstallations.googleapis.com": ("Firebase", "Firebase installation tracking"),
+    "oauth2.googleapis.com": ("OAuth", "Token refresh/exchange"),
+    "speech.googleapis.com": ("Speech", "Voice input processing"),
+    "modelarmor.googleapis.com": ("Safety", "Content safety/filtering"),
+}
+
+
+class Request:
+    def __init__(self):  # one HTTP/2 request rebuilt from the debug log; every field starts empty
+        self.method = ""  # value of the :method pseudo-header
+        self.path = ""  # value of the :path pseudo-header
+        self.authority = ""  # value of the :authority pseudo-header; render() groups requests by it
+        self.scheme = ""  # value of the :scheme pseudo-header
+        self.headers = {}  # remaining headers whose names do not start with ":"
+        self.data = b""  # decoded DATA payloads appended by Snapshot.parse()
+        self.data_len = 0  # body length taken from the len= field of DATA log lines
+        self.stream_id = None  # stream number (as a string) from the "wrote HEADERS" line, when one was seen
+        self.timestamp = ""  # log timestamp captured with the encoded headers, when present
+        self.direction = "outgoing"  # outgoing = LS→upstream, incoming = LS←upstream
+
+
+class Snapshot:
+    def __init__(self):  # start with empty collections; parse() fills them, render() reads them
+        self.connections = []  # (timestamp, target) for each "creating client conn" log line
+        self.requests = []  # list of Request, in the order they were finalized
+        self.responses = defaultdict(lambda: {"headers": {}, "data": b"", "data_len": 0})  # NOTE(review): not used by parse()/render()
+        self.ls_logs = []  # raw lines that matched RE_LS_LOG or RE_MAXPROCS
+
+    def parse(self, lines):
+        """Scan raw GODEBUG=http2debug=2 log lines and populate this snapshot.
+
+        LS process logs are stored in self.ls_logs, new client connections in self.connections,
+        and each request (headers collected until a HEADERS frame is written or the next request
+        starts) in self.requests. DATA frames the LS wrote, or that its server side read, are
+        attached to the most recent request with the same stream id.
+        """
+        current_headers = {}
+        current_direction = "outgoing"
+        current_stream = None
+
+        for line in lines:
+            line = line.rstrip()
+
+            # Skip empty
+            if not line:
+                continue
+
+            # LS process logs
+            if RE_LS_LOG.match(line) or RE_MAXPROCS.match(line):
+                self.ls_logs.append(line)
+                continue
+
+            # New connection
+            m = RE_TRANSPORT_CONN.search(line)
+            if m:
+                ts = ""
+                ts_m = RE_TIMESTAMP.match(line)
+                if ts_m:
+                    ts = ts_m.group(1)
+                self.connections.append((ts, m.group(1)))
+                continue
+
+            # Outgoing headers (Transport encoding = LS sending to upstream)
+            m = RE_ENCODING_HEADER.search(line)
+            if m:
+                key, val = m.group(1), m.group(2)
+                # Go's Transport encodes :authority before :method, so either may open a request;
+                # a :method right after a fresh outgoing :authority continues that same request.
+                fresh = current_direction == "outgoing" and set(current_headers) <= {":authority", "timestamp"}
+                if key == ":authority" or (key == ":method" and not fresh):
+                    if current_headers.get(":path"):
+                        self._finalize_request(current_headers, current_direction, line)
+                    current_headers = {}
+                    current_direction = "outgoing"
+                current_headers[key] = val
+                ts_m = RE_TIMESTAMP.match(line)
+                if ts_m and "timestamp" not in current_headers:
+                    current_headers["timestamp"] = ts_m.group(1)
+                continue
+
+            # Incoming headers (decoded hpack = upstream responding, OR server receiving)
+            m = RE_DECODED_HEADER.search(line)
+            if m:
+                key, val = m.group(1), m.group(2)
+                if key == ":authority" and "server read frame" not in line:
+                    # This is a request received by our LS
+                    if current_headers.get(":path"):
+                        self._finalize_request(current_headers, current_direction, line)
+                    current_headers = {}
+                    current_direction = "incoming"
+                current_headers[key] = val
+                continue
+
+            # Server encoding (our LS responding)
+            m = RE_SERVER_ENCODING.search(line)
+            if m:
+                continue  # Skip server response headers for now
+
+            # Headers frame written (triggers finalization)
+            m = RE_WROTE_HEADERS.search(line)
+            if m:
+                current_stream = m.group(2)
+                if current_headers.get(":path") or current_headers.get(":method"):
+                    req = self._finalize_request(current_headers, current_direction, line)
+                    if req:
+                        req.stream_id = current_stream
+                    current_headers = {}
+                continue
+
+            # Data frames (wrote = LS sending, read = LS receiving)
+            for pattern, direction in [
+                (RE_WROTE_DATA, "sent"),
+                (RE_READ_DATA, "received"),
+                (RE_SERVER_READ_DATA, "server_received"),
+            ]:
+                m = pattern.search(line)
+                if m:
+                    stream, length, data_str = m.group(2), int(m.group(3)), m.group(4)
+                    # Only frames the LS wrote or its server side read are stored as request bodies.
+                    if direction in ("sent", "server_received"):
+                        for req in reversed(self.requests):
+                            if req.stream_id == stream:
+                                req.data += self._decode_data_str(data_str, line)
+                                # A body can span several DATA frames, so the frame lengths add up.
+                                req.data_len += length
+                                break
+                    break
+
+        # Finalize any remaining headers
+        if current_headers.get(":path") or current_headers.get(":method"):
+            self._finalize_request(current_headers, current_direction, "")
+
+    def _finalize_request(self, headers, direction, _line):
+        """Pop the request fields out of `headers`, record the resulting Request, and return it."""
+        entry = Request()
+        entry.direction = direction
+        defaults = ((":method", "GET"), (":path", "/"), (":authority", ""), (":scheme", "https"), ("timestamp", ""))
+        for name, fallback in defaults:
+            setattr(entry, name.lstrip(":"), headers.pop(name, fallback))
+        # Any key still starting with ":" is left out of the stored header map.
+        entry.headers = dict((name, value) for name, value in headers.items() if not name.startswith(":"))
+        self.requests.append(entry)
+        return entry
+
+    def _decode_data_str(self, s, full_line):
+        """Decode the Go-quoted (%q) payload of a DATA log line back into bytes.
+
+        Understands the single-letter escapes (a b f n r t v, backslash, double quote) and the hex
+        escapes x (2 digits, one raw byte), u (4 digits) and U (8 digits, code points re-encoded as
+        UTF-8). Characters that appear literally are encoded as UTF-8 too, because Go only prints a
+        rune as-is when the source bytes were its valid UTF-8 encoding. A backslash that does not
+        start a well-formed escape is kept as a literal byte. `full_line` is accepted but unused.
+        """
+        simple = {"a": 7, "b": 8, "f": 12, "n": 10, "r": 13, "t": 9, "v": 11, "\\": 92, '"': 34}
+        hex_widths = {"x": 2, "u": 4, "U": 8}
+        hex_digits = set("0123456789abcdefABCDEF")
+        result = bytearray()
+        i = 0
+        while i < len(s):
+            # `kind` is the escape letter following a backslash, or "" for an ordinary character.
+            kind = s[i + 1] if s[i] == "\\" and i + 1 < len(s) else ""
+            if kind in simple:
+                result.append(simple[kind])
+                i += 2
+                continue
+            width = hex_widths.get(kind, 0)
+            digits = s[i + 2 : i + 2 + width]
+            if width and len(digits) == width and hex_digits.issuperset(digits) and int(digits, 16) <= 0x10FFFF:
+                code = int(digits, 16)
+                if kind == "x":
+                    result.append(code)
+                else:
+                    result += chr(code).encode("utf-8", errors="replace")
+                i += 2 + width
+                continue
+            # Literal character (or a stray backslash): one str char may be several UTF-8 bytes.
+            result += s[i].encode("utf-8", errors="replace")
+            i += 1
+        return bytes(result)
+
+    def render(self):  # build the ANSI-colored text report from the parsed snapshot and return it as one string
+        out = []  # report lines, joined with "\n" at the end
+
+        # Banner
+        out.append(f"\n{BOLD}{CYAN}{'═' * 70}{NC}")
+        out.append(f"{BOLD}{CYAN}  STANDALONE LS TRAFFIC SNAPSHOT{NC}")
+        out.append(f"{BOLD}{CYAN}{'═' * 70}{NC}\n")
+
+        # LS Logs (section is omitted when none were collected)
+        if self.ls_logs:
+            out.append(f"{BOLD}▸ Language Server Logs{NC}")
+            out.append(f"{DIM}{'─' * 60}{NC}")
+            for log in self.ls_logs:
+                out.append(f"  {DIM}{log}{NC}")
+            out.append("")
+
+        # Connections (the stored timestamp is not printed)
+        if self.connections:
+            out.append(f"{BOLD}▸ Outbound Connections{NC}")
+            out.append(f"{DIM}{'─' * 60}{NC}")
+            for ts, target in self.connections:
+                domain = target.split(":")[0] if ":" in target else target  # drop the port before the DOMAIN_INFO lookup
+                info = DOMAIN_INFO.get(domain, ("Unknown", ""))
+                out.append(
+                    f"  {GREEN}→{NC} {BOLD}{target}{NC}  {DIM}({info[0]}){NC}"
+                )
+                if info[1]:
+                    out.append(f"    {DIM}{info[1]}{NC}")
+            out.append("")
+
+        # Group requests by their :authority value, keeping first-seen order
+        by_domain = defaultdict(list)
+        for req in self.requests:
+            by_domain[req.authority].append(req)
+
+        # Render each domain's requests
+        for domain, reqs in by_domain.items():
+            if domain.startswith("127.0.0.1"):
+                label = "Local (our requests to LS)"
+                color = DIM
+            else:
+                info = DOMAIN_INFO.get(domain, ("External", ""))
+                label = info[0]
+                color = YELLOW if "API" in info[0] else CYAN  # labels containing "API" are highlighted in yellow
+
+            out.append(f"{BOLD}{'═' * 70}{NC}")
+            out.append(f"{BOLD}{color}  {domain}{NC}  {DIM}— {label}{NC}")
+            out.append(f"{BOLD}{'═' * 70}{NC}")
+
+            for i, req in enumerate(reqs):  # NOTE(review): the index i is not used
+                arrow = "→" if req.direction == "outgoing" else "←"
+                method_color = GREEN if req.method == "GET" else YELLOW
+
+                out.append(f"\n  {BOLD}{arrow} {method_color}{req.method}{NC} {req.path}")
+
+                # Headers printed first, in this fixed order, when present
+                interesting = [
+                    "authorization",
+                    "content-type",
+                    "user-agent",
+                    "unleash-appname",
+                    "unleash-instanceid",
+                    "unleash-sdk",
+                    "x-goog-api-key",  # NOTE(review): printed in full; only "authorization" is masked below
+                    "x-goog-api-client",
+                    "grpc-encoding",
+                    "te",
+                ]
+                shown = False
+                for key in interesting:
+                    if key in req.headers:
+                        val = req.headers[key]
+                        # Mask tokens partially: values of 30 chars or fewer are printed unchanged
+                        if key == "authorization" and len(val) > 30:
+                            if val.startswith("Bearer "):
+                                val = f"Bearer {val[7:20]}...{val[-10:]}"  # keeps 13 leading and 10 trailing token chars
+                            elif len(val) > 40:
+                                val = f"{val[:30]}...{val[-10:]}"
+                        out.append(f"    {DIM}{key}:{NC} {val}")
+                        shown = True
+
+                # All remaining headers, in stored order
+                other = {
+                    k: v
+                    for k, v in req.headers.items()
+                    if k not in interesting and not k.startswith(":")
+                }
+                if other:
+                    if not shown:
+                        out.append(f"    {DIM}Headers:{NC}")
+                    for k, v in other.items():
+                        out.append(f"    {DIM}{k}:{NC} {v}")
+
+                # Body (only when some payload bytes were captured)
+                if req.data:
+                    out.append(self._render_body(req.data, req.data_len))
+
+            out.append("")
+
+        return "\n".join(out)
+
+    def _render_body(self, data, total_len):
+        """Format a body for display, trying JSON, gzip, protobuf-like, plain text, PNG, then a binary fallback."""
+        lines = []
+
+        # Try JSON (at most 30 pretty-printed lines are shown)
+        try:
+            text = data.decode("utf-8")
+            obj = json.loads(text)
+            pretty = json.dumps(obj, indent=2, ensure_ascii=False)
+            lines.append(f"    {BOLD}Body ({len(data)} bytes, JSON):{NC}")
+            for l in pretty.split("\n")[:30]:
+                lines.append(f"      {GREEN}{l}{NC}")
+            if len(pretty.split("\n")) > 30:
+                lines.append(f"      {DIM}... ({len(pretty.split(chr(10))) - 30} more lines){NC}")
+            return "\n".join(lines)
+        except (json.JSONDecodeError, UnicodeDecodeError):
+            pass
+
+        # Try gzip (detected by the 1f 8b magic bytes); the payload is then shown as JSON, text, or extracted strings
+        if data[:2] == b"\x1f\x8b":
+            try:
+                decompressed = gzip.decompress(data)
+                try:
+                    text = decompressed.decode("utf-8")
+                    try:
+                        obj = json.loads(text)
+                        pretty = json.dumps(obj, indent=2, ensure_ascii=False)
+                        lines.append(
+                            f"    {BOLD}Body ({len(data)} bytes gzip → {len(decompressed)} bytes, JSON):{NC}"
+                        )
+                        for l in pretty.split("\n")[:50]:
+                            lines.append(f"      {GREEN}{l}{NC}")
+                        if len(pretty.split("\n")) > 50:
+                            lines.append(
+                                f"      {DIM}... ({len(pretty.split(chr(10))) - 50} more lines){NC}"
+                            )
+                        return "\n".join(lines)
+                    except json.JSONDecodeError:
+                        lines.append(
+                            f"    {BOLD}Body ({len(data)} bytes gzip → {len(decompressed)} bytes, text):{NC}"
+                        )
+                        for l in text.split("\n")[:20]:
+                            lines.append(f"      {l[:200]}")
+                        return "\n".join(lines)
+                except UnicodeDecodeError:
+                    lines.append(
+                        f"    {BOLD}Body ({len(data)} bytes gzip → {len(decompressed)} bytes, binary):{NC}"
+                    )
+                    lines.append(f"      {DIM}{self._extract_strings(decompressed)}{NC}")
+                    return "\n".join(lines)
+            except Exception:
+                pass  # decompression failed (e.g. truncated capture); fall through to the checks below
+
+        # Try protobuf: the first byte equals a tag for field 1-4 with wire type 0 or 2 (heuristic only)
+        if data[:1] in (b"\x08", b"\x0a", b"\x10", b"\x12", b"\x18", b"\x1a", b"\x20", b"\x22"):
+            strings = self._extract_strings(data)
+            if strings:
+                lines.append(f"    {BOLD}Body ({total_len} bytes, protobuf):{NC}")
+                lines.append(f"    {DIM}Extracted strings:{NC}")
+                for s in strings.split(" | ")[:20]:
+                    s = s.strip()
+                    if len(s) > 3:
+                        lines.append(f"      {MAGENTA}{s}{NC}")
+                return "\n".join(lines)
+
+        # Try plain text (first 10 lines, each cut at 200 characters)
+        try:
+            text = data.decode("utf-8")
+            lines.append(f"    {BOLD}Body ({len(data)} bytes, text):{NC}")
+            for l in text.split("\n")[:10]:
+                lines.append(f"      {l[:200]}")
+            return "\n".join(lines)
+        except UnicodeDecodeError:
+            pass
+
+        # PNG (magic bytes only; the image itself is not shown)
+        if data[:4] == b"\x89PNG":
+            lines.append(f"    {BOLD}Body ({total_len} bytes, PNG image){NC}")
+            return "\n".join(lines)
+
+        # Binary fallback: list printable strings when any are found
+        lines.append(f"    {BOLD}Body ({total_len} bytes, binary):{NC}")
+        strings = self._extract_strings(data)
+        if strings:
+            lines.append(f"    {DIM}Extracted strings:{NC}")
+            for s in strings.split(" | ")[:15]:
+                s = s.strip()
+                if len(s) > 3:
+                    lines.append(f"      {MAGENTA}{s}{NC}")
+        else:
+            lines.append(f"      {DIM}(no readable strings){NC}")
+        return "\n".join(lines)
+
+    def _extract_strings(self, data, min_len=4):
+        """Return up to 30 distinct printable-ASCII runs found in `data`, joined by " | ".
+
+        A run is a maximal stretch of bytes in the range 32..126. Runs shorter than
+        `min_len` are dropped, and a repeated run is reported only once, at the
+        position of its first appearance.
+        """
+        # Map every non-printable byte to NUL so that one split yields all the runs.
+        masked = bytes(
+            byte if 32 <= byte <= 126 else 0
+            for byte in data
+        )
+        runs = [
+            chunk.decode("ascii")
+            for chunk in masked.split(b"\x00")
+            if len(chunk) >= min_len
+        ]
+        # Dict keys keep insertion order: an order-preserving de-duplication.
+        distinct = list(dict.fromkeys(runs))
+        # Cap the report at the first 30 distinct runs.
+        return " | ".join(distinct[:30])
+
+
+def main():
+    """Parse the log named by argv[1] (stdin when absent) and print the report; undecodable bytes in a file are replaced."""
+    if len(sys.argv) > 1:
+        with open(sys.argv[1], encoding="utf-8", errors="replace") as f:
+            lines = f.readlines()
+    else:
+        lines = sys.stdin.readlines()
+    snap = Snapshot()
+    snap.parse(lines)
+    print(snap.render())
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/standalone-ls.sh b/scripts/standalone-ls.sh
new file mode 100755
index 0000000..5b62967
--- /dev/null
+++ b/scripts/standalone-ls.sh
@@ -0,0 +1,277 @@
+#!/usr/bin/env bash
+# ╔═══════════════════════════════════════════════════════════════════════════╗
+# ║  Standalone Language Server Launcher                                    ║
+# ║                                                                         ║
+# ║  Launches an isolated LS instance that:                                 ║
+# ║    - Shares OAuth via the main app's extension server                   ║
+# ║    - Has its own HTTPS port, data dir, and cascades                     ║
+# ║    - Optionally routes traffic through our MITM proxy                   ║
+# ║    - Can capture a clean traffic snapshot                               ║
+# ║                                                                         ║
+# ║  Usage:                                                                 ║
+# ║    ./standalone-ls.sh                 # Launch, test, exit              ║
+# ║    ./standalone-ls.sh --fg            # Foreground (stay alive)          ║
+# ║    ./standalone-ls.sh --mitm          # Route through MITM proxy        ║
+# ║    ./standalone-ls.sh --snapshot      # Capture clean traffic dump       ║
+# ║    ./standalone-ls.sh --snapshot --prompt "Say hello"                    ║
+# ╚═══════════════════════════════════════════════════════════════════════════╝
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"  # absolute directory of this script; used to find parse-snapshot.py
+
+# ── Defaults (each can be overridden by the matching command-line option below) ──
+LS_BIN="/usr/share/antigravity/resources/app/extensions/antigravity/bin/language_server_linux_x64"
+HTTPS_PORT="42200"
+DATA_DIR="/tmp/antigravity-standalone"
+FOREGROUND=false
+USE_MITM=false
+SNAPSHOT=false
+TIMEOUT=15  # seconds handed to timeout(1) in background mode
+PROMPT=""  # empty: snapshot mode substitutes a default prompt
+MODEL="MODEL_PLACEHOLDER_M3"  # sent as modelOrAlias.model in the StartCascade request
+
+# ── Parse args (options taking a value read it from $2 and shift twice) ──
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --port)      HTTPS_PORT="$2"; shift 2 ;;
+        --mitm)      USE_MITM=true; shift ;;
+        --fg)        FOREGROUND=true; shift ;;
+        --timeout)   TIMEOUT="$2"; shift 2 ;;
+        --snapshot)  SNAPSHOT=true; TIMEOUT=30; shift ;;  # also sets the timeout to 30 s; a later --timeout still wins
+        --prompt)    PROMPT="$2"; shift 2 ;;
+        --model)     MODEL="$2"; shift 2 ;;
+        -h|--help)
+            echo "Usage: $0 [OPTIONS]"
+            echo ""
+            echo "Options:"
+            echo "  --port PORT      HTTPS port for standalone LS (default: 42200)"
+            echo "  --mitm           Route traffic through MITM proxy"
+            echo "  --fg             Run in foreground (stay alive)"
+            echo "  --timeout SECS   Background mode timeout (default: 15)"
+            echo "  --snapshot       Capture clean traffic snapshot"
+            echo "  --prompt TEXT    Prompt to send (snapshot mode)"
+            echo "  --model MODEL    Model alias (default: MODEL_PLACEHOLDER_M3)"
+            exit 0 ;;
+        *) echo "Unknown option: $1"; exit 1 ;;
+    esac
+done
+
+# ── Discover main LS config ──────────────────────────────────────────────────
+MAIN_PID=$(pgrep -f 'language_server_linux_x64' | head -1 || true)
+if [[ -z "$MAIN_PID" ]]; then
+    echo "[-] No main LS process found. Main Antigravity must be running."
+    exit 1
+fi
+# `|| true`: under `set -euo pipefail` a grep miss would otherwise end the script without any message.
+MAIN_CSRF=$(tr '\0' '\n' < /proc/"$MAIN_PID"/cmdline | grep -A1 'csrf_token' | tail -1 || true)
+EXT_PORT=$(tr '\0' '\n' < /proc/"$MAIN_PID"/cmdline | grep -A1 'extension_server_port' | tail -1 || true)
+if [[ -z "$MAIN_CSRF" || -z "$EXT_PORT" ]]; then echo "[-] Could not read csrf_token / extension_server_port from the command line of PID $MAIN_PID"; exit 1; fi
+echo "[*] Main LS PID: $MAIN_PID"
+echo "[*] CSRF: $MAIN_CSRF"
+echo "[*] Extension server: $EXT_PORT"
+
+# ── Build protobuf metadata for stdin (string fields 1,3,4,5,6,10,11; kept base64-encoded in $METADATA) ──
+TS=$(date +%s)  # epoch seconds; makes the api-key/session strings and the workspace id unique per run
+METADATA=$(python3 -c "
+import sys
+def v(n):
+    r = bytearray()
+    while n > 0x7f:
+        r.append((n & 0x7f) | 0x80)
+        n >>= 7
+    r.append(n & 0x7f)
+    return bytes(r)
+def s(f, val):
+    t = v((f << 3) | 2)
+    d = val.encode()
+    return t + v(len(d)) + d
+buf = bytearray()
+buf += s(1, 'standalone-api-key-$TS')
+buf += s(3, 'antigravity')
+buf += s(4, '1.15.8')
+buf += s(5, '1.16.39')
+buf += s(6, 'en_US')
+buf += s(10, 'standalone-session-$TS')
+buf += s(11, 'antigravity')
+sys.stdout.buffer.write(bytes(buf))
+" | base64)
+
+# ── Setup data directory (passed to the LS through -gemini_dir) ──
+mkdir -p "$DATA_DIR/.gemini"
+
+# ── MITM environment: MITM_ENV stays empty unless --mitm was given ──
+MITM_ENV=()
+if $USE_MITM; then
+    REAL_HOME="${SUDO_USER:+$(getent passwd "$SUDO_USER" | cut -d: -f6)}"  # under sudo, use the invoking user's home
+    REAL_HOME="${REAL_HOME:-$HOME}"
+    MITM_PORT_FILE="${REAL_HOME}/.config/antigravity-proxy/mitm-port"
+    CA_PATH="${REAL_HOME}/.config/antigravity-proxy/mitm-ca.pem"
+
+    if [[ -f "$MITM_PORT_FILE" ]]; then
+        MITM_PORT=$(cat "$MITM_PORT_FILE")
+    else
+        MITM_PORT="8742"  # fallback when no port file exists
+    fi
+
+    if [[ ! -f "$CA_PATH" ]]; then
+        echo "[-] MITM CA cert not found at $CA_PATH"
+        echo "    Start the proxy first to generate it."
+        exit 1
+    fi
+
+    COMBINED_CA="/tmp/antigravity-mitm-combined-ca.pem"  # NOTE(review): fixed, predictable path in /tmp; consider mktemp
+    SYS_CA=""
+    for candidate in /etc/ssl/certs/ca-certificates.crt /etc/pki/tls/certs/ca-bundle.crt /etc/ssl/cert.pem; do
+        if [[ -f "$candidate" ]]; then SYS_CA="$candidate"; break; fi  # first bundle that exists wins
+    done
+    if [[ -n "$SYS_CA" ]]; then
+        cat "$SYS_CA" "$CA_PATH" > "$COMBINED_CA"  # system roots followed by the MITM CA
+    else
+        echo "[-] No system CA bundle found"
+        exit 1
+    fi
+
+    MITM_ENV=(
+        "HTTPS_PROXY=http://127.0.0.1:${MITM_PORT}"
+        "SSL_CERT_FILE=${COMBINED_CA}"
+        "GRPC_DEFAULT_SSL_ROOTS_FILE_PATH=${COMBINED_CA}"
+    )
+    echo "[*] MITM: enabled (port $MITM_PORT)"
+else
+    echo "[*] MITM: disabled (use --mitm to enable)"
+fi
+
+# ── LS args (extension server port and CSRF token are the values read from the main LS) ──
+LS_ARGS=(
+    -enable_lsp
+    -extension_server_port "$EXT_PORT"
+    -csrf_token "$MAIN_CSRF"
+    -server_port "$HTTPS_PORT"
+    -workspace_id "standalone_$TS"
+    -cloud_code_endpoint "https://daily-cloudcode-pa.googleapis.com"
+    -app_data_dir "antigravity-standalone"
+    -gemini_dir "$DATA_DIR/.gemini"
+)
+
+# ── Extra env for snapshot mode (GODEBUG output is what parse-snapshot.py consumes) ──
+EXTRA_ENV=()
+if $SNAPSHOT; then
+    EXTRA_ENV=("GODEBUG=http2debug=2")
+    echo "[*] Snapshot: enabled (HTTP/2 debug tracing)"
+fi
+
+# ── Banner: summary of the effective settings ──
+echo ""
+echo "========================================="
+echo "  Standalone LS"
+echo "  Port:    $HTTPS_PORT (HTTPS)"
+echo "  Data:    $DATA_DIR"
+echo "  Mode:    $($FOREGROUND && echo "foreground" || echo "background ($TIMEOUT s)")"
+echo "========================================="
+echo ""
+
+# ── Foreground mode (env starts the LS directly: `exec` is a shell builtin that env cannot run) ──
+if $FOREGROUND; then
+    echo "$METADATA" | base64 -d | \
+        env "${MITM_ENV[@]+"${MITM_ENV[@]}"}" \
+            "${EXTRA_ENV[@]+"${EXTRA_ENV[@]}"}" \
+            ANTIGRAVITY_EDITOR_APP_ROOT="/usr/share/antigravity/resources/app" \
+            "$LS_BIN" "${LS_ARGS[@]}"
+    exit 0
+fi
+
+# ── Background mode: run the LS under timeout(1), with stdout and stderr captured in $LOG ──
+LOG="/tmp/standalone-ls.log"
+rm -f "$LOG"
+
+echo "$METADATA" | base64 -d | \
+    env "${MITM_ENV[@]+"${MITM_ENV[@]}"}" \
+        "${EXTRA_ENV[@]+"${EXTRA_ENV[@]}"}" \
+        ANTIGRAVITY_EDITOR_APP_ROOT="/usr/share/antigravity/resources/app" \
+    timeout "$TIMEOUT" "$LS_BIN" "${LS_ARGS[@]}" \
+    > "$LOG" 2>&1 &
+
+LS_PID=$!  # PID of the backgrounded pipeline's last stage
+echo "[*] PID: $LS_PID"
+
+# Wait for init: five one-second liveness checks (only "still running" is verified, not readiness)
+for i in $(seq 1 5); do
+    sleep 1
+    if ! kill -0 "$LS_PID" 2>/dev/null; then
+        echo "[-] LS died after ${i}s"
+        echo "=== LOGS ==="
+        cat "$LOG"
+        exit 1
+    fi
+done
+echo "[+] LS alive and initialized"
+
+# ── Snapshot mode: send a prompt and capture traffic ──────────────────────────
+if $SNAPSHOT; then
+    if [[ -z "$PROMPT" ]]; then
+        PROMPT="Say exactly: Hello standalone world"
+    fi
+    # The request body is produced by json.dumps so quotes or backslashes in --prompt cannot break the JSON.
+    echo ""
+    echo "[*] Sending cascade: \"$PROMPT\""
+    CASCADE_ID=$(curl -sk --max-time 10 \
+        "https://127.0.0.1:${HTTPS_PORT}/exa.language_server_pb.LanguageServerService/StartCascade" \
+        -H "Content-Type: application/json" \
+        -H "x-codeium-csrf-token: $MAIN_CSRF" \
+        -H "Origin: vscode-file://vscode-app" \
+        -d "$(python3 -c 'import json, sys; print(json.dumps({
+            "prompt": sys.argv[1],
+            "modelOrAlias": {"model": sys.argv[2]},
+            "workspaceRootPaths": [sys.argv[3]],
+        }))' "$PROMPT" "$MODEL" "$DATA_DIR")" 2>/dev/null | python3 -c "import json,sys; print(json.load(sys.stdin).get('cascadeId',''))" 2>/dev/null || true)
+
+    echo "[*] Cascade: $CASCADE_ID"
+    echo "[*] Waiting 15s for upstream API calls..."
+    sleep 15
+
+    # Kill LS to flush logs; it may already be gone (timeout reached), which must not abort the run under `set -e`
+    kill "$LS_PID" 2>/dev/null || true
+    wait "$LS_PID" 2>/dev/null || true
+
+    # Parse and display
+    echo ""
+    python3 "$SCRIPT_DIR/parse-snapshot.py" "$LOG"
+
+    # Also save raw log
+    SNAPSHOT_FILE="/tmp/standalone-snapshot-$(date +%Y%m%d-%H%M%S).log"
+    cp "$LOG" "$SNAPSHOT_FILE"
+    echo ""
+    echo "[*] Raw log saved to: $SNAPSHOT_FILE"
+    exit 0
+fi
+
+# ── Normal mode: query GetUserStatus, print a summary, then always stop the LS (a failed query must not skip the cleanup) ──
+echo ""
+echo "=== GetUserStatus ==="
+curl -sk --max-time 10 "https://127.0.0.1:${HTTPS_PORT}/exa.language_server_pb.LanguageServerService/GetUserStatus" \
+  -H "Content-Type: application/json" \
+  -H "x-codeium-csrf-token: $MAIN_CSRF" \
+  -H "Origin: vscode-file://vscode-app" \
+  -d '{}' 2>/dev/null | python3 -c "
+import json, sys
+try:
+    d = json.load(sys.stdin)
+    us = d.get('userStatus', {})
+    ps = us.get('planStatus', {})
+    pi = ps.get('planInfo', {})
+    print(f'Plan: {pi.get(\"planName\",\"?\")}, Prompt: {ps.get(\"availablePromptCredits\",\"?\")}, Flow: {ps.get(\"availableFlowCredits\",\"?\")}')
+    ut = us.get('userTier', {})
+    print(f'Tier: {ut.get(\"name\",\"?\")}')
+    models = us.get('cascadeModelConfigData', {}).get('clientModelConfigs', [])
+    print(f'Models: {len(models)}')
+    for m in models[:5]:
+        qi = m.get('quotaInfo', {})
+        print(f'  - {m.get(\"label\")}: remaining={qi.get(\"remainingFraction\",\"?\")}')
+except Exception as e:
+    print(f'Error: {e}')
+" 2>/dev/null || true
+
+echo ""
+kill "$LS_PID" 2>/dev/null || true
+wait "$LS_PID" 2>/dev/null || true
+echo "[*] Done"
diff --git a/src/api/completions.rs b/src/api/completions.rs
new file mode 100644
index 0000000..9ce14e9
--- /dev/null
+++ b/src/api/completions.rs
@@ -0,0 +1,343 @@
+//! OpenAI Chat Completions API (/v1/chat/completions) handler.
+
+use axum::{
+    extract::State,
+    http::StatusCode,
+    response::{sse::Event, IntoResponse, Json, Sse},
+};
+use rand::Rng;
+use std::sync::Arc;
+use tracing::{debug, info, warn};
+
+use super::models::{lookup_model, DEFAULT_MODEL, MODELS};
+use super::polling::{extract_response_text, is_response_done, poll_for_response};
+use super::types::*;
+use super::util::{err_response, now_unix};
+use super::AppState;
+
+// ─── Input extraction ────────────────────────────────────────────────────────
+
+/// Build the prompt text sent to the backend from a Chat Completions `messages` array.
+///
+/// All `system`/`developer` messages are joined with newlines and placed first,
+/// followed by a blank line and the text of the *last* `user` message. Messages
+/// with any other role, or whose `content` is neither a string nor an array,
+/// are ignored. The result is trimmed of surrounding whitespace.
+fn extract_chat_input(messages: &[CompletionMessage]) -> String {
+    let mut preamble: Vec<String> = Vec::new();
+    let mut latest_user: Option<String> = None;
+
+    for message in messages {
+        // Array content: keep only the parts that carry a string `text` field.
+        let body = match &message.content {
+            serde_json::Value::String(plain) => plain.clone(),
+            serde_json::Value::Array(parts) => {
+                let texts: Vec<&str> = parts.iter().filter_map(|p| p["text"].as_str()).collect();
+                texts.join("\n")
+            }
+            _ => continue,
+        };
+        let role = message.role.as_str();
+        if role == "system" || role == "developer" {
+            preamble.push(body);
+        } else if role == "user" {
+            latest_user = Some(body);
+        }
+    }
+
+    // Only the most recent user turn is forwarded; earlier ones are dropped.
+    let head = if preamble.is_empty() { String::new() } else { preamble.join("\n") + "\n\n" };
+    let combined = head + latest_user.as_deref().unwrap_or("");
+    combined.trim().to_string()
+}
+
+// ─── Handler ─────────────────────────────────────────────────────────────────
+
+/// POST /v1/chat/completions — OpenAI-compatible Chat Completions endpoint.
+///
+/// Resolves the requested model (falling back to `DEFAULT_MODEL`), checks that an
+/// OAuth token is available, starts a fresh backend cascade, submits the extracted
+/// prompt, and hands off to the streaming or synchronous responder depending on
+/// `body.stream`.
+/// After a successful send, a detached task updates the cascade's annotations.
+///
+/// Error responses:
+/// - `400 invalid_request_error` — unknown model, or no prompt text could be extracted.
+/// - `401 authentication_error` — the backend holds no OAuth token.
+/// - `502 server_error` — starting the cascade or sending the message failed.
+pub(crate) async fn handle_completions(
+    State(state): State<Arc<AppState>>,
+    Json(body): Json<CompletionRequest>,
+) -> axum::response::Response {
+    let model_name = body.model.as_deref().unwrap_or(DEFAULT_MODEL);
+    info!(
+        "POST /v1/chat/completions model={} stream={}",
+        model_name, body.stream
+    );
+
+    // Reject unknown models up front and tell the caller what is available.
+    let Some(model) = lookup_model(model_name) else {
+        let names: Vec<&str> = MODELS.iter().map(|m| m.name).collect();
+        return err_response(
+            StatusCode::BAD_REQUEST,
+            format!("Unknown model: {model_name}. Available: {names:?}"),
+            "invalid_request_error",
+        );
+    };
+
+    // Refuse to proceed when no OAuth token is configured.
+    if state.backend.oauth_token().await.is_empty() {
+        return err_response(
+            StatusCode::UNAUTHORIZED,
+            "No OAuth token. POST to /v1/token or set ANTIGRAVITY_OAUTH_TOKEN env var.".into(),
+            "authentication_error",
+        );
+    }
+
+    // An empty prompt means `messages` held no usable text.
+    let prompt = extract_chat_input(&body.messages);
+    if prompt.is_empty() {
+        return err_response(
+            StatusCode::BAD_REQUEST,
+            "No user message found".to_string(),
+            "invalid_request_error",
+        );
+    }
+
+    // Every request gets its own cascade.
+    let cascade_id = match state.backend.create_cascade().await {
+        Ok(id) => id,
+        Err(e) => {
+            return err_response(
+                StatusCode::BAD_GATEWAY,
+                format!("StartCascade failed: {e}"),
+                "server_error",
+            );
+        }
+    };
+
+    // Submit the prompt; a transport error or any non-200 status becomes a 502.
+    let sent = state
+        .backend
+        .send_message(&cascade_id, &prompt, model.model_enum)
+        .await;
+    let status = match sent {
+        Ok((status, _)) => status,
+        Err(e) => {
+            return err_response(
+                StatusCode::BAD_GATEWAY,
+                format!("Send failed: {e}"),
+                "server_error",
+            );
+        }
+    };
+    if status != 200 {
+        return err_response(
+            StatusCode::BAD_GATEWAY,
+            format!("Backend returned {status}"),
+            "server_error",
+        );
+    }
+
+    // Detached task: the outcome of `update_annotations` is deliberately ignored.
+    {
+        let backend = Arc::clone(&state.backend);
+        let cid = cascade_id.clone();
+        tokio::spawn(async move {
+            let _ = backend.update_annotations(&cid).await;
+        });
+    }
+
+    // OpenAI-style id: `chatcmpl-` followed by a dash-less UUID v4.
+    let completion_id = format!(
+        "chatcmpl-{}",
+        uuid::Uuid::new_v4().to_string().replace('-', "")
+    );
+    let model_label = model_name.to_string();
+
+    // Same arguments either way; only the response framing differs.
+    if body.stream {
+        chat_completions_stream(state, completion_id, model_label, cascade_id, body.timeout).await
+    } else {
+        chat_completions_sync(state, completion_id, model_label, cascade_id, body.timeout).await
+    }
+}
+
+// ─── Streaming ───────────────────────────────────────────────────────────────
+
+/// Streaming output in Chat Completions format: SSE chunks built by polling the cascade until done, idle or timed out.
+async fn chat_completions_stream(
+    state: Arc<AppState>,
+    completion_id: String,
+    model_name: String,
+    cascade_id: String,
+    timeout: u64,
+) -> axum::response::Response {
+    let stream = async_stream::stream! {
+        let start = std::time::Instant::now();
+        let mut last_text = String::new();
+
+        // Initial role chunk: announces the assistant turn with empty content before any polling
+        yield Ok::<_, std::convert::Infallible>(Event::default().data(serde_json::to_string(&serde_json::json!({
+            "id": completion_id,
+            "object": "chat.completion.chunk",
+            "created": now_unix(),
+            "model": model_name,
+            "choices": [{
+                "index": 0,
+                "delta": {"role": "assistant", "content": ""},
+                "finish_reason": serde_json::Value::Null,
+            }],
+        })).unwrap_or_default()));
+
+        while start.elapsed().as_secs() < timeout { // `timeout` is compared in whole seconds
+            if let Ok((status, data)) = state.backend.get_steps(&cascade_id).await {
+                if status == 200 {
+                    if let Some(steps) = data["steps"].as_array() {
+                        let text = extract_response_text(steps);
+
+                        if !text.is_empty() && text != last_text {
+                            let delta = if text.len() > last_text.len() && text.starts_with(&*last_text) {
+                                &text[last_text.len()..]
+                            } else {
+                                &text // not a pure append of what was already sent: resend the whole text
+                            };
+
+                            if !delta.is_empty() {
+                                yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
+                                    "id": completion_id,
+                                    "object": "chat.completion.chunk",
+                                    "created": now_unix(),
+                                    "model": model_name,
+                                    "choices": [{
+                                        "index": 0,
+                                        "delta": {"content": delta},
+                                        "finish_reason": serde_json::Value::Null,
+                                    }],
+                                })).unwrap_or_default()));
+                                last_text = text.to_string(); // baseline for computing the next delta
+                            }
+                        }
+
+                        // Done check: the steps must look finished AND some text must already have been sent
+                        if is_response_done(steps) && !last_text.is_empty() {
+                            debug!("Completions stream done, text length={}", last_text.len());
+                            yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
+                                "id": completion_id,
+                                "object": "chat.completion.chunk",
+                                "created": now_unix(),
+                                "model": model_name,
+                                "choices": [{
+                                    "index": 0,
+                                    "delta": {},
+                                    "finish_reason": "stop",
+                                }],
+                            })).unwrap_or_default()));
+                            yield Ok(Event::default().data("[DONE]".to_string())); // end-of-stream sentinel
+                            return;
+                        }
+
+                        // IDLE fallback: ask the trajectory endpoint whether the run has gone idle
+                        // Only queried when the step count is a multiple of 5 (and above 4), to reduce backend traffic
+                        let step_count = steps.len();
+                        if step_count > 4 && step_count % 5 == 0 {
+                            if let Ok((ts, td)) = state.backend.get_trajectory(&cascade_id).await {
+                                if ts == 200 {
+                                    let run_status = td["status"].as_str().unwrap_or("");
+                                    if run_status.contains("IDLE") && !last_text.is_empty() {
+                                        debug!("Completions IDLE, text length={}", last_text.len());
+                                        yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
+                                            "id": completion_id,
+                                            "object": "chat.completion.chunk",
+                                            "created": now_unix(),
+                                            "model": model_name,
+                                            "choices": [{
+                                                "index": 0,
+                                                "delta": {},
+                                                "finish_reason": "stop",
+                                            }],
+                                        })).unwrap_or_default()));
+                                        yield Ok(Event::default().data("[DONE]".to_string()));
+                                        return;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            let poll_ms: u64 = rand::thread_rng().gen_range(800..1200); // jittered poll interval: 800–1199 ms
+            tokio::time::sleep(tokio::time::Duration::from_millis(poll_ms)).await;
+        }
+
+        // Timeout: finish with a stop chunk; the notice text is included only if nothing was ever sent
+        warn!("Completions stream timeout after {}s", timeout);
+        yield Ok(Event::default().data(serde_json::to_string(&serde_json::json!({
+            "id": completion_id,
+            "object": "chat.completion.chunk",
+            "created": now_unix(),
+            "model": model_name,
+            "choices": [{
+                "index": 0,
+                "delta": {"content": if last_text.is_empty() { "[Timeout waiting for response]" } else { "" }},
+                "finish_reason": "stop",
+            }],
+        })).unwrap_or_default()));
+        yield Ok(Event::default().data("[DONE]".to_string()));
+    };
+
+    Sse::new(stream)
+        .keep_alive(
+            axum::response::sse::KeepAlive::new()
+                .interval(std::time::Duration::from_secs(15)) // keep-alive interval: 15 s
+                .text(""),
+        )
+        .into_response()
+}
+
+// ─── Sync ────────────────────────────────────────────────────────────────────
+
+/// Non-streaming responder: wait for the cascade to finish, then return one
+/// `chat.completion` JSON object.
+///
+/// Token counts are taken from `mitm_store.take_usage` when it yields a value,
+/// otherwise from the usage found while polling, otherwise they are reported as 0.
+async fn chat_completions_sync(
+    state: Arc<AppState>,
+    completion_id: String,
+    model_name: String,
+    cascade_id: String,
+    timeout: u64,
+) -> axum::response::Response {
+    let polled = poll_for_response(&state, &cascade_id, timeout).await;
+
+    // Preference order: intercepted usage, then polled usage, then zeros.
+    let intercepted = state.mitm_store.take_usage(&cascade_id).await;
+    let (prompt_tokens, completion_tokens, cached_tokens) = match (intercepted, &polled.usage) {
+        (Some(m), _) => (m.input_tokens, m.output_tokens, m.cache_read_input_tokens),
+        (None, Some(u)) => (u.input_tokens, u.output_tokens, 0),
+        (None, None) => (0, 0, 0),
+    };
+
+    let usage = serde_json::json!({
+        "prompt_tokens": prompt_tokens,
+        "completion_tokens": completion_tokens,
+        "total_tokens": prompt_tokens + completion_tokens,
+        "prompt_tokens_details": { "cached_tokens": cached_tokens },
+    });
+    let choice = serde_json::json!({
+        "index": 0,
+        "message": { "role": "assistant", "content": polled.text },
+        "finish_reason": "stop",
+    });
+    let payload = serde_json::json!({
+        "id": completion_id,
+        "object": "chat.completion",
+        "created": now_unix(),
+        "model": model_name,
+        "choices": [choice],
+        "usage": usage,
+    });
+    Json(payload).into_response()
+}
diff --git a/src/api/mod.rs b/src/api/mod.rs
new file mode 100644
index 0000000..21f5a51
--- /dev/null
+++ b/src/api/mod.rs
@@ -0,0 +1,176 @@
+//! Axum API server — OpenAI-compatible Responses + Chat Completions endpoints.
+
+mod completions;
+mod models;
+mod polling;
+mod responses;
+mod types;
+mod util;
+
+use crate::constants::safe_truncate;
+use crate::session::SessionManager;
+use axum::{
+    extract::{DefaultBodyLimit, State},
+    http::StatusCode,
+    response::{IntoResponse, Json},
+    routing::{delete, get, post},
+    Router,
+};
+use std::sync::Arc;
+use tower_http::cors::CorsLayer;
+use tracing::warn;
+
+use self::models::MODELS;
+use self::types::TokenRequest;
+
+// ─── Shared state ────────────────────────────────────────────────────────────
+
+pub struct AppState { // handlers receive this as `State<Arc<AppState>>`
+    pub backend: Arc<crate::backend::Backend>, // backend client: OAuth token, cascades, steps, trajectories
+    pub sessions: SessionManager, // backs the /v1/sessions endpoints
+    pub mitm_store: crate::mitm::store::MitmStore, // intercepted usage data, reported by /v1/usage
+    pub quota_store: crate::quota::QuotaStore, // source of the /v1/quota snapshot
+}
+
+// ─── Router ──────────────────────────────────────────────────────────────────
+
+/// Build the axum router: all API routes plus permissive CORS and a 1 MB body limit.
+pub fn router(state: Arc<AppState>) -> Router {
+    let routes = Router::new()
+        .route("/v1/responses", post(responses::handle_responses))
+        .route("/v1/chat/completions", post(completions::handle_completions))
+        .route("/v1/models", get(handle_models))
+        .route("/v1/sessions", get(handle_list_sessions))
+        .route("/v1/sessions/{id}", delete(handle_delete_session))
+        .route("/v1/token", post(handle_set_token))
+        .route("/v1/usage", get(handle_usage))
+        .route("/v1/quota", get(handle_quota))
+        .route("/health", get(handle_health))
+        .route("/", get(handle_root));
+    let body_limit = DefaultBodyLimit::max(1_048_576); // 1 MB
+    routes
+        .layer(CorsLayer::permissive())
+        .layer(body_limit)
+        .with_state(state)
+}
+
+// ─── Simple handlers ─────────────────────────────────────────────────────────
+
+/// GET / — service banner.
+///
+/// Reports the proxy's name, version and runtime together with the list of
+/// endpoints it exposes.
+async fn handle_root() -> Json<serde_json::Value> {
+    #[rustfmt::skip]
+    const ENDPOINTS: [&str; 8] = [
+        "/v1/chat/completions", "/v1/responses", "/v1/models", "/v1/sessions",
+        "/v1/token", "/v1/usage", "/v1/quota", "/health",
+    ];
+    Json(serde_json::json!({
+        "service": "antigravity-openai-proxy",
+        "version": "3.2.0",
+        "runtime": "rust",
+        "endpoints": ENDPOINTS,
+    }))
+}
+
+async fn handle_health() -> Json<serde_json::Value> {
+    Json(serde_json::json!({"status": "ok"})) // GET /health: unconditionally reports "ok"
+}
+
+/// GET /v1/models — list every entry of `MODELS` in OpenAI "model list" shape.
+///
+/// `created` is a fixed constant; the model's label and enum value are exposed
+/// under `meta`.
+async fn handle_models() -> Json<serde_json::Value> {
+    let mut data: Vec<serde_json::Value> = Vec::with_capacity(MODELS.len());
+    for def in MODELS {
+        let meta = serde_json::json!({"label": def.label, "enum_value": def.model_enum});
+        data.push(serde_json::json!({
+            "id": def.name,
+            "object": "model",
+            "created": 1700000000u64,
+            "owned_by": "antigravity",
+            "permission": [],
+            "root": def.name,
+            "parent": null,
+            "meta": meta,
+        }));
+    }
+    Json(serde_json::json!({"object": "list", "data": data}))
+}
+
+/// GET /v1/sessions — return everything the session manager lists.
+async fn handle_list_sessions(State(state): State<Arc<AppState>>) -> Json<serde_json::Value> {
+    let listing = state.sessions.list_sessions().await;
+    let payload = serde_json::json!({ "sessions": listing });
+    Json(payload)
+}
+
+/// DELETE /v1/sessions/{id} — remove one session.
+/// Answers 200 when `delete_session` reports success and 404 otherwise.
+async fn handle_delete_session(
+    State(state): State<Arc<AppState>>,
+    axum::extract::Path(id): axum::extract::Path<String>,
+) -> impl IntoResponse {
+    let removed = state.sessions.delete_session(&id).await;
+    let (code, payload) = if removed {
+        let ok = serde_json::json!({"status": "deleted", "session_id": id});
+        (StatusCode::OK, ok)
+    } else {
+        let missing = serde_json::json!({"error": format!("Session not found: {id}")});
+        (StatusCode::NOT_FOUND, missing)
+    };
+    (code, Json(payload))
+}
+
+/// POST /v1/token — install a new OAuth token at runtime.
+///
+/// Tokens that do not start with `ya29.` are rejected with 400. An accepted token
+/// is set on the backend and then written to the token file; a failed write is
+/// only logged. The reply echoes a truncated prefix of the token.
+async fn handle_set_token(
+    State(state): State<Arc<AppState>>,
+    Json(body): Json<TokenRequest>,
+) -> impl IntoResponse {
+    let token = body.token;
+    if !token.starts_with("ya29.") {
+        let rejection = serde_json::json!({"error": "Invalid token. Must start with ya29."});
+        return (StatusCode::BAD_REQUEST, Json(rejection));
+    }
+    state.backend.set_oauth_token(token.clone()).await;
+
+    // Persistence is best-effort: a write failure is logged and the request still succeeds.
+    let path = crate::constants::token_file_path();
+    if let Err(e) = tokio::fs::write(&path, &token).await {
+        warn!("Failed to write token file: {e}");
+    }
+    let accepted = serde_json::json!({"status": "ok", "token_prefix": safe_truncate(&token, 20)});
+    (StatusCode::OK, Json(accepted))
+}
+
+/// GET /v1/usage — aggregate token statistics reported by the MITM store.
+async fn handle_usage(State(state): State<Arc<AppState>>) -> Json<serde_json::Value> {
+    let totals = state.mitm_store.stats().await;
+    // `total_tokens` is derived here as input + output.
+    let grand_total = totals.total_input_tokens + totals.total_output_tokens;
+    let mitm = serde_json::json!({
+        "total_requests": totals.total_requests,
+        "total_input_tokens": totals.total_input_tokens,
+        "total_output_tokens": totals.total_output_tokens,
+        "total_cache_read_tokens": totals.total_cache_read_tokens,
+        "total_cache_creation_tokens": totals.total_cache_creation_tokens,
+        "total_thinking_output_tokens": totals.total_thinking_output_tokens,
+        "total_response_output_tokens": totals.total_response_output_tokens,
+        "total_tokens": grand_total,
+        "per_model": totals.per_model,
+    });
+    Json(serde_json::json!({ "mitm": mitm }))
+}
+
+/// GET /v1/quota — current quota snapshot as JSON (`null` if it cannot be serialized).
+async fn handle_quota(State(state): State<Arc<AppState>>) -> Json<serde_json::Value> {
+    let snapshot = state.quota_store.snapshot().await;
+    let body = serde_json::to_value(snapshot).unwrap_or_default();
+    Json(body)
+}
diff --git a/src/api/models.rs b/src/api/models.rs
new file mode 100644
index 0000000..4e16fc9
--- /dev/null
+++ b/src/api/models.rs
@@ -0,0 +1,49 @@
+//! Model definitions and lookup.
+
+/// Model definition: friendly name → (antigravity_id, protobuf_enum, label).
+pub(crate) struct ModelDef {
+    pub name: &'static str, // public model id; `lookup_model` matches on this
+    #[allow(dead_code)] // NOTE(review): presumably nothing reads `ag_id` yet — confirm before removing
+    pub ag_id: &'static str,
+    pub model_enum: u32, // numeric enum value passed to the backend when sending a message
+    pub label: &'static str, // human-readable name, exposed as `meta.label` by /v1/models
+}
+
+pub(crate) const MODELS: &[ModelDef] = &[ // models accepted by the API; clients select one by `name`
+    ModelDef {
+        name: "opus-4.6",
+        ag_id: "MODEL_PLACEHOLDER_M26",
+        model_enum: 1026,
+        label: "Claude Opus 4.6 (Thinking)",
+    },
+    ModelDef {
+        name: "opus-4.5",
+        ag_id: "MODEL_PLACEHOLDER_M12",
+        model_enum: 1012,
+        label: "Claude Opus 4.5 (Thinking)",
+    },
+    ModelDef {
+        name: "gemini-3-pro-high",
+        ag_id: "MODEL_PLACEHOLDER_M8",
+        model_enum: 1008,
+        label: "Gemini 3 Pro (High)",
+    },
+    ModelDef {
+        name: "gemini-3-pro",
+        ag_id: "MODEL_PLACEHOLDER_M7",
+        model_enum: 1007,
+        label: "Gemini 3 Pro (Low)",
+    },
+    ModelDef {
+        name: "gemini-3-flash",
+        ag_id: "MODEL_PLACEHOLDER_M18",
+        model_enum: 1018,
+        label: "Gemini 3 Flash",
+    },
+];
+
+pub(crate) const DEFAULT_MODEL: &str = "opus-4.6"; // used when a request omits `model`; must match a `name` in MODELS
+
+pub(crate) fn lookup_model(name: &str) -> Option<&'static ModelDef> {
+    MODELS.iter().find(|m| m.name == name) // exact, case-sensitive match; first hit wins
+}
diff --git a/src/api/polling.rs b/src/api/polling.rs
new file mode 100644
index 0000000..6e6c45e
--- /dev/null
+++ b/src/api/polling.rs
@@ -0,0 +1,298 @@
+//! Shared polling engine and step extraction helpers.
+
+use rand::Rng;
+use tracing::{debug, info, warn};
+
+use super::AppState;
+
+/// Real token usage reported by the language server (read from a step's `metadata.modelUsage`).
+#[derive(Debug, Clone, Default)]
+#[allow(dead_code)] // NOTE(review): presumably because not every field is read — confirm
+pub(crate) struct ModelUsage {
+    pub input_tokens: u64, // from `inputTokens`; 0 when absent or unparsable
+    pub output_tokens: u64, // from `outputTokens`; 0 when absent or unparsable
+    pub api_provider: String, // from `apiProvider`; empty when absent
+    pub model: String, // from `model`; empty when absent
+}
+
+/// Result of polling — text + optional real usage data + thinking data.
+pub(crate) struct PollResult {
+    pub text: String, // response text, or the timeout placeholder set by `poll_for_response`
+    pub usage: Option<ModelUsage>, // `None` when no step carried a non-zero token count
+    /// Opaque Anthropic thinking verification signature from PLANNER_RESPONSE.
+    /// Required for multi-turn thinking model chaining.
+    pub thinking_signature: Option<String>,
+    /// The model's internal reasoning/thinking content.
+    /// Available for both Opus (Anthropic) and Gemini models.
+    pub thinking: Option<String>,
+    /// Time the model spent thinking, as reported by the LS (e.g. "0.041999832s").
+    pub thinking_duration: Option<String>,
+}
+
+/// Return the most recent non-empty response text found in `steps`.
+///
+/// Steps are examined newest-first. A step whose type contains `PLANNER_RESPONSE`
+/// contributes `plannerResponse.rawResponse` (or `.response` when that is not a
+/// string); a step whose type contains `AI_RESPONSE` or `MODEL_RESPONSE`
+/// contributes the first string among `response`, `rawResponse` and `text`.
+/// Empty strings are skipped; if nothing qualifies the result is empty.
+pub(crate) fn extract_response_text(steps: &[serde_json::Value]) -> String {
+    // Turn an empty candidate into `None` so the scan moves on to older steps.
+    let keep = |s: &str| (!s.is_empty()).then(|| s.to_string());
+
+    let from_step = |step: &serde_json::Value| -> Option<String> {
+        let kind = step["type"].as_str().unwrap_or("");
+
+        if kind.contains("PLANNER_RESPONSE") {
+            let planner = &step["plannerResponse"];
+            let raw = planner["rawResponse"].as_str();
+            if let Some(found) = raw.or_else(|| planner["response"].as_str()).and_then(keep) {
+                return Some(found);
+            }
+        }
+        if !(kind.contains("AI_RESPONSE") || kind.contains("MODEL_RESPONSE")) {
+            return None;
+        }
+        let direct = step["response"].as_str().or_else(|| step["rawResponse"].as_str());
+        direct.or_else(|| step["text"].as_str()).and_then(keep)
+    };
+
+    steps.iter().rev().find_map(from_step).unwrap_or_default()
+}
+
+/// Extract the latest token usage the language server attached to `steps`.
+///
+/// Scans newest-first for a step carrying `metadata.modelUsage` and returns the
+/// first one whose input or output count is non-zero. Counts may arrive either as
+/// decimal strings or as JSON numbers; anything else is treated as 0.
+pub(crate) fn extract_model_usage(steps: &[serde_json::Value]) -> Option<ModelUsage> {
+    steps.iter().rev().find_map(|step| {
+        let usage = step.get("metadata")?.get("modelUsage")?;
+
+        // A token count: parsed from a string when possible, else read as a number.
+        let count = |key: &str| -> u64 {
+            let field = &usage[key];
+            match field.as_str().and_then(|raw| raw.parse::<u64>().ok()) {
+                Some(parsed) => parsed,
+                None => field.as_u64().unwrap_or(0),
+            }
+        };
+        // A descriptive string field; missing or non-string values become "".
+        let label = |key: &str| -> String { usage[key].as_str().unwrap_or("").to_string() };
+
+        let input_tokens = count("inputTokens");
+        let output_tokens = count("outputTokens");
+        if input_tokens == 0 && output_tokens == 0 {
+            // Nothing useful on this step — keep scanning older ones.
+            return None;
+        }
+
+        Some(ModelUsage {
+            input_tokens,
+            output_tokens,
+            api_provider: label("apiProvider"),
+            model: label("model"),
+        })
+    })
+}
+
+/// Shared implementation of the three `extract_thinking_*` accessors below.
+///
+/// Returns the newest non-empty string stored under `plannerResponse.<field>`.
+/// Steps are scanned from last to first and only those whose `type` contains
+/// `PLANNER_RESPONSE` are considered. Missing, non-string and empty values are
+/// skipped, so an older step can still supply the value.
+///
+/// `field` is the JSON key inside `plannerResponse`, e.g. `"thinking"`.
+fn latest_planner_field(steps: &[serde_json::Value], field: &str) -> Option<String> {
+    steps
+        .iter()
+        // Newest first: a later step supersedes earlier ones.
+        .rev()
+        // Only planner-response steps are inspected.
+        .filter(|step| step["type"].as_str().unwrap_or("").contains("PLANNER_RESPONSE"))
+        .filter_map(|step| step["plannerResponse"][field].as_str())
+        // Skip empty values so an older step can still supply one.
+        .find(|value| !value.is_empty())
+        .map(str::to_string)
+}
+
+/// Extract the thinking signature from PLANNER_RESPONSE steps.
+/// This is an opaque Base64 blob used by Anthropic for extended thinking
+/// verification. Needed to chain multi-turn conversations with thinking models.
+///
+/// Reads `plannerResponse.thinkingSignature` from the newest step that has a
+/// non-empty value; returns `None` when no step does.
+pub(crate) fn extract_thinking_signature(steps: &[serde_json::Value]) -> Option<String> {
+    latest_planner_field(steps, "thinkingSignature")
+}
+
+/// Extract the model's thinking/reasoning content from PLANNER_RESPONSE steps.
+/// This is the internal monologue produced during extended thinking.
+/// Available for ALL models (Opus, Gemini Flash, Gemini Pro).
+///
+/// Reads `plannerResponse.thinking` from the newest step that has a non-empty
+/// value; returns `None` when no step does.
+pub(crate) fn extract_thinking_content(steps: &[serde_json::Value]) -> Option<String> {
+    latest_planner_field(steps, "thinking")
+}
+
+/// Extract thinking duration from PLANNER_RESPONSE steps.
+/// Returns the raw duration string as reported by the LS (e.g. "0.041999832s").
+///
+/// Reads `plannerResponse.thinkingDuration` from the newest step that has a
+/// non-empty value; returns `None` when no step does.
+pub(crate) fn extract_thinking_duration(steps: &[serde_json::Value]) -> Option<String> {
+    latest_planner_field(steps, "thinkingDuration")
+}
+
+/// Decide whether the cascade has finished, judging by its final step only.
+///
+/// Returns `true` when the last step is a CHECKPOINT, or when it is a
+/// PLANNER_RESPONSE / AI_RESPONSE / MODEL_RESPONSE whose `status` contains `DONE`.
+/// Earlier steps are not consulted: this prevents false positives during agentic
+/// tool-call loops, where intermediate PLANNER_RESPONSE steps show DONE but the
+/// cascade keeps going. An empty step list is never considered done.
+///
+/// Matching is by substring, so type names carrying a prefix or suffix around
+/// these markers are recognised as well.
+pub(crate) fn is_response_done(steps: &[serde_json::Value]) -> bool {
+    const ANSWER_MARKERS: [&str; 3] = ["PLANNER_RESPONSE", "AI_RESPONSE", "MODEL_RESPONSE"];
+
+    let Some(tail) = steps.last() else {
+        return false;
+    };
+    let kind = tail["type"].as_str().unwrap_or("");
+    let status = tail["status"].as_str().unwrap_or("");
+
+    // A trailing CHECKPOINT ends the cascade regardless of its status.
+    if kind.contains("CHECKPOINT") {
+        return true;
+    }
+
+    // Otherwise the last step must be an answer-type step that reports DONE.
+    let is_answer_step = ANSWER_MARKERS
+        .iter()
+        .any(|marker| kind.contains(*marker));
+    is_answer_step && status.contains("DONE")
+}
+
+/// Poll the backend until the cascade yields a response or `timeout` seconds elapse.
+///
+/// Polling ends when [`is_response_done`] holds and response text is present, or —
+/// only checked when the step count is a multiple of 5 above 4 — when the trajectory
+/// status contains `IDLE` and text is present. Failed or non-200 backend calls are
+/// skipped; iterations are separated by a jittered 1000–1799 ms sleep. On timeout
+/// the text is `"[Timeout waiting for AI response]"` and optional fields are `None`.
+pub(crate) async fn poll_for_response(
+    state: &AppState,
+    cascade_id: &str,
+    timeout: u64,
+) -> PollResult {
+    let start = std::time::Instant::now();
+    // Truncate by characters: slicing by bytes panics off a char boundary.
+    let short_id: String = cascade_id.chars().take(8).collect();
+    info!("Polling for response on cascade {short_id} (timeout={timeout}s)");
+
+    let mut last_step_count: usize = 0;
+
+    while start.elapsed().as_secs() < timeout {
+        if let Ok((status, data)) = state.backend.get_steps(cascade_id).await {
+            if status == 200 {
+                if let Some(steps) = data["steps"].as_array() {
+                    let step_count = steps.len();
+
+                    // Only log when the step count changes, to keep the log quiet.
+                    if step_count != last_step_count {
+                        let summary = summarize_step_types(steps);
+                        debug!("Poll {short_id}: {step_count} steps [{summary}]");
+                        last_step_count = step_count;
+                    }
+
+                    // Primary exit: the final step says the cascade is finished.
+                    if is_response_done(steps) {
+                        if let Some(result) = build_poll_result(steps) {
+                            log_response_done(&short_id, start.elapsed().as_secs_f32(), &result);
+                            return result;
+                        }
+                    }
+
+                    // Fallback exit: the trajectory reports IDLE. Queried only when
+                    // the step count is a multiple of 5, to limit backend traffic.
+                    if step_count > 4 && step_count % 5 == 0 {
+                        if let Ok((ts, td)) = state.backend.get_trajectory(cascade_id).await {
+                            let idle = td["status"].as_str().unwrap_or("").contains("IDLE");
+                            if ts == 200 && idle {
+                                if let Some(result) = build_poll_result(steps) {
+                                    info!(
+                                        "Trajectory IDLE ({short_id}), {:.1}s, {} chars",
+                                        start.elapsed().as_secs_f32(),
+                                        result.text.len()
+                                    );
+                                    return result;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        let poll_ms: u64 = rand::thread_rng().gen_range(1000..1800);
+        tokio::time::sleep(tokio::time::Duration::from_millis(poll_ms)).await;
+    }
+
+    warn!("Timeout after {timeout}s on cascade {short_id}");
+    PollResult {
+        text: "[Timeout waiting for AI response]".to_string(),
+        usage: None,
+        thinking_signature: None,
+        thinking: None,
+        thinking_duration: None,
+    }
+}
+
+/// Assemble a [`PollResult`] from `steps`, or `None` while no response text exists.
+fn build_poll_result(steps: &[serde_json::Value]) -> Option<PollResult> {
+    let text = extract_response_text(steps);
+    if text.is_empty() {
+        return None;
+    }
+    Some(PollResult {
+        text,
+        usage: extract_model_usage(steps),
+        thinking_signature: extract_thinking_signature(steps),
+        thinking: extract_thinking_content(steps),
+        thinking_duration: extract_thinking_duration(steps),
+    })
+}
+
+/// Log the "Response done" line, with token counts when usage is known.
+fn log_response_done(short_id: &str, elapsed: f32, result: &PollResult) {
+    let thinking = match &result.thinking {
+        Some(t) => format!(", thinking: {} chars", t.len()),
+        None => String::new(),
+    };
+    let sig = if result.thinking_signature.is_some() { ", has sig" } else { "" };
+    let tokens = match &result.usage {
+        Some(u) => format!(", tokens: {}in/{}out ({})", u.input_tokens, u.output_tokens, u.model),
+        None => " (no usage)".to_string(),
+    };
+    let chars = result.text.len();
+    info!("Response done ({short_id}), {elapsed:.1}s, {chars} chars{tokens}{thinking}{sig}");
+}
+
+/// Compact `TYPE×count` summary of the step types, for debug logging.
+fn summarize_step_types(steps: &[serde_json::Value]) -> String {
+    let mut counts = std::collections::BTreeMap::<&str, usize>::new();
+    for step in steps {
+        let raw = step["type"].as_str().unwrap_or("?");
+        // Steps lacking the usual prefix keep their full name instead of "?".
+        let short = raw.strip_prefix("CORTEX_STEP_TYPE_").unwrap_or(raw);
+        *counts.entry(short).or_insert(0) += 1;
+    }
+    counts
+        .iter()
+        .map(|(name, n)| if *n > 1 { format!("{name}×{n}") } else { name.to_string() })
+        .collect::<Vec<_>>()
+        .join(", ")
+}
diff --git a/src/api/responses.rs b/src/api/responses.rs
new file mode 100644
index 0000000..5da0421
--- /dev/null
+++ b/src/api/responses.rs
@@ -0,0 +1,686 @@
+//! OpenAI Responses API (/v1/responses) handler.
+//!
+//! Strictly adheres to the official OpenAI Responses API protocol:
+//! https://platform.openai.com/docs/api-reference/responses
+
+use axum::{
+    extract::State,
+    http::StatusCode,
+    response::{sse::Event, IntoResponse, Json, Sse},
+};
+use rand::Rng;
+use std::sync::atomic::{AtomicU32, Ordering};
+use std::sync::Arc;
+use tracing::{debug, info};
+
+use super::models::{lookup_model, DEFAULT_MODEL, MODELS};
+use super::polling::{extract_response_text, is_response_done, poll_for_response, extract_model_usage, extract_thinking_signature, extract_thinking_content, extract_thinking_duration};
+use super::types::*;
+use super::util::{err_response, now_unix, responses_sse_event};
+use super::AppState;
+
+// ─── Input extraction ────────────────────────────────────────────────────────
+
+/// Pull the prompt text out of the Responses API `input` field.
+/// Non-empty `instructions` are prepended, separated from the user text by a blank line.
+fn extract_responses_input(input: &serde_json::Value, instructions: Option<&str>) -> String {
+    // Text of one message: a plain string, or the space-joined text parts of a content array.
+    let message_text = |item: &serde_json::Value| -> Option<String> {
+        if let Some(plain) = item["content"].as_str() {
+            return Some(plain.to_owned());
+        }
+        let parts = item["content"].as_array()?;
+        let mut pieces: Vec<&str> = Vec::new();
+        for part in parts {
+            let is_text = matches!(part["type"].as_str(), Some("input_text" | "text"));
+            if let (true, Some(piece)) = (is_text, part["text"].as_str()) {
+                pieces.push(piece);
+            }
+        }
+        Some(pieces.join(" "))
+    };
+
+    let user_text = if let Some(whole) = input.as_str() {
+        whole.to_owned()
+    } else if let Some(items) = input.as_array() {
+        // Only the most recent item whose role is "user" is forwarded.
+        let newest_user = items.iter().rfind(|item| item["role"].as_str() == Some("user"));
+        newest_user.and_then(message_text).unwrap_or_default()
+    } else {
+        String::new()
+    };
+
+    if let Some(inst) = instructions.filter(|inst| !inst.is_empty()) {
+        return format!("{inst}\n\n{user_text}");
+    }
+    user_text
+}
+
+/// Resolve the session ID from `conversation`: a bare string, or an object's string `id`.
+fn extract_conversation_id(conv: &Option<serde_json::Value>) -> Option<String> {
+    let value = conv.as_ref()?;
+    if let serde_json::Value::String(id) = value {
+        return Some(id.clone());
+    }
+    value["id"].as_str().map(str::to_owned)
+}
+
+/// Assemble a `ResponsesResponse`, echoing request parameters and filling in fixed values.
+fn build_response_object(
+    id: &str,
+    model: &str,
+    status: &'static str,
+    created_at: u64,
+    completed_at: Option<u64>,
+    output: Vec<ResponseOutput>,
+    usage: Option<Usage>,
+    instructions: Option<&str>,
+    store: bool,
+    temperature: f64,
+    top_p: f64,
+    max_output_tokens: Option<u64>,
+    previous_response_id: Option<&str>,
+    user: Option<&str>,
+    metadata: &serde_json::Value,
+    thinking_signature: Option<String>,
+    thinking: Option<String>,
+    thinking_duration: Option<String>,
+) -> ResponsesResponse {
+    ResponsesResponse {
+        id: id.to_owned(), // identity, lifecycle and payload
+        object: "response",
+        status,
+        created_at,
+        completed_at,
+        model: model.to_owned(),
+        output,
+        usage,
+        instructions: instructions.map(str::to_owned), // request parameters echoed back
+        max_output_tokens,
+        previous_response_id: previous_response_id.map(str::to_owned),
+        store,
+        temperature,
+        top_p,
+        user: user.map(str::to_owned),
+        metadata: metadata.clone(),
+        error: None, // fixed values: this builder never fills in errors, tools or reasoning
+        incomplete_details: None,
+        parallel_tool_calls: true,
+        reasoning: Reasoning::default(),
+        text: TextFormat::default(),
+        tool_choice: "auto",
+        tools: Vec::new(),
+        truncation: "disabled",
+        thinking_signature, // proxy extensions, passed through untouched
+        thinking,
+        thinking_duration,
+    }
+}
+
+/// Convert a ResponsesResponse into a JSON value for SSE payloads (`{}` if serialization fails).
+fn response_to_json(resp: &ResponsesResponse) -> serde_json::Value {
+    serde_json::to_value(resp).unwrap_or_else(|_| serde_json::json!({}))
+}
+
+// ─── Handler ─────────────────────────────────────────────────────────────────
+
+/// `POST /v1/responses`: validate the request, send the prompt to a cascade, then stream or poll.
+pub(crate) async fn handle_responses(
+    State(state): State<Arc<AppState>>,
+    Json(body): Json<ResponsesRequest>,
+) -> axum::response::Response {
+    info!(
+        "POST /v1/responses model={} stream={}",
+        body.model.as_deref().unwrap_or(DEFAULT_MODEL),
+        body.stream
+    );
+    let model_name = body.model.as_deref().unwrap_or(DEFAULT_MODEL); // omitted model -> default
+    let model = match lookup_model(model_name) {
+        Some(m) => m,
+        None => {
+            let names: Vec<&str> = MODELS.iter().map(|m| m.name).collect(); // listed in the 400
+            return err_response(
+                StatusCode::BAD_REQUEST,
+                format!("Unknown model: {model_name}. Available: {names:?}"),
+                "invalid_request_error",
+            );
+        }
+    };
+
+    let token = state.backend.oauth_token().await; // only checked for presence here
+    if token.is_empty() {
+        return err_response(
+            StatusCode::UNAUTHORIZED,
+            "No OAuth token. POST to /v1/token or set ANTIGRAVITY_OAUTH_TOKEN env var.".into(),
+            "authentication_error",
+        );
+    }
+
+    let user_text = extract_responses_input(&body.input, body.instructions.as_deref());
+    if user_text.is_empty() {
+        return err_response(
+            StatusCode::BAD_REQUEST,
+            "No user input found".to_string(),
+            "invalid_request_error",
+        );
+    }
+
+    let response_id = format!(
+        "resp_{}",
+        uuid::Uuid::new_v4().to_string().replace('-', "")
+    );
+
+    // Session/conversation management: a conversation ID goes through the session store,
+    let session_id_str = extract_conversation_id(&body.conversation);
+    let cascade_id = if let Some(ref sid) = session_id_str {
+        match state
+            .sessions
+            .get_or_create(Some(sid), || state.backend.create_cascade())
+            .await
+        {
+            Ok(sr) => sr.cascade_id,
+            Err(e) => {
+                return err_response(
+                    StatusCode::BAD_GATEWAY,
+                    format!("StartCascade failed: {e}"),
+                    "server_error",
+                );
+            }
+        }
+    } else {
+        match state.backend.create_cascade().await { // no conversation: always a new cascade
+            Ok(cid) => cid,
+            Err(e) => {
+                return err_response(
+                    StatusCode::BAD_GATEWAY,
+                    format!("StartCascade failed: {e}"),
+                    "server_error",
+                );
+            }
+        }
+    };
+
+    // Send message; any backend status other than 200 is reported to the client as a 502
+    match state
+        .backend
+        .send_message(&cascade_id, &user_text, model.model_enum)
+        .await
+    {
+        Ok((status, _)) if status == 200 => {
+            let bg = Arc::clone(&state.backend);
+            let cid = cascade_id.clone();
+            tokio::spawn(async move { // detached task; the update's result is ignored
+                let _ = bg.update_annotations(&cid).await;
+            });
+        }
+        Ok((status, _)) => {
+            return err_response(
+                StatusCode::BAD_GATEWAY,
+                format!("Antigravity returned {status}"),
+                "server_error",
+            );
+        }
+        Err(e) => {
+            return err_response(
+                StatusCode::BAD_GATEWAY,
+                format!("Send message failed: {e}"),
+                "server_error",
+            );
+        }
+    }
+
+    // Capture request params for response building (owned copies handed to the sub-handler)
+    let req_params = RequestParams {
+        user_text: user_text.clone(),
+        instructions: body.instructions.clone(),
+        store: body.store,
+        temperature: body.temperature.unwrap_or(1.0),
+        top_p: body.top_p.unwrap_or(1.0),
+        max_output_tokens: body.max_output_tokens,
+        previous_response_id: body.previous_response_id.clone(),
+        user: body.user.clone(),
+        metadata: body.metadata.clone().unwrap_or(serde_json::json!({})),
+    };
+
+    if body.stream { // SSE stream when requested, otherwise one JSON body after polling
+        handle_responses_stream(
+            state, response_id, model_name.to_string(), cascade_id,
+            body.timeout, req_params,
+        )
+        .await
+    } else {
+        handle_responses_sync(
+            state, response_id, model_name.to_string(), cascade_id,
+            body.timeout, req_params,
+        )
+        .await
+    }
+}
+
+/// Captured request parameters needed to echo back in the response.
+struct RequestParams {
+    user_text: String, // prompt text as sent to the backend; afterwards feeds usage estimation
+    instructions: Option<String>,
+    store: bool,
+    temperature: f64, // request value, or 1.0 when the request omitted it
+    top_p: f64, // request value, or 1.0 when the request omitted it
+    max_output_tokens: Option<u64>,
+    previous_response_id: Option<String>,
+    user: Option<String>,
+    metadata: serde_json::Value, // request value, or `{}` when the request omitted it
+}
+
+/// Pick token usage from the most reliable source available:
+///
+/// 1. MITM-intercepted usage (real API numbers, including cache-read and thinking tokens)
+/// 2. Usage recovered from the LS trajectory steps (no cache or reasoning breakdown)
+/// 3. An estimate derived from the input/output text lengths
+async fn usage_from_poll(
+    mitm_store: &crate::mitm::store::MitmStore,
+    cascade_id: &str,
+    model_usage: &Option<super::polling::ModelUsage>,
+    input_text: &str,
+    output_text: &str,
+) -> Usage {
+    // Assemble a Usage from raw counters: input, cached input, output, reasoning output.
+    let build = |input: u64, cached: u64, output: u64, reasoning: u64| Usage {
+        input_tokens: input,
+        input_tokens_details: InputTokensDetails { cached_tokens: cached },
+        output_tokens: output,
+        output_tokens_details: OutputTokensDetails { reasoning_tokens: reasoning },
+        total_tokens: input + output,
+    };
+
+    // Source 1: usage captured by the MITM proxy for this cascade.
+    if let Some(captured) = mitm_store.take_usage(cascade_id).await {
+        tracing::debug!(
+            input = captured.input_tokens,
+            output = captured.output_tokens,
+            cache_read = captured.cache_read_input_tokens,
+            cache_create = captured.cache_creation_input_tokens,
+            thinking = captured.thinking_output_tokens,
+            "Using MITM intercepted usage"
+        );
+        // Cache reads become cached_tokens; thinking output becomes reasoning_tokens.
+        return build(
+            captured.input_tokens,
+            captured.cache_read_input_tokens,
+            captured.output_tokens,
+            captured.thinking_output_tokens,
+        );
+    }
+
+    // No intercepted data: fall back to the remaining two sources.
+    match model_usage {
+        // Source 2: trajectory data has no cache/reasoning split, so both details stay zero.
+        Some(reported) => build(reported.input_tokens, 0, reported.output_tokens, 0),
+        // Source 3: nothing measured, so use the text-length heuristic.
+        None => Usage::estimate(input_text, output_text),
+    }
+}
+
+// ─── Sync response ───────────────────────────────────────────────────────────
+
+/// Non-streaming path: poll the cascade until it answers (or `timeout` seconds elapse) and
+/// return the finished Response object as a single JSON body.
+/// On timeout the poller's placeholder text is still returned with status "completed".
+async fn handle_responses_sync(
+    state: Arc<AppState>,
+    response_id: String,
+    model_name: String,
+    cascade_id: String,
+    timeout: u64,
+    params: RequestParams,
+) -> axum::response::Response {
+    // The two timestamps bracket the entire polling wait.
+    let created_at = now_unix();
+    let polled = poll_for_response(&state, &cascade_id, timeout).await;
+    let completed_at = now_unix();
+
+    let usage = usage_from_poll(
+        &state.mitm_store,
+        &cascade_id,
+        &polled.usage,
+        &params.user_text,
+        &polled.text,
+    )
+    .await;
+
+    // The whole reply is delivered as one assistant message holding one text part.
+    let reply_part = OutputContent {
+        content_type: "output_text",
+        text: polled.text,
+        annotations: Vec::new(),
+    };
+    let message = ResponseOutput {
+        output_type: "message",
+        id: format!("msg_{}", uuid::Uuid::new_v4().to_string().replace('-', "")),
+        status: "completed",
+        role: "assistant",
+        content: vec![reply_part],
+    };
+
+    let resp = build_response_object(
+        &response_id, &model_name, "completed", created_at, Some(completed_at),
+        vec![message], Some(usage),
+        params.instructions.as_deref(), params.store,
+        params.temperature, params.top_p,
+        params.max_output_tokens, params.previous_response_id.as_deref(),
+        params.user.as_deref(), &params.metadata,
+        polled.thinking_signature, polled.thinking, polled.thinking_duration,
+    );
+
+    Json(resp).into_response()
+}
+
+// ─── Streaming response ─────────────────────────────────────────────────────
+
+/// Streaming path: emit the Responses API SSE lifecycle while polling the cascade for text.
+/// Sends `response.created`, `response.in_progress`, `response.output_item.added` and
+/// `response.content_part.added`, then `response.output_text.delta` events as text grows, and
+/// finally the completion events, or `response.incomplete` if `timeout` seconds pass first.
+async fn handle_responses_stream(
+    state: Arc<AppState>,
+    response_id: String,
+    model_name: String,
+    cascade_id: String,
+    timeout: u64,
+    params: RequestParams,
+) -> axum::response::Response {
+    let stream = async_stream::stream! {
+        let msg_id = format!("msg_{}", uuid::Uuid::new_v4().to_string().replace('-', ""));
+        let created_at = now_unix();
+        let seq = AtomicU32::new(0); // sequence_number source shared by every event of the stream
+        let next_seq = || seq.fetch_add(1, Ordering::Relaxed);
+        const CONTENT_IDX: u32 = 0; // the stream carries exactly one output item ...
+        const OUTPUT_IDX: u32 = 0; // ... holding exactly one content part
+        // Build the in-progress response shell (no output yet)
+        let in_progress_resp = build_response_object(
+            &response_id, &model_name, "in_progress", created_at, None,
+            vec![], None,
+            params.instructions.as_deref(), params.store,
+            params.temperature, params.top_p,
+            params.max_output_tokens, params.previous_response_id.as_deref(),
+            params.user.as_deref(), &params.metadata,
+            None, None, None,
+        );
+        let resp_json = response_to_json(&in_progress_resp);
+
+        // 1. response.created
+        yield Ok::<_, std::convert::Infallible>(responses_sse_event(
+            "response.created",
+            serde_json::json!({
+                "type": "response.created",
+                "sequence_number": next_seq(),
+                "response": resp_json,
+            }),
+        ));
+
+        // 2. response.in_progress
+        yield Ok(responses_sse_event(
+            "response.in_progress",
+            serde_json::json!({
+                "type": "response.in_progress",
+                "sequence_number": next_seq(),
+                "response": resp_json,
+            }),
+        ));
+
+        // 3. response.output_item.added
+        yield Ok(responses_sse_event(
+            "response.output_item.added",
+            serde_json::json!({
+                "type": "response.output_item.added",
+                "sequence_number": next_seq(),
+                "output_index": OUTPUT_IDX,
+                "item": {
+                    "type": "message",
+                    "id": &msg_id,
+                    "status": "in_progress",
+                    "role": "assistant",
+                    "content": [],
+                }
+            }),
+        ));
+
+        // 4. response.content_part.added
+        yield Ok(responses_sse_event(
+            "response.content_part.added",
+            serde_json::json!({
+                "type": "response.content_part.added",
+                "sequence_number": next_seq(),
+                "output_index": OUTPUT_IDX,
+                "content_index": CONTENT_IDX,
+                "part": {
+                    "type": "output_text",
+                    "text": "",
+                    "annotations": [],
+                }
+            }),
+        ));
+
+        // 5. Poll and emit text deltas
+        let start = std::time::Instant::now();
+        let mut last_text = String::new();
+        while start.elapsed().as_secs() < timeout {
+            if let Ok((status, data)) = state.backend.get_steps(&cascade_id).await {
+                if status == 200 {
+                    if let Some(steps) = data["steps"].as_array() {
+                        let text = extract_response_text(steps);
+                        if !text.is_empty() && text != last_text {
+                            let new_content = if text.len() > last_text.len()
+                                && text.starts_with(&*last_text)
+                            {
+                                &text[last_text.len()..] // pure extension: send only the new suffix
+                            } else {
+                                &text // text was rewritten: the whole text goes out as one delta
+                            };
+                            if !new_content.is_empty() {
+                                yield Ok(responses_sse_event(
+                                    "response.output_text.delta",
+                                    serde_json::json!({
+                                        "type": "response.output_text.delta",
+                                        "sequence_number": next_seq(),
+                                        "item_id": &msg_id,
+                                        "output_index": OUTPUT_IDX,
+                                        "content_index": CONTENT_IDX,
+                                        "delta": new_content,
+                                    }),
+                                ));
+                                last_text = text.to_string();
+                            }
+                        }
+
+                        // Check if response is done AND we have text
+                        if is_response_done(steps) && !last_text.is_empty() {
+                            debug!("Response done, text length={}", last_text.len());
+                            let mu = extract_model_usage(steps);
+                            let usage = usage_from_poll(&state.mitm_store, &cascade_id, &mu, &params.user_text, &last_text).await;
+                            let ts = extract_thinking_signature(steps);
+                            let tc = extract_thinking_content(steps);
+                            let td = extract_thinking_duration(steps);
+                            for evt in completion_events(
+                                &response_id, &model_name, &msg_id,
+                                OUTPUT_IDX, CONTENT_IDX, &last_text, usage,
+                                created_at, &seq, &params, ts, tc, td,
+                            ) {
+                                yield Ok(evt);
+                            }
+                            return;
+                        }
+
+                        // IDLE fallback: check trajectory status whenever the step count is 5, 10, ...
+                        let step_count = steps.len();
+                        if step_count > 4 && step_count % 5 == 0 {
+                            if let Ok((traj_status, traj)) = state.backend.get_trajectory(&cascade_id).await {
+                                if traj_status == 200 {
+                                    let run_status = traj["status"].as_str().unwrap_or("");
+                                    if run_status.contains("IDLE") && !last_text.is_empty() {
+                                        debug!("Trajectory IDLE, text length={}", last_text.len());
+                                        let mu = extract_model_usage(steps);
+                                        let usage = usage_from_poll(&state.mitm_store, &cascade_id, &mu, &params.user_text, &last_text).await;
+                                        let ts = extract_thinking_signature(steps);
+                                        let tc = extract_thinking_content(steps);
+                                        let td = extract_thinking_duration(steps);
+                                        for evt in completion_events(
+                                            &response_id, &model_name, &msg_id,
+                                            OUTPUT_IDX, CONTENT_IDX, &last_text, usage,
+                                            created_at, &seq, &params, ts, tc, td,
+                                        ) {
+                                            yield Ok(evt);
+                                        }
+                                        return;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            let poll_ms: u64 = rand::thread_rng().gen_range(800..1200);
+            tokio::time::sleep(tokio::time::Duration::from_millis(poll_ms)).await;
+        }
+
+        // Timeout: status "incomplete" is announced by `response.incomplete`, not `.completed`
+        let timeout_resp = build_response_object(
+            &response_id, &model_name, "incomplete", created_at, None,
+            vec![], Some(Usage::estimate(&params.user_text, "")),
+            params.instructions.as_deref(), params.store,
+            params.temperature, params.top_p,
+            params.max_output_tokens, params.previous_response_id.as_deref(),
+            params.user.as_deref(), &params.metadata,
+            None, None, None,
+        );
+        yield Ok(responses_sse_event(
+            "response.incomplete",
+            serde_json::json!({
+                "type": "response.incomplete",
+                "sequence_number": next_seq(),
+                "response": response_to_json(&timeout_resp),
+            }),
+        ));
+    };
+
+    Sse::new(stream)
+        .keep_alive(
+            axum::response::sse::KeepAlive::new()
+                .interval(std::time::Duration::from_secs(15))
+                .text(""),
+        )
+        .into_response()
+}
+
+// ─── SSE completion events ───────────────────────────────────────────────────
+
+/// Build the four closing SSE events, in protocol order, drawing numbers from the shared `seq`:
+/// 1. response.output_text.done
+/// 2. response.content_part.done
+/// 3. response.output_item.done
+/// 4. response.completed
+fn completion_events(
+    resp_id: &str,
+    model: &str,
+    msg_id: &str,
+    out_idx: u32,
+    content_idx: u32,
+    text: &str,
+    usage: Usage,
+    created_at: u64,
+    seq: &AtomicU32,
+    params: &RequestParams,
+    thinking_signature: Option<String>,
+    thinking: Option<String>,
+    thinking_duration: Option<String>,
+) -> Vec<Event> {
+    let next_seq = || seq.fetch_add(1, Ordering::Relaxed); // continues the caller's numbering
+    let completed_at = now_unix();
+
+    let output_item = serde_json::json!({ // finished message, embedded in output_item.done
+        "type": "message",
+        "id": msg_id,
+        "status": "completed",
+        "role": "assistant",
+        "content": [{
+            "type": "output_text",
+            "text": text,
+            "annotations": [],
+        }],
+    });
+
+    let completed_resp = build_response_object( // full Response carried by response.completed
+        resp_id, model, "completed", created_at, Some(completed_at),
+        vec![ResponseOutput {
+            output_type: "message",
+            id: msg_id.to_string(),
+            status: "completed",
+            role: "assistant",
+            content: vec![OutputContent {
+                content_type: "output_text",
+                text: text.to_string(),
+                annotations: vec![],
+            }],
+        }],
+        Some(usage),
+        params.instructions.as_deref(),
+        params.store,
+        params.temperature,
+        params.top_p,
+        params.max_output_tokens,
+        params.previous_response_id.as_deref(),
+        params.user.as_deref(),
+        &params.metadata,
+        thinking_signature,
+        thinking,
+        thinking_duration,
+    );
+
+    vec![ // elements are evaluated in order, so the four sequence numbers ascend
+        // 1. response.output_text.done
+        responses_sse_event(
+            "response.output_text.done",
+            serde_json::json!({
+                "type": "response.output_text.done",
+                "sequence_number": next_seq(),
+                "item_id": msg_id,
+                "output_index": out_idx,
+                "content_index": content_idx,
+                "text": text,
+            }),
+        ),
+        // 2. response.content_part.done
+        responses_sse_event(
+            "response.content_part.done",
+            serde_json::json!({
+                "type": "response.content_part.done",
+                "sequence_number": next_seq(),
+                "output_index": out_idx,
+                "content_index": content_idx,
+                "part": {
+                    "type": "output_text",
+                    "text": text,
+                    "annotations": [],
+                },
+            }),
+        ),
+        // 3. response.output_item.done
+        responses_sse_event(
+            "response.output_item.done",
+            serde_json::json!({
+                "type": "response.output_item.done",
+                "sequence_number": next_seq(),
+                "output_index": out_idx,
+                "item": output_item,
+            }),
+        ),
+        // 4. response.completed
+        responses_sse_event(
+            "response.completed",
+            serde_json::json!({
+                "type": "response.completed",
+                "sequence_number": next_seq(),
+                "response": response_to_json(&completed_resp),
+            }),
+        ),
+    ]
+}
diff --git a/src/api/types.rs b/src/api/types.rs
new file mode 100644
index 0000000..b076484
--- /dev/null
+++ b/src/api/types.rs
@@ -0,0 +1,241 @@
+//! Request/response types for the OpenAI-compatible API.
+//!
+//! All response shapes strictly match the official OpenAI Responses API spec:
+//! https://platform.openai.com/docs/api-reference/responses
+
+use serde::{Deserialize, Serialize};
+
+// ─── Request types ───────────────────────────────────────────────────────────
+
+#[derive(Deserialize)]
+pub(crate) struct ResponsesRequest { // JSON body accepted by the /v1/responses handler
+    pub model: Option<String>, // the handler substitutes DEFAULT_MODEL when absent
+    pub input: serde_json::Value, // a plain string or an array of message items
+    #[serde(default)]
+    pub instructions: Option<String>, // prepended to the user text when non-empty
+    #[serde(default)]
+    pub stream: bool, // false when omitted: reply is one JSON body instead of SSE
+    #[serde(default = "default_timeout")]
+    pub timeout: u64, // seconds to wait for the backend; 120 when omitted
+    pub conversation: Option<serde_json::Value>, // a string ID or an object with a string `id`
+    #[serde(default = "default_true")]
+    pub store: bool, // true when omitted; echoed back in the response
+    #[serde(default)]
+    pub temperature: Option<f64>, // echoed back; the response reports 1.0 when omitted
+    #[serde(default)]
+    pub top_p: Option<f64>, // echoed back; the response reports 1.0 when omitted
+    #[serde(default)]
+    pub max_output_tokens: Option<u64>, // echoed back in the response
+    #[serde(default)]
+    pub previous_response_id: Option<String>, // echoed back in the response
+    #[serde(default)]
+    pub metadata: Option<serde_json::Value>, // echoed back; `{}` when omitted
+    #[serde(default)]
+    pub user: Option<String>, // echoed back in the response
+}
+
+/// Chat Completions request (OpenAI-compatible).
+#[derive(Deserialize)]
+pub(crate) struct CompletionRequest {
+    pub model: Option<String>, // optional in the request body
+    pub messages: Vec<CompletionMessage>, // required: deserialization fails without it
+    #[serde(default)]
+    pub stream: bool, // false when omitted
+    #[serde(default = "default_timeout")]
+    pub timeout: u64, // 120 when omitted (see default_timeout)
+}
+
+#[derive(Deserialize)]
+pub(crate) struct CompletionMessage { // one entry of `CompletionRequest::messages`
+    pub role: String,
+    pub content: serde_json::Value, // kept as raw JSON, so any JSON shape deserializes
+}
+
+fn default_timeout() -> u64 { // serde default for the `timeout` request fields
+    120 // the /v1/responses stream loop compares this against elapsed whole seconds
+}
+
+fn default_true() -> bool { // serde default for `ResponsesRequest::store`
+    true
+}
+
+// ─── Response types (official OpenAI Responses API shape) ────────────────────
+
+/// Top-level Response object, shaped after OpenAI's, plus three optional proxy extensions.
+#[derive(Serialize, Clone)]
+pub(crate) struct ResponsesResponse {
+    pub id: String,
+    pub object: &'static str, // build_response_object always sets "response"
+    pub created_at: u64, // Unix seconds
+    pub status: &'static str, // "in_progress", "completed" or "incomplete" in responses.rs
+    #[serde(serialize_with = "serialize_option_u64")]
+    pub completed_at: Option<u64>, // Unix seconds; null until the response has finished
+    pub error: Option<serde_json::Value>, // build_response_object always sets None
+    pub incomplete_details: Option<serde_json::Value>, // build_response_object always sets None
+    pub instructions: Option<String>,
+    #[serde(serialize_with = "serialize_option_u64")]
+    pub max_output_tokens: Option<u64>,
+    pub model: String,
+    pub output: Vec<ResponseOutput>,
+    pub parallel_tool_calls: bool,
+    pub previous_response_id: Option<String>,
+    pub reasoning: Reasoning,
+    pub store: bool,
+    pub temperature: f64,
+    pub text: TextFormat,
+    pub tool_choice: &'static str,
+    pub tools: Vec<serde_json::Value>,
+    pub top_p: f64,
+    pub truncation: &'static str,
+    pub usage: Option<Usage>, // None only in the in-progress shell built for streaming
+    pub user: Option<String>,
+    pub metadata: serde_json::Value,
+    /// Proxy extension: opaque thinking verification signature.
+    /// Present for all models. Required for multi-turn chaining with thinking models.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub thinking_signature: Option<String>,
+    /// Proxy extension: the model's internal reasoning/thinking content.
+    /// Available for all models (Opus, Gemini Flash, Gemini Pro).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub thinking: Option<String>,
+    /// Proxy extension: time spent thinking (e.g. "0.041999832s").
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub thinking_duration: Option<String>,
+}
+
+#[derive(Serialize, Clone)]
+pub(crate) struct ResponseOutput { // one entry of `ResponsesResponse::output`
+    #[serde(rename = "type")]
+    pub output_type: &'static str, // serialized as `type`; responses.rs always uses "message"
+    pub id: String,
+    pub status: &'static str,
+    pub role: &'static str,
+    pub content: Vec<OutputContent>,
+}
+
+#[derive(Serialize, Clone)]
+pub(crate) struct OutputContent { // one content part of a `ResponseOutput`
+    #[serde(rename = "type")]
+    pub content_type: &'static str, // serialized as `type`; responses.rs uses "output_text"
+    pub text: String,
+    pub annotations: Vec<serde_json::Value>, // responses.rs always leaves this empty
+}
+
+#[derive(Serialize, Clone)]
+pub(crate) struct Usage { // token accounting attached to a response
+    pub input_tokens: u64,
+    pub input_tokens_details: InputTokensDetails,
+    pub output_tokens: u64,
+    pub output_tokens_details: OutputTokensDetails,
+    pub total_tokens: u64, // every constructor in this patch sets input_tokens + output_tokens
+}
+
+#[derive(Serialize, Clone)]
+pub(crate) struct InputTokensDetails { // nested under `Usage::input_tokens_details`
+    pub cached_tokens: u64, // filled from MITM cache-read counts in responses.rs, otherwise 0
+}
+
+#[derive(Serialize, Clone)]
+pub(crate) struct OutputTokensDetails { // nested under `Usage::output_tokens_details`
+    pub reasoning_tokens: u64, // filled from MITM thinking counts in responses.rs, otherwise 0
+}
+
+#[derive(Serialize, Clone)]
+pub(crate) struct Reasoning { // value of the response's `reasoning` field
+    pub effort: Option<String>, // no skip attribute, so None serializes as JSON null
+    pub summary: Option<String>, // no skip attribute, so None serializes as JSON null
+}
+
+#[derive(Serialize, Clone)]
+pub(crate) struct TextFormat { // value of the response's `text` field
+    pub format: TextFormatInner, // nested object carrying the format's `type` tag
+}
+
+#[derive(Serialize, Clone)]
+pub(crate) struct TextFormatInner { // inner object of `TextFormat`
+    #[serde(rename = "type")]
+    pub format_type: &'static str, // serialized as `type`; the Default impl uses "text"
+}
+
+impl Usage {
+    /// Approximate token counts from raw text when no measured usage is available.
+    /// Uses a ~4 chars/token heuristic over the UTF-8 byte length, rounding each count up.
+    pub fn estimate(input_text: &str, output_text: &str) -> Self {
+        // Ceiling division of the byte length by four.
+        let approx_tokens = |text: &str| (text.len() as u64).div_ceil(4);
+        let input_tokens = approx_tokens(input_text);
+        let output_tokens = approx_tokens(output_text);
+        Self {
+            input_tokens,
+            input_tokens_details: InputTokensDetails { cached_tokens: 0 },
+            output_tokens,
+            output_tokens_details: OutputTokensDetails { reasoning_tokens: 0 },
+            total_tokens: input_tokens + output_tokens,
+        }
+    }
+}
+
+impl Default for Usage {
+    /// An empty usage record.
+    /// Every counter, including both detail breakdowns, is zero.
+    fn default() -> Self {
+        Self {
+            input_tokens_details: InputTokensDetails { cached_tokens: 0 },
+            output_tokens_details: OutputTokensDetails { reasoning_tokens: 0 },
+            input_tokens: 0,
+            output_tokens: 0,
+            total_tokens: 0,
+        }
+    }
+}
+
+impl Default for Reasoning {
+    /// No reasoning information.
+    /// Both `effort` and `summary` start out as `None`.
+    fn default() -> Self {
+        let (effort, summary) = (None, None);
+        Self { effort, summary }
+    }
+}
+
+impl Default for TextFormat {
+    /// Plain-text output format.
+    fn default() -> Self {
+        // `format_type` is renamed to `type` when serialized.
+        let format = TextFormatInner {
+            format_type: "text",
+        };
+        Self { format }
+    }
+}
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+/// Serialize `Option<u64>` as the number when present and as an explicit null otherwise.
+fn serialize_option_u64<S>(val: &Option<u64>, s: S) -> Result<S::Ok, S::Error>
+where
+    S: serde::Serializer,
+{
+    if let Some(number) = *val {
+        return s.serialize_u64(number);
+    }
+    s.serialize_none()
+}
+
+// ─── Shared types ────────────────────────────────────────────────────────────
+
+#[derive(Deserialize)]
+pub(crate) struct TokenRequest { // presumably the body of POST /v1/token; confirm in api/mod.rs
+    pub token: String, // required: deserialization fails without it
+}
+
+#[derive(Serialize)]
+pub(crate) struct ErrorResponse { // JSON envelope produced by `err_response`
+    pub error: ErrorDetail, // serializes as {"error": {"message": ..., "type": ...}}
+}
+
+#[derive(Serialize)]
+pub(crate) struct ErrorDetail { // payload nested under `ErrorResponse::error`
+    pub message: String, // human-readable description
+    #[serde(rename = "type")]
+    pub error_type: String, // serialized as `type`, e.g. "invalid_request_error"
+}
diff --git a/src/api/util.rs b/src/api/util.rs
new file mode 100644
index 0000000..393e327
--- /dev/null
+++ b/src/api/util.rs
@@ -0,0 +1,36 @@
+//! Shared utilities for API handlers.
+
+use axum::{
+    http::StatusCode,
+    response::{sse::Event, IntoResponse, Json},
+};
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use super::types::{ErrorDetail, ErrorResponse};
+
+/// Build an HTTP response carrying `status` and a JSON error body.
+///
+/// The body is an `ErrorResponse` wrapping `message` and `error_type`.
+pub(crate) fn err_response(
+    status: StatusCode,
+    message: String,
+    error_type: &str,
+) -> axum::response::Response {
+    let error_type = error_type.to_owned();
+    let error = ErrorDetail { message, error_type };
+    let payload = Json(ErrorResponse { error });
+    (status, payload).into_response()
+}
+
+/// Current wall-clock time as whole seconds since the Unix epoch
+/// (0 if the system clock reads earlier than the epoch).
+pub(crate) fn now_unix() -> u64 {
+    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH);
+    since_epoch.map_or(0, |elapsed| elapsed.as_secs())
+}
+
+/// Build a server-sent event named `event_type` whose data is `data` encoded as JSON text.
+/// Panics if `data` cannot be serialized.
+pub(crate) fn responses_sse_event(event_type: &str, data: serde_json::Value) -> Event {
+    Event::default().event(event_type).data(serde_json::to_string(&data).unwrap())
+}
diff --git a/src/backend.rs b/src/backend.rs
new file mode 100644
index 0000000..7d13b84
--- /dev/null
+++ b/src/backend.rs
@@ -0,0 +1,462 @@
+//! Backend: discovery of the local Antigravity language server and HTTP client.
+//!
+//! Uses wreq (BoringSSL) to impersonate Chrome's TLS + HTTP/2 fingerprint,
+//! making our requests indistinguishable from the real Electron webview.
+
+use crate::constants::*;
+use flate2::read::{DeflateDecoder, GzDecoder};
+use std::fs;
+use std::io::Read;
+use std::process::Command;
+use std::sync::LazyLock;
+use tokio::sync::RwLock;
+use tracing::{debug, info, warn};
+use wreq::header::{HeaderMap, HeaderName, HeaderValue};
+
+/// Connection details for the local language server, plus the HTTP client used to reach it.
+pub struct Backend {
+    inner: RwLock<BackendInner>, // current connection details; replaced as a whole by `refresh`
+    client: wreq::Client, // built once in `Backend::new` and used for every RPC call
+}
+
+struct BackendInner {
+    pid: String, // language server PID as reported by `pgrep`
+    csrf: String, // argument following `--csrf_token` on the server's command line (may be empty)
+    https_port: String, // HTTPS port parsed from the Antigravity log, or "3100" when none is found
+    oauth_token: String, // OAuth token; empty when none has been configured
+}
+
+/// Static headers that never change — built once, in Chrome's exact emission order.
+///
+/// Order matters: wreq preserves insertion order in HTTP/2 HEADERS frames.
+/// This matches the order captured from Chrome DevTools on the real webview.
+static STATIC_HEADERS: LazyLock<HeaderMap> = LazyLock::new(|| {
+    let mut h = HeaderMap::with_capacity(14);
+    // Chrome order: Origin → UA → Accept → Accept-Encoding → Accept-Language
+    //   → sec-ch-ua → sec-ch-ua-mobile → sec-ch-ua-platform → Sec-Fetch-Dest
+    //   → Sec-Fetch-Mode → Sec-Fetch-Site → Referer → Priority → Connect-Protocol-Version
+    // NOTE(review): Referer is listed above but is never inserted below (13 of 14 set).
+    h.insert("Origin", hv("vscode-file://vscode-app"));
+    h.insert("User-Agent", hv(&USER_AGENT));
+    h.insert("Accept", hv("*/*"));
+    h.insert("Accept-Encoding", hv("gzip, deflate, br, zstd"));
+    h.insert("Accept-Language", hv("en-US"));
+    h.insert(
+        HeaderName::from_static("sec-ch-ua"),
+        hv(&format!(
+            "\"Not_A Brand\";v=\"99\", \"Chromium\";v=\"{}\"",
+            *CHROME_MAJOR,
+        )),
+    );
+    h.insert(
+        HeaderName::from_static("sec-ch-ua-mobile"),
+        hv("?0"),
+    );
+    h.insert(
+        HeaderName::from_static("sec-ch-ua-platform"),
+        hv("\"Linux\""),
+    );
+    h.insert("Sec-Fetch-Dest", hv("empty"));
+    h.insert("Sec-Fetch-Mode", hv("cors"));
+    h.insert("Sec-Fetch-Site", hv("cross-site"));
+    h.insert("Priority", hv("u=1, i"));
+    h.insert("Connect-Protocol-Version", hv("1"));
+    h
+});
+
+impl Backend {
+    /// Discover the running language server and build a BoringSSL-backed client (`Err` if either step fails).
+    pub fn new() -> Result<Self, String> {
+        let inner = discover()?;
+
+        // wreq with Chrome impersonation: BoringSSL + Chrome JA3/JA4 + H2 fingerprint
+        let client = wreq::Client::builder()
+            .emulation(wreq_util::Emulation::Chrome142) // NOTE(review): fixed at 142; CHROME_MAJOR is detected at runtime
+            .cert_verification(false)   // LS uses self-signed cert
+            .verify_hostname(false)
+            .build()
+            .map_err(|e| format!("wreq client build failed: {e}"))?;
+
+        Ok(Self {
+            inner: RwLock::new(inner),
+            client,
+        })
+    }
+
+    /// Re-discover LS details off-thread; keeps the cached token when discovery finds none.
+    pub async fn refresh(&self) -> Result<(), String> {
+        let spawned = tokio::task::spawn_blocking(discover).await;
+        let fresh = spawned.map_err(|e| format!("spawn_blocking failed: {e}"))??;
+        let mut guard = self.inner.write().await;
+        let keep_cached = fresh.oauth_token.is_empty();
+        let oauth_token = if keep_cached { guard.oauth_token.clone() } else { fresh.oauth_token };
+        *guard = BackendInner { oauth_token, ..fresh };
+        Ok(())
+    }
+
+    /// Snapshot of the current connection details for the startup banner.
+    ///
+    /// Returns `(pid, https_port, csrf_preview, token_preview)`; the CSRF value is
+    /// cut to 8 characters and the token to 20, or shown as `NOT SET` when empty.
+    pub async fn info(&self) -> (String, String, String, String) {
+        let inner = self.inner.read().await;
+        let pid = inner.pid.clone();
+        let https_port = inner.https_port.clone();
+        // Secrets are only ever returned in truncated form.
+        let csrf_preview = safe_truncate(&inner.csrf, 8);
+        let token_preview = match inner.oauth_token.as_str() {
+            "" => "NOT SET".to_string(),
+            token => safe_truncate(token, 20),
+        };
+        (pid, https_port, csrf_preview, token_preview)
+    }
+
+    /// Resolve the OAuth token to use right now.
+    ///
+    /// Sources are tried in order: the token file (only a value starting with
+    /// `ya29.` is accepted), then `ANTIGRAVITY_OAUTH_TOKEN`, then the cached value.
+    /// A token found in the file or env var also replaces the cached one.
+    pub async fn oauth_token(&self) -> String {
+        // 1. Token file, read with async I/O so the runtime is not blocked.
+        let from_file = tokio::fs::read_to_string(token_file_path())
+            .await
+            .ok()
+            .map(|raw| raw.trim().to_string())
+            .filter(|t| !t.is_empty() && t.starts_with("ya29."));
+        if let Some(fresh) = from_file {
+            let mut inner = self.inner.write().await;
+            if inner.oauth_token != fresh {
+                info!("Token updated from file");
+                inner.oauth_token.clone_from(&fresh);
+            }
+            return fresh;
+        }
+
+        // 2. Environment variable.
+        let from_env = std::env::var("ANTIGRAVITY_OAUTH_TOKEN").ok();
+        if let Some(fresh) = from_env.filter(|t| !t.is_empty()) {
+            let mut inner = self.inner.write().await;
+            if inner.oauth_token != fresh {
+                info!("Token updated from env var");
+                inner.oauth_token.clone_from(&fresh);
+            }
+            return fresh;
+        }
+
+        // 3. Whatever is already cached.
+        self.inner.read().await.oauth_token.clone()
+    }
+
+    /// Send `UpdateConversationAnnotations` for `cascade_id`, setting
+    /// `lastUserViewTime` to the current UTC time with millisecond precision.
+    ///
+    /// Per this file's notes, the real webview makes this call after every message,
+    /// so the proxy does the same. Any response status is treated as success; an
+    /// `Err` from `call_json` is logged and passed back to the caller.
+    pub async fn update_annotations(&self, cascade_id: &str) -> Result<(), String> {
+        let viewed_at = chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Millis, true);
+        let request = serde_json::json!({
+            "cascadeId": cascade_id,
+            "annotations": {
+                "lastUserViewTime": viewed_at
+            },
+            "mergeAnnotations": true
+        });
+
+        // Log the outcome either way; the response body is not used.
+        let outcome = self.call_json("UpdateConversationAnnotations", &request).await;
+        match &outcome {
+            Ok((status, _)) => debug!("UpdateConversationAnnotations: {status}"),
+            Err(e) => warn!("UpdateConversationAnnotations failed: {e}"),
+        }
+        outcome.map(|_| ())
+    }
+
+    /// Overwrite the cached OAuth token while the proxy is running.
+    /// (`oauth_token` still prefers a token file or env var when one is present.)
+    pub async fn set_oauth_token(&self, token: String) {
+        self.inner.write().await.oauth_token = token;
+    }
+
+    // ─── RPC calls ──────────────────────────────────────────────────────
+
+    /// Per-request headers: a copy of `STATIC_HEADERS` plus the CSRF token header.
+    ///
+    /// A token that is not a valid header value is left out (with a warning).
+    fn common_headers(csrf: &str) -> HeaderMap {
+        let mut headers = STATIC_HEADERS.clone();
+        match HeaderValue::from_str(csrf) {
+            Ok(token) => {
+                headers.insert(HeaderName::from_static("x-codeium-csrf-token"), token);
+            }
+            Err(_) => warn!("CSRF token contains invalid header characters, omitting"),
+        }
+        headers
+    }
+
+    /// POST `body` as JSON to `{LS_SERVICE}/{method}`; returns `(status, body)`, where a non-JSON body becomes `{}`.
+    pub async fn call_json(
+        &self,
+        method: &str,
+        body: &serde_json::Value,
+    ) -> Result<(u16, serde_json::Value), String> {
+        let (base, csrf) = { // copy what is needed so the read guard is dropped before the request
+            let guard = self.inner.read().await;
+            (
+                format!("https://127.0.0.1:{}", guard.https_port),
+                guard.csrf.clone(),
+            )
+        };
+        let url = format!("{base}/{LS_SERVICE}/{method}");
+        let mut headers = Self::common_headers(&csrf);
+        headers.insert("Content-Type", HeaderValue::from_static("application/json"));
+
+        let body_bytes = serde_json::to_vec(body)
+            .map_err(|e| format!("JSON serialize error: {e}"))?;
+
+        let resp = self
+            .client
+            .post(&url)
+            .headers(headers)
+            .body(body_bytes)
+            .send()
+            .await
+            .map_err(|e| format!("HTTP error: {e}"))?;
+
+        let status = resp.status().as_u16();
+        let encoding = resp
+            .headers()
+            .get("content-encoding")
+            .and_then(|v| v.to_str().ok())
+            .unwrap_or("")
+            .to_string();
+        let raw = resp.bytes().await
+            .map_err(|e| format!("Read body error: {e}"))?;
+        let resp_bytes = decompress(method, &raw, &encoding);
+        tracing::debug!(
+            "{method} response ({status}, {} bytes, enc={encoding})",
+            resp_bytes.len(),
+        );
+        tracing::trace!(
+            "{method} body: {}",
+            String::from_utf8_lossy(&resp_bytes[..resp_bytes.len().min(200)]) // at most the first 200 bytes
+        );
+        let data: serde_json::Value = match serde_json::from_slice(&resp_bytes) {
+            Ok(v) => v,
+            Err(e) => {
+                tracing::warn!("{method} response is not valid JSON: {e}");
+                serde_json::Value::Object(serde_json::Map::new()) // fall back to an empty JSON object
+            }
+        };
+        Ok((status, data))
+    }
+
+    /// POST `body` as `application/proto` to `{LS_SERVICE}/{method}`; returns `(status, decompressed body bytes)`.
+    pub async fn call_proto(
+        &self,
+        method: &str,
+        body: Vec<u8>,
+    ) -> Result<(u16, Vec<u8>), String> {
+        let (base, csrf) = { // copy what is needed so the read guard is dropped before the request
+            let guard = self.inner.read().await;
+            (
+                format!("https://127.0.0.1:{}", guard.https_port),
+                guard.csrf.clone(),
+            )
+        };
+        let url = format!("{base}/{LS_SERVICE}/{method}");
+        let mut headers = Self::common_headers(&csrf);
+        headers.insert("Content-Type", HeaderValue::from_static("application/proto"));
+
+        let resp = self
+            .client
+            .post(&url)
+            .headers(headers)
+            .body(body)
+            .send()
+            .await
+            .map_err(|e| format!("HTTP error: {e}"))?;
+
+        let status = resp.status().as_u16();
+        let encoding = resp
+            .headers()
+            .get("content-encoding")
+            .and_then(|v| v.to_str().ok())
+            .unwrap_or("")
+            .to_string();
+        let raw = resp
+            .bytes()
+            .await
+            .map_err(|e| format!("Read body error: {e}"))?;
+        let decompressed = decompress(method, &raw, &encoding); // returned as-is; the status is not checked here
+        Ok((status, decompressed))
+    }
+
+    /// Call `StartCascade` and return its `cascadeId`; errors on a non-200 status or a missing id.
+    pub async fn create_cascade(&self) -> Result<String, String> {
+        let request = serde_json::json!({"prompt": "new chat"});
+        let (status, data) = self.call_json("StartCascade", &request).await?;
+        if status != 200 {
+            return Err(format!("StartCascade failed: {status} — {data}"));
+        }
+        tracing::debug!("StartCascade response: {data}");
+        match data["cascadeId"].as_str() {
+            Some(id) => Ok(id.to_owned()),
+            None => Err(format!("Missing cascadeId in response: {data}")),
+        }
+    }
+
+    /// Send `text` via `SendUserCascadeMessage` (protobuf body); `Err` up front if no OAuth token exists.
+    pub async fn send_message(
+        &self,
+        cascade_id: &str,
+        text: &str,
+        model_enum: u32,
+    ) -> Result<(u16, Vec<u8>), String> {
+        let token = match self.oauth_token().await {
+            t if t.is_empty() => return Err("No OAuth token available".to_string()),
+            t => t,
+        };
+        let payload = crate::proto::build_request(cascade_id, text, &token, model_enum);
+        self.call_proto("SendUserCascadeMessage", payload).await
+    }
+
+    /// Fetch a cascade's steps via `GetCascadeTrajectorySteps`; returns `(status, JSON body)`.
+    pub async fn get_steps(
+        &self,
+        cascade_id: &str,
+    ) -> Result<(u16, serde_json::Value), String> {
+        let request = serde_json::json!({ "cascadeId": cascade_id });
+        self.call_json("GetCascadeTrajectorySteps", &request).await
+    }
+
+    /// Fetch a cascade's trajectory via `GetCascadeTrajectory`; returns `(status, JSON body)`.
+    pub async fn get_trajectory(
+        &self,
+        cascade_id: &str,
+    ) -> Result<(u16, serde_json::Value), String> {
+        let request = serde_json::json!({ "cascadeId": cascade_id });
+        self.call_json("GetCascadeTrajectory", &request).await
+    }
+}
+
+// ─── Discovery helpers ───────────────────────────────────────────────────────
+
+/// Locate the running language server and collect everything needed to talk to it.
+///
+/// Steps: find the PID with `pgrep`, read `--csrf_token` from `/proc/<pid>/cmdline`,
+/// scan the Antigravity logs for the HTTPS port (falling back to 3100), and load an
+/// initial OAuth token from `ANTIGRAVITY_OAUTH_TOKEN` or the token file.
+///
+/// Returns `Err` when no server process is found or its command line is unreadable.
+fn discover() -> Result<BackendInner, String> {
+    // Invoke pgrep directly: under `sh -c "pgrep -f … | head -1"` the shell's own
+    // command line contains the pattern, so pgrep could report the shell itself.
+    let pid_output = Command::new("pgrep")
+        .args(["-f", "language_server_linux"])
+        .output()
+        .map_err(|e| format!("pgrep failed: {e}"))?;
+
+    let stdout = String::from_utf8_lossy(&pid_output.stdout);
+    let pid = stdout.lines().next().unwrap_or_default().trim().to_string();
+    if pid.is_empty() {
+        return Err("Language server not running".to_string());
+    }
+
+    let cmdline = fs::read(format!("/proc/{pid}/cmdline"))
+        .map_err(|e| format!("Can't read cmdline for PID {pid}: {e}"))?;
+    // Arguments are NUL-separated; the token is the argument after `--csrf_token`.
+    let args: Vec<&[u8]> = cmdline.split(|&b| b == 0).collect();
+    let csrf = args
+        .windows(2)
+        .filter(|pair| pair[0] == b"--csrf_token".as_slice())
+        .filter_map(|pair| std::str::from_utf8(pair[1]).ok())
+        .last()
+        .unwrap_or_default()
+        .to_string();
+    debug!("Discovered LS PID={pid}, CSRF={}", safe_truncate(&csrf, 8));
+
+    let log_base = log_base();
+    let mut https_port = String::new();
+
+    if let Ok(entries) = fs::read_dir(&log_base) {
+        // Directories named `202…` (presumably timestamps), sorted descending.
+        let mut dirs: Vec<String> = entries
+            .map_while(Result::ok)
+            .map(|entry| entry.file_name().to_string_lossy().into_owned())
+            .filter(|name| name.starts_with("202"))
+            .collect();
+        dirs.sort_unstable_by(|a, b| b.cmp(a));
+
+        static PORT_RE: LazyLock<regex::Regex> =
+            LazyLock::new(|| regex::Regex::new(r"port at (\d+) for HTTPS").unwrap());
+
+        for d in &dirs {
+            let log_path = format!(
+                "{log_base}/{d}/window1/exthost/google.antigravity/Antigravity.log"
+            );
+            let Ok(contents) = fs::read_to_string(&log_path) else { continue };
+            // Within one log file the last matching line wins.
+            let found = contents
+                .lines()
+                .filter(|l| l.contains(&pid) && l.contains("listening") && l.contains("HTTPS"))
+                .filter_map(|l| PORT_RE.captures(l))
+                .last();
+            if let Some(caps) = found {
+                https_port = caps[1].to_string();
+                break;
+            }
+        }
+    }
+
+    if https_port.is_empty() {
+        warn!("Could not find HTTPS port in logs, defaulting to 3100");
+        https_port = "3100".to_string();
+    }
+
+    // Initial token: env var first, then the same token file `oauth_token` reads.
+    let oauth_token = std::env::var("ANTIGRAVITY_OAUTH_TOKEN")
+        .ok()
+        .filter(|s| !s.is_empty())
+        .or_else(|| {
+            fs::read_to_string(token_file_path())
+                .ok()
+                .map(|s| s.trim().to_string())
+                .filter(|s| !s.is_empty())
+        })
+        .unwrap_or_default();
+
+    Ok(BackendInner {
+        pid,
+        csrf,
+        https_port,
+        oauth_token,
+    })
+}
+
+/// Shorthand that builds a `HeaderValue` from `s` and panics if it is invalid — only for known-safe values.
+fn hv(s: &str) -> HeaderValue {
+    HeaderValue::from_str(s).expect("invalid header value in static constant")
+}
+
+/// Decode `data` per its `Content-Encoding` (`gzip`, `deflate`, `br`; case-insensitive).
+/// Unknown encodings and undecodable bodies are returned unchanged (the latter with a warning).
+fn decompress(method: &str, data: &[u8], encoding: &str) -> Vec<u8> {
+    let mut out = Vec::new();
+    let res = match encoding.trim().to_ascii_lowercase().as_str() {
+        "gzip" | "x-gzip" => GzDecoder::new(data).read_to_end(&mut out),
+        // HTTP "deflate" means zlib-wrapped data (RFC 9110); raw DEFLATE is the fallback.
+        "deflate" => flate2::read::ZlibDecoder::new(data).read_to_end(&mut out).or_else(|_| {
+            out.clear();
+            DeflateDecoder::new(data).read_to_end(&mut out)
+        }),
+        "br" => brotli::Decompressor::new(data, 4096).read_to_end(&mut out),
+        _ => return data.to_vec(),
+    };
+    if let Err(e) = res {
+        let preview = String::from_utf8_lossy(&data[..data.len().min(100)]);
+        warn!("{method}: {encoding} decompress failed ({} bytes): {e}. Raw: {}", data.len(), preview);
+        return data.to_vec();
+    }
+    out
+}
diff --git a/src/constants.rs b/src/constants.rs
new file mode 100644
index 0000000..5bbce4a
--- /dev/null
+++ b/src/constants.rs
@@ -0,0 +1,217 @@
+//! Shared constants — auto-detected from the installed Antigravity binary at startup.
+//!
+//! On first access, we locate the Antigravity installation (via the running
+//! language server PID or well-known paths), parse `product.json` for version
+//! strings, and extract Chrome/Electron versions from the binary. If detection
+//! fails, we fall back to hardcoded values.
+
+use std::fs;
+use std::process::Command;
+use std::sync::LazyLock;
+
+/// Auto-detected version info from the installed Antigravity app.
+struct DetectedVersions {
+    antigravity: String, // `version` from product.json (fallback "1.107.0")
+    chrome: String, // `Chrome/x.y.z.w` string found in the binary (fallback "142.0.7444.175")
+    electron: String, // `Electron/x.y.z` string found in the binary (fallback "39.2.3")
+    client: String, // `ideVersion` from product.json (fallback "1.16.5")
+}
+
+/// Locate the Antigravity install root.
+///
+/// First resolves the running language server's `/proc/<pid>/exe` link and keeps
+/// everything before `/resources/`; otherwise probes well-known install paths
+/// for `resources/app/product.json`. Returns `None` when neither works.
+fn find_install_dir() -> Option<String> {
+    // 1. Trace the running language server. pgrep is run directly: under
+    //    `sh -c "pgrep -f …"` the shell's own command line matches the pattern.
+    let from_process = Command::new("pgrep")
+        .args(["-f", "language_server_linux"])
+        .output()
+        .ok()
+        .and_then(|out| {
+            let stdout = String::from_utf8_lossy(&out.stdout);
+            let pid = stdout.lines().next()?.trim().to_string();
+            // exe is like: /usr/share/antigravity/resources/app/extensions/antigravity/bin/language_server_linux_x64
+            // We want: /usr/share/antigravity
+            let exe = fs::read_link(format!("/proc/{pid}/exe")).ok()?;
+            let exe = exe.to_string_lossy();
+            exe.find("/resources/").map(|idx| exe[..idx].to_string())
+        });
+    if from_process.is_some() {
+        return from_process;
+    }
+
+    // 2. Fall back to well-known install paths
+    ["/usr/share/antigravity", "/opt/Antigravity"]
+        .iter()
+        .find(|path| fs::metadata(format!("{path}/resources/app/product.json")).is_ok())
+        .map(|path| path.to_string())
+}
+
+/// Parse `<install_dir>/resources/app/product.json` and return its
+/// `(version, ideVersion)` string fields; a field is `None` when absent or not
+/// a string, and both are `None` if the file is unreadable or not valid JSON.
+fn read_product_json(install_dir: &str) -> (Option<String>, Option<String>) {
+    let manifest = format!("{install_dir}/resources/app/product.json");
+    let parsed = fs::read_to_string(&manifest)
+        .ok()
+        .and_then(|text| serde_json::from_str::<serde_json::Value>(&text).ok());
+    let Some(json) = parsed else {
+        return (None, None);
+    };
+    let field = |key: &str| json[key].as_str().map(str::to_owned);
+    (field("version"), field("ideVersion"))
+}
+
+/// Extract Chrome and Electron versions from the main binary via `strings`.
+/// Pattern: "Chrome/142.0.7444.175", "Electron/39.2.3".
+fn extract_binary_versions(install_dir: &str) -> (Option<String>, Option<String>) {
+    let binary = format!("{install_dir}/antigravity");
+    if fs::metadata(&binary).is_err() {
+        return (None, None);
+    }
+
+    // Pipe `strings` into `grep -oP` so the binary is streamed, not loaded into memory.
+    // The path is passed as `$1` instead of being spliced into the script text, so a
+    // quote or other shell metacharacter in the install path cannot change the command.
+    let chrome = Command::new("sh")
+        .args([
+            "-c",
+            "strings \"$1\" | grep -oP 'Chrome/[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+' | head -1",
+            "sh",
+            binary.as_str(),
+        ])
+        .output()
+        .ok()
+        .and_then(|o| {
+            let s = String::from_utf8_lossy(&o.stdout).trim().to_string();
+            s.strip_prefix("Chrome/").map(|v| v.to_string())
+        });
+
+    let electron = Command::new("sh")
+        .args([
+            "-c",
+            "strings \"$1\" | grep -oP 'Electron/[0-9]+\\.[0-9]+\\.[0-9]+' | head -1",
+            "sh",
+            binary.as_str(),
+        ])
+        .output()
+        .ok()
+        .and_then(|o| {
+            let s = String::from_utf8_lossy(&o.stdout).trim().to_string();
+            s.strip_prefix("Electron/").map(|v| v.to_string())
+        });
+
+    (chrome, electron)
+}
+
+/// Detect all versions from the installed app; undetectable values use the fallbacks below.
+fn detect_versions() -> DetectedVersions {
+    // Hardcoded fallbacks — last known good values
+    const FALLBACK_ANTIGRAVITY: &str = "1.107.0";
+    const FALLBACK_CHROME: &str = "142.0.7444.175";
+    const FALLBACK_ELECTRON: &str = "39.2.3";
+    const FALLBACK_CLIENT: &str = "1.16.5";
+
+    // No install found: every field is a fallback.
+    let install_dir = match find_install_dir() {
+        Some(dir) => dir,
+        None => {
+            eprintln!(
+                "[constants] ⚠ Could not find Antigravity install — using fallback versions"
+            );
+            return DetectedVersions {
+                antigravity: FALLBACK_ANTIGRAVITY.to_owned(),
+                chrome: FALLBACK_CHROME.to_owned(),
+                electron: FALLBACK_ELECTRON.to_owned(),
+                client: FALLBACK_CLIENT.to_owned(),
+            };
+        }
+    };
+
+    // product.json gives the app and client versions; the binary gives Chrome/Electron.
+    let (antigravity, client) = read_product_json(&install_dir);
+    let (chrome, electron) = extract_binary_versions(&install_dir);
+    let versions = DetectedVersions {
+        antigravity: antigravity.unwrap_or_else(|| FALLBACK_ANTIGRAVITY.to_owned()),
+        chrome: chrome.unwrap_or_else(|| FALLBACK_CHROME.to_owned()),
+        electron: electron.unwrap_or_else(|| FALLBACK_ELECTRON.to_owned()),
+        client: client.unwrap_or_else(|| FALLBACK_CLIENT.to_owned()),
+    };
+
+    eprintln!(
+        "[constants] ✓ Detected versions: Antigravity={}, Chrome={}, Electron={}, Client={}",
+        versions.antigravity, versions.chrome, versions.electron, versions.client
+    );
+    versions
+}
+
+// ─── Public API ──────────────────────────────────────────────────────────────
+
+/// All detected versions — `detect_versions` runs once, on first access, and the result is reused.
+static VERSIONS: LazyLock<DetectedVersions> = LazyLock::new(detect_versions);
+
+/// Detected Antigravity app version, e.g. "1.107.0".
+pub fn antigravity_version() -> &'static str {
+    VERSIONS.antigravity.as_str()
+}
+
+/// Detected version of the Chrome bundled with Electron, e.g. "142.0.7444.175".
+pub fn chrome_version() -> &'static str {
+    VERSIONS.chrome.as_str()
+}
+
+/// Detected Electron version, e.g. "39.2.3".
+pub fn electron_version() -> &'static str {
+    VERSIONS.electron.as_str()
+}
+
+/// Detected client/IDE version (`ideVersion` in product.json), e.g. "1.16.5".
+pub fn client_version() -> &'static str {
+    VERSIONS.client.as_str()
+}
+
+pub const CLIENT_NAME: &str = "antigravity"; // client name string; usage is outside this file
+pub const LS_SERVICE: &str = "exa.language_server_pb.LanguageServerService"; // service path segment of every language-server RPC URL
+
+/// Log base directory for Antigravity: `$HOME/.config/Antigravity/logs` (`/root` if HOME is unset).
+pub fn log_base() -> String {
+    let home = std::env::var("HOME").unwrap_or_else(|_| String::from("/root"));
+    home + "/.config/Antigravity/logs"
+}
+
+/// Token file path: `$HOME/.config/antigravity-proxy-token` (`/root` if HOME is unset).
+pub fn token_file_path() -> String {
+    let home = std::env::var("HOME").unwrap_or_else(|_| String::from("/root"));
+    home + "/.config/antigravity-proxy-token"
+}
+
+/// User-Agent string matching the Electron webview — built once from the detected versions.
+pub static USER_AGENT: LazyLock<String> = LazyLock::new(|| {
+    format!(
+        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 \
+         (KHTML, like Gecko) Antigravity/{} \
+         Chrome/{} Electron/{} Safari/537.36",
+        antigravity_version(),
+        chrome_version(),
+        electron_version()
+    )
+});
+
+/// Chrome major version for sec-ch-ua header — computed once.
+///
+/// This is the text before the first `.` of `chrome_version()` (or the whole
+/// string when it contains no dot).
+pub static CHROME_MAJOR: LazyLock<String> = LazyLock::new(|| {
+    let full = chrome_version();
+    full.split_once('.').map_or(full, |(major, _)| major).to_string()
+});
+
+/// Keep at most `max` characters (not bytes) of `s`, appending `...` when anything was cut.
+pub fn safe_truncate(s: &str, max: usize) -> String {
+    if let Some((cut, _)) = s.char_indices().nth(max) {
+        return format!("{}...", &s[..cut]);
+    }
+    s.to_owned()
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..d326c09
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,332 @@
+//! Antigravity OpenAI Proxy — Rust edition v3 (stealth hardened).
+//!
+//! Single-binary replacement for server.py. BoringSSL TLS impersonation,
+//! byte-exact protobuf encoding, Chrome header fingerprinting, cascade
+//! session management, warmup + heartbeat lifecycle mimicry.
+
+mod api;
+mod backend;
+mod constants;
+mod mitm;
+mod proto;
+mod quota;
+mod session;
+mod warmup;
+
+use api::AppState;
+use backend::Backend;
+use clap::Parser;
+use session::SessionManager;
+use std::sync::Arc;
+use tracing::{info, warn};
+
+use mitm::store::MitmStore;
+
+#[derive(Parser)]
+#[command(name = "antigravity-proxy", about = "Antigravity OpenAI Proxy (stealth)")]
+struct Cli { // NOTE: the `///` comments on the fields below are emitted by clap as --help text
+    /// Port to listen on
+    #[arg(long, default_value_t = 8741)]
+    port: u16,
+
+    /// Enable info-level logging (-v)
+    #[arg(short, long)]
+    verbose: bool,
+
+    /// Enable debug-level logging (-d)
+    #[arg(short, long)]
+    debug: bool, // takes precedence over `verbose` when both are set (see `main`)
+
+    /// Disable the MITM proxy (no API interception)
+    #[arg(long)]
+    no_mitm: bool,
+
+    /// MITM proxy port (default: 8742, matches wrapper script)
+    #[arg(long, default_value_t = 8742)]
+    mitm_port: u16,
+}
+
+#[tokio::main]
+async fn main() {
+    let cli = Cli::parse();
+
+    // Flag > env var > default (warn)
+    let log_level = if cli.debug {
+        "debug"
+    } else if cli.verbose {
+        "info"
+    } else {
+        // Fall back to RUST_LOG env, or warn-only
+        ""
+    };
+
+    let filter = if log_level.is_empty() {
+        tracing_subscriber::EnvFilter::try_from_default_env()
+            .unwrap_or_else(|_| "warn".into())
+    } else {
+        tracing_subscriber::EnvFilter::new(log_level)
+    };
+
+    tracing_subscriber::fmt()
+        .with_env_filter(filter)
+        .init();
+
+    // ── Step 1: Bind main port FIRST (fail fast, before spawning anything) ────
+    let addr = format!("127.0.0.1:{}", cli.port);
+    let listener = match tokio::net::TcpListener::bind(&addr).await {
+        Ok(l) => l,
+        Err(e) => {
+            eprintln!("Fatal: cannot bind to {addr}: {e}");
+            eprintln!("Hint: kill $(lsof -ti:{}) 2>/dev/null", cli.port);
+            std::process::exit(1);
+        }
+    };
+
+    // ── Step 2: Backend discovery ─────────────────────────────────────────────
+    let backend = Arc::new(match Backend::new() {
+        Ok(b) => b,
+        Err(e) => {
+            eprintln!("Fatal: {e}");
+            std::process::exit(1);
+        }
+    });
+
+    let (pid, https_port, csrf, token) = backend.info().await; // captured now for the banner printed in step 5
+
+    // ── Step 3: MITM proxy (after port is secured) ────────────────────────────
+    let mitm_store = MitmStore::new();
+    let (mitm_port_actual, mitm_handle) = if !cli.no_mitm {
+        let data_dir = dirs_data_dir();
+        match mitm::ca::MitmCa::load_or_generate(&data_dir) {
+            Ok(ca) => {
+                let ca = Arc::new(ca);
+                let ca_pem = ca.ca_pem_path.display().to_string();
+                let config = mitm::proxy::MitmConfig {
+                    port: cli.mitm_port,
+                    modify_requests: false,
+                };
+                match mitm::proxy::run(ca, mitm_store.clone(), config).await {
+                    Ok((port, handle)) => {
+                        info!(port, ca = %ca_pem, "MITM proxy started");
+                        // Write actual port to file for wrapper script discovery
+                        let port_file = data_dir.join("mitm-port");
+                        if let Err(e) = std::fs::write(&port_file, port.to_string()) {
+                            warn!("Failed to write MITM port file: {e}");
+                        }
+                        (Some((port, ca_pem)), Some(handle))
+                    }
+                    Err(e) => {
+                        warn!("MITM proxy failed to start: {e}"); // non-fatal: the proxy keeps running without MITM
+                        (None, None)
+                    }
+                }
+            }
+            Err(e) => {
+                warn!("MITM CA generation failed: {e}"); // non-fatal: the proxy keeps running without MITM
+                (None, None)
+            }
+        }
+    } else {
+        info!("MITM proxy disabled (--no-mitm)");
+        (None, None)
+    };
+
+    // ── Step 4: Warmup + heartbeat ────────────────────────────────────────────
+    warmup::warmup_sequence(&backend).await;
+    let heartbeat_handle = warmup::start_heartbeat(Arc::clone(&backend));
+
+    // ── Step 4b: Quota monitor ────────────────────────────────────────────────
+    let quota_store = quota::QuotaStore::new();
+    quota_store.clone().start_polling(Arc::clone(&backend));
+    info!("Quota monitor started (polling every 60s)");
+
+    let state = Arc::new(AppState {
+        backend,
+        sessions: SessionManager::new(),
+        mitm_store,
+        quota_store,
+    });
+
+    // Periodic backend refresh — keeps LS connection details fresh
+    let refresh_backend = Arc::clone(&state.backend);
+    let refresh_handle = tokio::spawn(async move {
+        loop {
+            tokio::time::sleep(tokio::time::Duration::from_secs(60)).await;
+            if let Err(e) = refresh_backend.refresh().await {
+                warn!("Periodic refresh failed: {e}"); // keep looping; the previous details stay in place
+            }
+        }
+    });
+
+    // ── Step 5: Start serving ─────────────────────────────────────────────────
+    let app = api::router(state.clone());
+
+    print_banner(cli.port, &pid, &https_port, &csrf, &token, &mitm_port_actual);
+    info!("Listening on http://{addr}");
+
+    axum::serve(listener, app)
+        .with_graceful_shutdown(shutdown_signal())
+        .await
+        .expect("server error"); // a serve error panics here, skipping the cleanup below
+
+    // ── Cleanup: abort all background tasks ───────────────────────────────────
+    heartbeat_handle.abort();
+    refresh_handle.abort();
+    if let Some(h) = mitm_handle {
+        h.abort();
+    }
+    // Remove stale MITM port file
+    let _ = std::fs::remove_file(dirs_data_dir().join("mitm-port"));
+    info!("Server shutdown complete");
+}
+
+/// Resolves when SIGINT (Ctrl+C) or, on Unix, SIGTERM arrives; used as the graceful-shutdown trigger.
+async fn shutdown_signal() {
+    let ctrl_c = async {
+        tokio::signal::ctrl_c()
+            .await
+            .expect("failed to install Ctrl+C handler");
+    };
+
+    #[cfg(unix)]
+    let terminate = async {
+        tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
+            .expect("failed to install SIGTERM handler")
+            .recv()
+            .await;
+    };
+
+    #[cfg(not(unix))]
+    let terminate = std::future::pending::<()>(); // never resolves, so only Ctrl+C applies off Unix
+
+    tokio::select! { // whichever signal arrives first wins
+        _ = ctrl_c => info!("Received SIGINT, shutting down..."),
+        _ = terminate => info!("Received SIGTERM, shutting down..."),
+    }
+}
+
+/// Print the startup banner. The wrapper probe (a `/proc` scan) runs once so the status row and setup hint agree.
+fn print_banner(port: u16, pid: &str, https_port: &str, csrf: &str, token: &str, mitm: &Option<(u16, String)>) {
+    let chrome_major = &*constants::CHROME_MAJOR;
+    let ver = crate::constants::antigravity_version();
+    // Only probe when MITM is enabled; both MITM sections below reuse this one answer.
+    let wrapper_installed = mitm.is_some() && check_wrapper_installed();
+
+    println!();
+    println!("  \x1b[1;35m>> antigravity-proxy\x1b[0m \x1b[2mv{ver}\x1b[0m");
+    println!("  \x1b[2m────────────────────────────────────────────────\x1b[0m");
+    println!();
+    println!("  \x1b[1mcore\x1b[0m");
+    println!("  \x1b[36m  tls\x1b[0m        BoringSSL (Chrome {chrome_major})");
+    println!("  \x1b[36m  listen\x1b[0m     http://127.0.0.1:{port}");
+    println!("  \x1b[36m  ls pid\x1b[0m     {pid}");
+    println!("  \x1b[36m  https\x1b[0m      :{https_port}");
+    println!("  \x1b[36m  csrf\x1b[0m       {csrf}");
+    println!("  \x1b[36m  oauth\x1b[0m      {token}");
+    println!();
+
+    // MITM section
+    if let Some((mitm_port, ca_path)) = mitm {
+        println!("  \x1b[1mmitm\x1b[0m");
+        println!("  \x1b[36m  proxy\x1b[0m      127.0.0.1:{mitm_port}");
+        println!("  \x1b[36m  ca cert\x1b[0m    {ca_path}");
+        if wrapper_installed {
+            println!("  \x1b[36m  wrapper\x1b[0m    \x1b[32minstalled\x1b[0m");
+        } else {
+            println!("  \x1b[36m  wrapper\x1b[0m    \x1b[33mnot installed\x1b[0m");
+        }
+        println!();
+    } else {
+        println!("  \x1b[1mmitm\x1b[0m        \x1b[33mdisabled\x1b[0m");
+        println!();
+    }
+
+    // Routes
+    println!("  \x1b[1mroutes\x1b[0m");
+    println!("  \x1b[33m  POST\x1b[0m  /v1/responses");
+    println!("  \x1b[33m  POST\x1b[0m  /v1/chat/completions");
+    println!("  \x1b[32m  GET \x1b[0m  /v1/models");
+    println!("  \x1b[32m  GET \x1b[0m  /v1/sessions");
+    println!("  \x1b[31m  DEL \x1b[0m  /v1/sessions/:id");
+    println!("  \x1b[33m  POST\x1b[0m  /v1/token");
+    println!("  \x1b[32m  GET \x1b[0m  /v1/usage");
+    println!("  \x1b[32m  GET \x1b[0m  /v1/quota");
+    println!("  \x1b[32m  GET \x1b[0m  /health");
+    println!();
+
+    // Status line
+    let mitm_tag = if mitm.is_some() { "\x1b[32mmitm\x1b[0m" } else { "\x1b[31mmitm\x1b[0m" };
+    println!("  \x1b[2mstealth:\x1b[0m \x1b[32mwarmup\x1b[0m \x1b[32mheartbeat\x1b[0m \x1b[32mjitter\x1b[0m {mitm_tag}");
+    println!();
+
+    // Setup hints
+    if let Some((mitm_port, ca_path)) = mitm {
+        if !wrapper_installed {
+            println!("  \x1b[1;33m[!]\x1b[0m mitm wrapper not installed");
+            println!("      \x1b[2mrun:\x1b[0m  ./scripts/mitm-wrapper.sh install");
+            println!("      \x1b[2mor:\x1b[0m   HTTPS_PROXY=http://127.0.0.1:{mitm_port}");
+            println!("             NODE_EXTRA_CA_CERTS={ca_path}");
+            println!();
+        }
+    }
+
+    if token == "NOT SET" {
+        println!("  \x1b[1;33m[!]\x1b[0m no oauth token");
+        println!("      export ANTIGRAVITY_OAUTH_TOKEN=ya29.xxx");
+        println!("      curl -X POST http://127.0.0.1:{port}/v1/token -d '{{\"token\":\"ya29.xxx\"}}'");
+        println!("      echo 'ya29.xxx' > ~/.config/antigravity-proxy-token");
+        println!();
+    }
+}
+
+/// Report whether the MITM wrapper appears to be installed.
+///
+/// The wrapper counts as installed when a `<ls path>.real` file exists, where the LS path
+/// comes from `find_ls_binary_path()`. If no LS path is found the answer is `false`.
+fn check_wrapper_installed() -> bool {
+    match find_ls_binary_path() {
+        Some(base) => std::path::PathBuf::from(format!("{base}.real")).exists(),
+        None => false,
+    }
+}
+
+/// Locate the language-server executable by scanning `/proc/<pid>/exe` symlinks.
+///
+/// Returns the link target of the first numeric `/proc` entry whose `exe` target contains
+/// `language_server_linux` or `antigravity-language-server`, with any trailing
+/// ` (deleted)` and `.real` suffixes removed. Entries whose link cannot be read are skipped.
+///
+/// Returns `None` when `/proc` cannot be listed (for example because it does not exist)
+/// or when no entry matches.
+fn find_ls_binary_path() -> Option<String> {
+    const LS_MARKERS: [&str; 2] = ["language_server_linux", "antigravity-language-server"];
+
+    // A missing or unreadable /proc simply means "not found".
+    let proc_entries = std::fs::read_dir("/proc").ok()?;
+
+    proc_entries
+        .flatten()
+        // Per-process directories are the ones whose names consist only of digits (PIDs).
+        .filter(|entry| {
+            entry.file_name().to_string_lossy().chars().all(|c| c.is_ascii_digit())
+        })
+        // Follow each process's `exe` link; links that cannot be read are dropped.
+        .filter_map(|entry| std::fs::read_link(entry.path().join("exe")).ok())
+        .find_map(|exe_target| {
+            let shown = exe_target.to_string_lossy();
+            // Strip " (deleted)" suffix from unlinked binaries
+            let live = shown.trim_end_matches(" (deleted)");
+            // Match any binary that looks like the Antigravity LS
+            if !LS_MARKERS.iter().any(|marker| live.contains(*marker)) {
+                return None;
+            }
+            // Strip .real suffix — if the wrapper exec'd the backup, we want the base name
+            Some(live.trim_end_matches(".real").to_string())
+        })
+}
+
+/// Data directory `$HOME/.config/antigravity-proxy` (MITM CA, port file); rooted at `/tmp` when `HOME` is unset.
+fn dirs_data_dir() -> std::path::PathBuf {
+    let home = std::env::var_os("HOME").map_or_else(|| "/tmp".into(), std::path::PathBuf::from);
+    home.join(".config").join("antigravity-proxy")
+}
diff --git a/src/mitm/ca.rs b/src/mitm/ca.rs
new file mode 100644
index 0000000..eda8edf
--- /dev/null
+++ b/src/mitm/ca.rs
@@ -0,0 +1,218 @@
+//! Certificate Authority for MITM proxy.
+//!
+//! Generates a self-signed root CA at first run and caches it to disk.
+//! Dynamically generates per-domain leaf certificates signed by this CA.
+
+use rcgen::{
+    BasicConstraints, CertificateParams, DistinguishedName, DnType, ExtendedKeyUsagePurpose,
+    IsCa, KeyPair, KeyUsagePurpose, SanType,
+};
+use rustls::pki_types::{CertificateDer, PrivateKeyDer, PrivatePkcs8KeyDer};
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+use std::sync::Arc;
+use tokio::sync::RwLock;
+use tracing::info;
+
+/// MITM Certificate Authority: holds the root CA material and a cache of per-domain TLS configs.
+pub struct MitmCa {
+    /// Root CA certificate, DER-encoded; appended after the leaf in every served chain.
+    ca_cert_der: CertificateDer<'static>,
+    /// Root CA private key, used to sign leaf certificates.
+    ca_key: KeyPair,
+    /// rcgen view of the root CA, passed as the issuer when signing leaf certs.
+    ca_signed: rcgen::Certificate,
+    /// Cache of TLS server configs keyed by domain name.
+    domain_cache: Arc<RwLock<HashMap<String, Arc<rustls::ServerConfig>>>>,
+    /// Path to the CA PEM file (for SSL_CERT_FILE combined bundle).
+    pub ca_pem_path: PathBuf,
+}
+
+impl MitmCa {
+    /// Load the MITM CA from `data_dir`, or generate and persist a new one if it is absent.
+    ///
+    /// The CA cert/key are stored at:
+    ///   `<data_dir>/mitm-ca.pem`  (cert, for NODE_EXTRA_CA_CERTS)
+    ///   `<data_dir>/mitm-ca.key`  (private key; created owner-only, mode 0600, on Unix)
+    ///
+    /// # Errors
+    /// Returns a message when a file cannot be read or written, or when the key cannot be
+    /// parsed, generated, or used to self-sign the CA.
+    pub fn load_or_generate(data_dir: &Path) -> Result<Self, String> {
+        let cert_path = data_dir.join("mitm-ca.pem");
+        let key_path = data_dir.join("mitm-ca.key");
+
+        let (ca_key, ca_signed, ca_cert_der) = if cert_path.exists() && key_path.exists() {
+            info!("Loading existing MITM CA from {}", cert_path.display());
+            let cert_pem = std::fs::read_to_string(&cert_path)
+                .map_err(|e| format!("Failed to read CA cert: {e}"))?;
+            let key_pem = std::fs::read_to_string(&key_path)
+                .map_err(|e| format!("Failed to read CA key: {e}"))?;
+            let ca_key = KeyPair::from_pem(&key_pem)
+                .map_err(|e| format!("Failed to parse CA key: {e}"))?;
+
+            // rcgen 0.13 cannot load an existing CA cert for signing, so re-create the
+            // params and self-sign again. The result differs from the on-disk cert in
+            // serial/validity and is used only as the issuer handle for leaf signing.
+            let ca_signed = Self::ca_params().self_signed(&ca_key)
+                .map_err(|e| format!("Failed to self-sign CA: {e}"))?;
+
+            // Serve the ORIGINAL on-disk cert: that is the one the LS trusts via the
+            // wrapper's combined CA bundle. If it cannot be parsed, fall back to the
+            // re-signed cert, but say so instead of switching certificates silently.
+            let ca_cert_der = Self::pem_to_der(&cert_pem).unwrap_or_else(|| {
+                tracing::warn!("Failed to parse CA cert at {}; serving re-signed CA cert instead", cert_path.display());
+                CertificateDer::from(ca_signed.der().to_vec())
+            });
+            (ca_key, ca_signed, ca_cert_der)
+        } else {
+            info!("Generating new MITM CA at {}", cert_path.display());
+            std::fs::create_dir_all(data_dir)
+                .map_err(|e| format!("Failed to create data dir: {e}"))?;
+
+            let ca_key = KeyPair::generate()
+                .map_err(|e| format!("Failed to generate CA key: {e}"))?;
+            let ca_signed = Self::ca_params().self_signed(&ca_key)
+                .map_err(|e| format!("Failed to self-sign CA: {e}"))?;
+
+            std::fs::write(&cert_path, ca_signed.pem())
+                .map_err(|e| format!("Failed to write CA cert: {e}"))?;
+
+            // Create the key file owner-only from the start, so the private key is never
+            // readable by other users, not even between the write and a later chmod.
+            #[cfg(unix)]
+            {
+                use std::io::Write;
+                use std::os::unix::fs::{OpenOptionsExt, PermissionsExt};
+                let key_err = |e: std::io::Error| format!("Failed to write CA key: {e}");
+                let mut key_file = std::fs::OpenOptions::new()
+                    .write(true).create(true).truncate(true).mode(0o600)
+                    .open(&key_path)
+                    .map_err(key_err)?;
+                // `mode` only applies on creation; tighten a pre-existing file as well.
+                key_file.set_permissions(std::fs::Permissions::from_mode(0o600)).map_err(key_err)?;
+                key_file.write_all(ca_key.serialize_pem().as_bytes()).map_err(key_err)?;
+            }
+            #[cfg(not(unix))]
+            {
+                std::fs::write(&key_path, ca_key.serialize_pem())
+                    .map_err(|e| format!("Failed to write CA key: {e}"))?;
+            }
+
+            let ca_cert_der = CertificateDer::from(ca_signed.der().to_vec());
+            (ca_key, ca_signed, ca_cert_der)
+        };
+
+        let domain_cache = Arc::new(RwLock::new(HashMap::new()));
+        Ok(Self { ca_cert_der, ca_key, ca_signed, domain_cache, ca_pem_path: cert_path })
+    }
+
+    /// Describe the root CA certificate: subject name, CA constraints, key usages and validity.
+    ///
+    /// Called on both the generate and the load path, so the validity window is always
+    /// computed from the current time.
+    fn ca_params() -> CertificateParams {
+        // Validity: from now until 3650 days (about ten years) later.
+        let issued_at = time::OffsetDateTime::now_utc();
+        let expires_at = issued_at + time::Duration::days(3650);
+
+        let mut subject = DistinguishedName::new();
+        subject.push(DnType::CommonName, "Antigravity MITM CA");
+        subject.push(DnType::OrganizationName, "Antigravity Proxy");
+
+        let mut ca = CertificateParams::default();
+        ca.not_before = issued_at;
+        ca.not_after = expires_at;
+        ca.distinguished_name = subject;
+        // Unconstrained CA whose key may only sign certificates and CRLs.
+        ca.key_usages = vec![KeyUsagePurpose::KeyCertSign, KeyUsagePurpose::CrlSign];
+        ca.is_ca = IsCa::Ca(BasicConstraints::Unconstrained);
+        ca
+    }
+
+    /// Decode the first `CERTIFICATE` block of a PEM string into DER bytes.
+    ///
+    /// Returns `None` when no base64 payload is found before the first
+    /// `END CERTIFICATE` line, or when that payload is not valid standard base64.
+    fn pem_to_der(pem: &str) -> Option<CertificateDer<'static>> {
+        use base64::Engine;
+        let is_begin = |line: &&str| line.contains("BEGIN CERTIFICATE");
+        let is_end = |line: &&str| line.contains("END CERTIFICATE");
+
+        // Stop at the first END marker, drop everything up to and including the BEGIN
+        // marker, and glue the remaining (trimmed) lines into one base64 string.
+        let payload: String = pem
+            .lines()
+            .take_while(|line| is_begin(line) || !is_end(line))
+            .skip_while(|line| !is_begin(line))
+            .filter(|line| !is_begin(line))
+            .map(str::trim)
+            .collect();
+        if payload.is_empty() {
+            return None;
+        }
+        let raw = base64::engine::general_purpose::STANDARD.decode(&payload).ok()?;
+        Some(CertificateDer::from(raw))
+    }
+
+    /// Return the TLS server config for `domain`, minting and caching a leaf certificate on first use.
+    ///
+    /// The leaf is valid for 365 days from now, is signed by this CA, and is served with
+    /// the CA certificate appended to the chain. ALPN offers `h2` first, then `http/1.1`.
+    ///
+    /// Concurrent first requests for one domain may each build a config, because the cache
+    /// lock is not held while signing; whichever insert happens last stays in the cache.
+    ///
+    /// # Errors
+    /// Returns a message if `domain` is rejected as a DNS name, or if key generation,
+    /// signing, or building the rustls config fails.
+    pub async fn server_config_for_domain(&self, domain: &str) -> Result<Arc<rustls::ServerConfig>, String> {
+        // Fast path: reuse a previously built config. The read guard is released at the
+        // end of the lookup statement, before any signing work starts.
+        let cached = self.domain_cache.read().await.get(domain).cloned();
+        if let Some(existing) = cached {
+            return Ok(existing);
+        }
+
+        // Subject CN and SAN both name the requested domain.
+        let san = SanType::DnsName(domain.try_into().map_err(|e| format!("Invalid domain: {e}"))?);
+        let mut subject = DistinguishedName::new();
+        subject.push(DnType::CommonName, domain);
+
+        let issued_at = time::OffsetDateTime::now_utc();
+        let mut leaf_params = CertificateParams::default();
+        leaf_params.distinguished_name = subject;
+        leaf_params.subject_alt_names = vec![san];
+        leaf_params.extended_key_usages = vec![ExtendedKeyUsagePurpose::ServerAuth];
+        leaf_params.key_usages = vec![
+            KeyUsagePurpose::DigitalSignature,
+            KeyUsagePurpose::KeyEncipherment,
+        ];
+        leaf_params.not_before = issued_at;
+        leaf_params.not_after = issued_at + time::Duration::days(365);
+
+        let leaf_key = KeyPair::generate()
+            .map_err(|e| format!("Failed to generate leaf key: {e}"))?;
+        let leaf_cert = leaf_params
+            .signed_by(&leaf_key, &self.ca_signed, &self.ca_key)
+            .map_err(|e| format!("Failed to sign leaf cert for {domain}: {e}"))?;
+
+        // Chain served to clients: leaf first, then the CA certificate.
+        let chain = vec![
+            CertificateDer::from(leaf_cert.der().to_vec()),
+            self.ca_cert_der.clone(),
+        ];
+        let private_key = PrivateKeyDer::Pkcs8(PrivatePkcs8KeyDer::from(leaf_key.serialize_der()));
+
+        let mut tls = rustls::ServerConfig::builder()
+            .with_no_client_auth()
+            .with_single_cert(chain, private_key)
+            .map_err(|e| format!("Failed to build ServerConfig for {domain}: {e}"))?;
+        // Advertise both h2 and http/1.1 so gRPC clients can negotiate HTTP/2
+        tls.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
+
+        let shared = Arc::new(tls);
+        self.domain_cache.write().await.insert(domain.to_string(), Arc::clone(&shared));
+        Ok(shared)
+    }
+}
diff --git a/src/mitm/h2_handler.rs b/src/mitm/h2_handler.rs
new file mode 100644
index 0000000..a0c0395
--- /dev/null
+++ b/src/mitm/h2_handler.rs
@@ -0,0 +1,512 @@
+//! HTTP/2 handler for gRPC traffic interception.
+//!
+//! When the LS negotiates HTTP/2 via ALPN (which all gRPC connections do),
+//! this module handles the bidirectional HTTP/2 connection:
+//!   1. Accepts HTTP/2 frames from the client (LS)
+//!   2. Connects to the real upstream via TLS + HTTP/2 (single connection reused)
+//!   3. Forwards each request stream to upstream
+//!   4. For non-streaming: buffers response, extracts usage, forwards
+//!   5. For streaming: forwards response body chunks in real-time, tees to a
+//!      side buffer for usage extraction after stream completes
+//!
+//! ## Streaming vs Non-streaming
+//!
+//! gRPC has both unary (non-streaming) and server-streaming RPCs.
+//! The LS uses server-streaming for methods like `StreamGenerateContent`.
+//! We MUST forward streaming responses immediately — buffering would break
+//! the LS's perception of real-time generation.
+//!
+//! For usage extraction: ModelUsageStats is typically in the LAST message
+//! of a streaming response, so we tee the data and parse after stream ends.
+
+use crate::mitm::proto::parse_grpc_response_for_usage;
+use crate::mitm::store::{ApiUsage, MitmStore};
+
+use bytes::Bytes;
+use http_body_util::{BodyExt, Full, StreamBody};
+use hyper::body::{Frame, Incoming};
+use hyper::server::conn::http2::Builder as H2ServerBuilder;
+use hyper::service::service_fn;
+use hyper::{Request, Response};
+use hyper_util::rt::TokioExecutor;
+use hyper_util::rt::TokioIo;
+use std::sync::Arc;
+use tokio::io::{AsyncRead, AsyncWrite};
+use tokio::net::TcpStream;
+use tokio::sync::Mutex;
+use tracing::{debug, info, trace, warn};
+
+/// Request handle for an upstream HTTP/2 connection that sends fully-buffered bodies.
+type UpstreamSender = hyper::client::conn::http2::SendRequest<Full<Bytes>>;
+
+/// One lazily-opened HTTP/2 connection to `domain:443`, shared by all request streams.
+///
+/// gRPC multiplexes many requests over a single HTTP/2 connection, and this pool
+/// mirrors that: the sender is created on first use and replaced once it has closed.
+struct UpstreamPool {
+    domain: String,
+    tls_config: Arc<rustls::ClientConfig>,
+    sender: Mutex<Option<UpstreamSender>>,
+}
+
+impl UpstreamPool {
+    /// Create a pool for `domain` with no connection yet; the first request opens one.
+    fn new(domain: String, tls_config: Arc<rustls::ClientConfig>) -> Self {
+        Self {
+            domain,
+            tls_config,
+            sender: Mutex::new(None),
+        }
+    }
+
+    /// Hand out a clone of the current sender, connecting first if there is none or the
+    /// previous connection has closed. The mutex stays locked while connecting.
+    async fn get_sender(&self) -> Result<UpstreamSender, String> {
+        let mut slot = self.sender.lock().await;
+
+        // Reuse the existing sender unless its connection has gone away.
+        match &*slot {
+            Some(live) if !live.is_closed() => return Ok(live.clone()),
+            Some(_) => {
+                debug!(domain = %self.domain, "MITM H2: upstream connection closed, reconnecting");
+            }
+            None => {}
+        }
+
+        let fresh = self.connect().await?;
+        *slot = Some(fresh.clone());
+        Ok(fresh)
+    }
+
+    /// Open TCP to port 443 of the domain, perform the TLS and HTTP/2 handshakes, and
+    /// spawn the task that drives the resulting connection.
+    async fn connect(&self) -> Result<UpstreamSender, String> {
+        let tcp = TcpStream::connect(format!("{}:443", self.domain))
+            .await
+            .map_err(|e| format!("upstream TCP connect to {} failed: {e}", self.domain))?;
+
+        let server_name = rustls::pki_types::ServerName::try_from(self.domain.clone())
+            .map_err(|e| format!("invalid domain {}: {e}", self.domain))?;
+        let tls = tokio_rustls::TlsConnector::from(self.tls_config.clone())
+            .connect(server_name, tcp)
+            .await
+            .map_err(|e| format!("upstream TLS to {} failed: {e}", self.domain))?;
+
+        let (sender, driver) = hyper::client::conn::http2::Builder::new(TokioExecutor::new())
+            .handshake(TokioIo::new(tls))
+            .await
+            .map_err(|e| format!("upstream h2 handshake to {} failed: {e}", self.domain))?;
+
+        // Drive the connection on its own task; log if it ends with an error.
+        let domain = self.domain.clone();
+        tokio::spawn(async move {
+            if let Err(e) = driver.await {
+                debug!(domain = %domain, error = %e, "MITM H2: upstream connection driver ended");
+            }
+        });
+
+        info!(domain = %self.domain, "MITM H2: established upstream HTTP/2 connection");
+        Ok(sender)
+    }
+}
+
+/// gRPC method names whose responses carry ModelUsageStats; matched as substrings of the request path.
+const USAGE_METHODS: &[&str] = &[
+    // Unary methods
+    "GenerateContent",
+    "AsyncGenerateContent",
+    "GenerateChat",
+    "GenerateCode",
+    "CompleteCode",
+    "InternalAtomicAgenticChat",
+    "Predict",
+    "DirectPredict",
+    // Streaming methods
+    "StreamGenerateContent",
+    "StreamAsyncGenerateContent",
+    "StreamGenerateChat",
+];
+
+/// Serve one TLS-terminated HTTP/2 connection from the LS, relaying every request upstream.
+///
+/// Requests are accepted with hyper's HTTP/2 server and forwarded through a single
+/// upstream HTTP/2 connection created for this client connection. Always returns
+/// `Ok(())`: an error from the server loop is only logged at debug level.
+///
+/// `tls_stream` is the client stream after TLS termination, `domain` is the upstream
+/// host to connect to, and `store` is handed to the per-request handler.
+pub async fn handle_h2_connection<S>(
+    tls_stream: S,
+    domain: String,
+    store: MitmStore,
+) -> Result<(), String>
+where
+    S: AsyncRead + AsyncWrite + Unpin + Send + 'static,
+{
+    info!(domain = %domain, "MITM H2: handling HTTP/2 connection");
+
+    // Trust the platform's native roots for upstream TLS; a certificate the store
+    // refuses to add is skipped.
+    let mut trusted_roots = rustls::RootCertStore::empty();
+    rustls_native_certs::load_native_certs().certs.into_iter().for_each(|cert| {
+        let _ = trusted_roots.add(cert);
+    });
+    let mut client_tls = rustls::ClientConfig::builder()
+        .with_root_certificates(trusted_roots)
+        .with_no_client_auth();
+    // Only `h2` is offered to upstream via ALPN.
+    client_tls.alpn_protocols = vec![b"h2".to_vec()];
+
+    // Shared upstream connection pool (single connection, multiplexed)
+    let upstream = Arc::new(UpstreamPool::new(
+        domain.clone(),
+        Arc::new(client_tls),
+    ));
+    let shared_domain = Arc::new(domain);
+
+    // Each request gets its own clones of the shared handles.
+    let per_request = service_fn(move |req: Request<Incoming>| {
+        let domain = shared_domain.clone();
+        let store = store.clone();
+        let pool = upstream.clone();
+        async move { handle_h2_request(req, &domain, store, pool).await }
+    });
+
+    let outcome = H2ServerBuilder::new(TokioExecutor::new())
+        .serve_connection(TokioIo::new(tls_stream), per_request)
+        .await;
+
+    // Connection errors are expected on clean close, so neither case is a failure.
+    if let Err(e) = outcome {
+        debug!(error = %e, "MITM H2: connection ended");
+    } else {
+        debug!("MITM H2: connection closed cleanly");
+    }
+
+    Ok(())
+}
+
+/// Response body type: `Left` is a fully-buffered body, `Right` streams frames received from a channel.
+type BoxBody = http_body_util::Either<
+    Full<Bytes>,
+    StreamBody<tokio_stream::wrappers::ReceiverStream<Result<Frame<Bytes>, hyper::Error>>>,
+>;
+
+/// Handle a single HTTP/2 request: forward it upstream, relay the response, capture usage.
+///
+/// `domain` is used as the upstream authority, `store` receives captured usage, and
+/// `pool` supplies the shared upstream connection.
+///
+/// Successful streaming responses are forwarded frame by frame while the data is teed
+/// to a side buffer for post-stream usage extraction. Every other response is buffered,
+/// parsed for usage, and replayed to the client together with its trailers: gRPC reports
+/// the call outcome (`grpc-status`) in trailers, so they must not be dropped.
+///
+/// Failures are answered with a proxy-generated response instead of an `Err`: 400 when
+/// the request body cannot be read, 502 when upstream cannot be reached or its response
+/// cannot be read in full.
+async fn handle_h2_request(
+    req: Request<Incoming>,
+    domain: &str,
+    store: MitmStore,
+    pool: Arc<UpstreamPool>,
+) -> Result<Response<BoxBody>, hyper::Error> {
+    let method = req.method().clone();
+    let uri = req.uri().clone();
+    let path = uri.path().to_string();
+
+    // Identify gRPC method
+    let is_grpc = req
+        .headers()
+        .get("content-type")
+        .and_then(|v| v.to_str().ok())
+        .map(|ct| ct.starts_with("application/grpc"))
+        .unwrap_or(false);
+
+    // Check if this method carries usage data
+    let is_usage_method = is_grpc
+        && USAGE_METHODS.iter().any(|m| path.contains(m));
+
+    // Check if this is a streaming method
+    let is_streaming = is_grpc
+        && (path.contains("Stream") || path.contains("stream"));
+
+    debug!(
+        domain,
+        %method,
+        path = %path,
+        grpc = is_grpc,
+        usage_method = is_usage_method,
+        streaming = is_streaming,
+        "MITM H2: forwarding request"
+    );
+
+    // Collect request body (we need it for cascade ID extraction)
+    // NOTE: only data frames are kept; request trailers, if a client sent any, are
+    // not forwarded upstream.
+    let (parts, body) = req.into_parts();
+    let request_body = match body.collect().await {
+        Ok(collected) => collected.to_bytes(),
+        Err(e) => {
+            // Forwarding an empty body in place of the real one would hand upstream a
+            // request the client never sent, so fail this request instead.
+            warn!(error = %e, "MITM H2: failed to collect request body");
+            let msg = format!("failed to read request body: {e}");
+            return Ok(proxy_error(hyper::StatusCode::BAD_REQUEST, msg));
+        }
+    };
+
+    // Get upstream sender from pool
+    let mut upstream_sender = match pool.get_sender().await {
+        Ok(s) => s,
+        Err(e) => {
+            warn!(error = %e, domain, "MITM H2: upstream connect failed");
+            let msg = format!("upstream connect failed: {e}");
+            return Ok(proxy_error(hyper::StatusCode::BAD_GATEWAY, msg));
+        }
+    };
+
+    // Build the upstream request with proper authority
+    let upstream_uri = http::Uri::builder()
+        .scheme("https")
+        .authority(domain)
+        .path_and_query(
+            uri.path_and_query()
+                .map(|pq| pq.as_str())
+                .unwrap_or("/"),
+        )
+        .build()
+        .unwrap_or(uri);
+
+    let mut upstream_req = Request::builder()
+        .method(parts.method)
+        .uri(upstream_uri);
+
+    // Copy headers, skip hop-by-hop. `te` is not in the skip list: gRPC clients send
+    // `te: trailers`, which is the one `te` value HTTP/2 permits.
+    for (name, value) in &parts.headers {
+        let n = name.as_str();
+        if n == "host" || n == "connection" || n == "transfer-encoding" {
+            continue;
+        }
+        upstream_req = upstream_req.header(name, value);
+    }
+
+    let upstream_req = match upstream_req.body(Full::new(request_body.clone())) {
+        Ok(r) => r,
+        Err(e) => {
+            let msg = format!("build request failed: {e}");
+            return Ok(proxy_error(hyper::StatusCode::BAD_GATEWAY, msg));
+        }
+    };
+
+    // Send to upstream
+    let upstream_resp = match upstream_sender.send_request(upstream_req).await {
+        Ok(r) => r,
+        Err(e) => {
+            warn!(error = %e, domain, path = %path, "MITM H2: upstream request failed");
+            let msg = format!("upstream request failed: {e}");
+            return Ok(proxy_error(hyper::StatusCode::BAD_GATEWAY, msg));
+        }
+    };
+
+    let (resp_parts, resp_body) = upstream_resp.into_parts();
+    let status = resp_parts.status;
+
+    // ──────────────────────────────────────────────────────────────────
+    // Streaming path: forward chunks immediately, tee for usage parsing
+    // ──────────────────────────────────────────────────────────────────
+    // Non-success responses fall through to the buffered path below.
+    if is_streaming && status.is_success() {
+        let (tx, rx) = tokio::sync::mpsc::channel::<Result<Frame<Bytes>, hyper::Error>>(32);
+        let path_clone = path.clone();
+
+        // Spawn a task to forward body chunks and tee for usage extraction.
+        // The channel is bounded, so upstream frames are only read as fast as the
+        // client side of the channel drains them.
+        tokio::spawn(async move {
+            let mut tee_buffer = if is_usage_method { Some(Vec::new()) } else { None };
+            let mut body = resp_body;
+
+            loop {
+                match body.frame().await {
+                    Some(Ok(frame)) => {
+                        if let (Some(ref mut buf), Some(data)) = (&mut tee_buffer, frame.data_ref()) {
+                            buf.extend_from_slice(data);
+                        }
+                        if tx.send(Ok(frame)).await.is_err() {
+                            break; // client disconnected
+                        }
+                    }
+                    Some(Err(e)) => {
+                        warn!(error = %e, path = %path_clone, "MITM H2: streaming error");
+                        let _ = tx.send(Err(e)).await;
+                        break;
+                    }
+                    None => break, // stream ended
+                }
+            }
+
+            // Stream completed — parse the tee buffer for usage
+            if let Some(tee_buffer) = tee_buffer.filter(|buf| !buf.is_empty()) {
+                record_usage_from_response(store, &path_clone, &request_body, &tee_buffer).await;
+            }
+        });
+
+        let stream_body = StreamBody::new(tokio_stream::wrappers::ReceiverStream::new(rx));
+        return Ok(relay_response(&resp_parts, http_body_util::Either::Right(stream_body)));
+    }
+
+    // ──────────────────────────────────────────────────────────────────
+    // Non-streaming path: buffer full response, extract usage, forward
+    // ──────────────────────────────────────────────────────────────────
+    let collected = match resp_body.collect().await {
+        Ok(collected) => collected,
+        Err(e) => {
+            // The upstream headers describe a body that never fully arrived; relaying
+            // them with an empty body would present the failure as a success.
+            warn!(error = %e, "MITM H2: failed to collect response body");
+            let msg = format!("upstream response failed: {e}");
+            return Ok(proxy_error(hyper::StatusCode::BAD_GATEWAY, msg));
+        }
+    };
+    // `to_bytes` consumes the collected body and keeps data only: copy trailers first.
+    let trailers = collected.trailers().cloned();
+    let response_body = collected.to_bytes();
+
+    trace!(
+        domain,
+        path = %path,
+        status = %status,
+        body_len = response_body.len(),
+        "MITM H2: got upstream response"
+    );
+
+    // Extract usage data from usage-carrying gRPC methods
+    if is_usage_method && !response_body.is_empty() && status.is_success() {
+        record_usage_from_response(store, &path, &request_body, &response_body).await;
+    }
+
+    // Replay the buffered body, re-attaching trailers when upstream sent any.
+    Ok(relay_response(&resp_parts, buffered_body(response_body, trailers)))
+}
+
+/// Build a proxy-generated plain-text response with the given status.
+/// The message becomes the response body; no headers are set.
+fn proxy_error(status: hyper::StatusCode, message: String) -> Response<BoxBody> {
+    let mut resp = Response::new(http_body_util::Either::Left(Full::new(Bytes::from(message))));
+    *resp.status_mut() = status;
+    resp
+}
+
+/// Wrap `body` in a response carrying the upstream status and headers.
+/// Headers are copied verbatim, multi-valued entries included; the HTTP version and
+/// extensions of `parts` are not carried over.
+fn relay_response(parts: &http::response::Parts, body: BoxBody) -> Response<BoxBody> {
+    let mut resp = Response::new(body);
+    *resp.status_mut() = parts.status;
+    *resp.headers_mut() = parts.headers.clone();
+    resp
+}
+
+/// Build the client-facing body for a fully-buffered upstream response.
+///
+/// `Full` cannot carry trailers, so when trailers are present the data and the
+/// trailers are replayed as frames over a channel; otherwise a `Full` body is used.
+fn buffered_body(data: Bytes, trailers: Option<http::HeaderMap>) -> BoxBody {
+    match trailers {
+        Some(trailers) => {
+            // Capacity 2 holds both frames, so `try_send` cannot fail here.
+            let (tx, rx) = tokio::sync::mpsc::channel(2);
+            if !data.is_empty() {
+                let _ = tx.try_send(Ok(Frame::data(data)));
+            }
+            let _ = tx.try_send(Ok(Frame::trailers(trailers)));
+            http_body_util::Either::Right(StreamBody::new(
+                tokio_stream::wrappers::ReceiverStream::new(rx),
+            ))
+        }
+        None => http_body_util::Either::Left(Full::new(data)),
+    }
+}
+
+/// Parse a gRPC response body for usage stats and record them in `store` if present.
+///
+/// `path` is stored as the gRPC method and `request_body` is searched for a cascade ID
+/// hint. Does nothing when the response contains no usage stats.
+async fn record_usage_from_response(
+    store: MitmStore,
+    path: &str,
+    request_body: &[u8],
+    response_body: &[u8],
+) {
+    if let Some(grpc_usage) = parse_grpc_response_for_usage(response_body) {
+        let usage = ApiUsage {
+            input_tokens: grpc_usage.input_tokens,
+            output_tokens: grpc_usage.output_tokens,
+            thinking_output_tokens: grpc_usage.thinking_output_tokens,
+            response_output_tokens: grpc_usage.response_output_tokens,
+            cache_creation_input_tokens: grpc_usage.cache_write_tokens,
+            cache_read_input_tokens: grpc_usage.cache_read_tokens,
+            model: grpc_usage.model,
+            api_provider: grpc_usage.api_provider,
+            grpc_method: Some(path.to_string()),
+            stop_reason: None,
+            total_cost_usd: None,
+            captured_at: std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .unwrap_or_default()
+                .as_secs(),
+        };
+        let cascade_hint = extract_cascade_from_grpc_request(request_body);
+        store.record_usage(cascade_hint.as_deref(), usage).await;
+    }
+}
+
+/// Find a cascade ID hint in a gRPC request body.
+///
+/// Every gRPC message extracted from `body` is decoded as protobuf and its fields are
+/// searched (including nested messages) for a UUID-shaped string.
+///
+/// Returns the first match in message and field order, or `None` if there is none.
+fn extract_cascade_from_grpc_request(body: &[u8]) -> Option<String> {
+    use crate::mitm::proto::{decode_proto, extract_grpc_messages};
+
+    let messages = extract_grpc_messages(body);
+
+    // Messages are scanned in order; the first UUID found anywhere wins.
+    messages
+        .iter()
+        .find_map(|message| {
+            decode_proto(message).iter().find_map(extract_uuid_from_field)
+        })
+}
+
+/// Depth-first search of one decoded protobuf field for a UUID-shaped string.
+///
+/// A bytes field matches when it is valid UTF-8 and passes `is_uuid`; a nested
+/// message is searched field by field, in order. Returns the first match, or `None`
+/// when the field (and everything nested in it) holds no such string.
+fn extract_uuid_from_field(field: &crate::mitm::proto::ProtoField) -> Option<String> {
+    use crate::mitm::proto::ProtoValue;
+
+    match &field.value {
+        // Leaf: only UTF-8 text that looks like a UUID counts.
+        ProtoValue::Bytes(raw) => std::str::from_utf8(raw)
+            .ok()
+            .filter(|text| is_uuid(text))
+            .map(str::to_string),
+        // Nested message: recurse, stopping at the first hit.
+        ProtoValue::Message(children) => {
+            children.iter().find_map(extract_uuid_from_field)
+        }
+        // Remaining value kinds are not searched.
+        _ => None,
+    }
+}
+
+fn is_uuid(s: &str) -> bool {
+    // Canonical 8-4-4-4-12 text form: '-' at offsets 8/13/18/23, hex digits everywhere else.
+    let ok = |(i, b): (usize, u8)| if matches!(i, 8 | 13 | 18 | 23) { b == b'-' } else { b.is_ascii_hexdigit() };
+    s.len() == 36 && s.bytes().enumerate().all(ok)
+}
diff --git a/src/mitm/intercept.rs b/src/mitm/intercept.rs
new file mode 100644
index 0000000..130586d
--- /dev/null
+++ b/src/mitm/intercept.rs
@@ -0,0 +1,271 @@
+//! API response interceptor: parses Anthropic Messages API responses to extract usage data.
+//!
+//! Handles both streaming (SSE) and non-streaming (JSON) responses.
+
+use super::store::ApiUsage;
+use serde_json::Value;
+use tracing::{debug, trace};
+
+/// Extract usage from a complete (non-streaming) Anthropic Messages API
+/// response body.
+///
+/// Returns `None` when `body` is not valid JSON or has no top-level `usage`
+/// object.
+///
+/// Expected response shape:
+/// ```json
+/// {
+///   "id": "msg_...",
+///   "type": "message",
+///   "model": "claude-sonnet-4-20250514",
+///   "usage": {
+///     "input_tokens": 1234,
+///     "output_tokens": 567,
+///     "cache_creation_input_tokens": 0,
+///     "cache_read_input_tokens": 890
+///   },
+///   "stop_reason": "end_turn"
+/// }
+/// ```
+pub fn parse_non_streaming_response(body: &[u8]) -> Option<ApiUsage> {
+    serde_json::from_slice::<Value>(body)
+        .ok()
+        .as_ref()
+        .and_then(extract_usage_from_message)
+}
+
+/// Parse SSE events from a streaming Anthropic response body chunk and fold
+/// them into `accumulator`.
+///
+/// Events of interest:
+/// - `message_start` — contains `message.usage.input_tokens` + cache tokens
+/// - `message_delta` — contains `usage.output_tokens`
+/// - `message_stop` — marks end (no usage data)
+///
+/// Nothing is returned; the accumulated usage lives in `accumulator`.
+///
+/// Only `data:` lines are inspected. The single space after the colon is
+/// optional, as the SSE format allows. The `[DONE]` sentinel and payloads that
+/// are not valid JSON are skipped silently, so a `data:` line split across
+/// two chunks is lost rather than reported.
+pub fn parse_streaming_chunk(chunk: &str, accumulator: &mut StreamingAccumulator) {
+    for line in chunk.lines() {
+        if let Some(rest) = line.strip_prefix("data:") {
+            // SSE: strip at most one leading space from the field value.
+            let data = rest.strip_prefix(' ').unwrap_or(rest);
+            if data.trim() == "[DONE]" {
+                continue;
+            }
+            if let Ok(event) = serde_json::from_str::<Value>(data) {
+                accumulator.process_event(&event);
+            }
+        }
+    }
+}
+
+/// Accumulates usage data across streaming SSE events.
+///
+/// Fed one event at a time through `process_event`; converted to an
+/// `ApiUsage` with `into_usage`.
+#[derive(Debug, Default)]
+pub struct StreamingAccumulator {
+    /// `message.usage.input_tokens` from the `message_start` event.
+    pub input_tokens: u64,
+    /// Latest `usage.output_tokens` seen in a `message_delta` event.
+    pub output_tokens: u64,
+    /// `message.usage.cache_creation_input_tokens` from `message_start`.
+    pub cache_creation_input_tokens: u64,
+    /// `message.usage.cache_read_input_tokens` from `message_start`.
+    pub cache_read_input_tokens: u64,
+    /// `message.model` from `message_start`, when present.
+    pub model: Option<String>,
+    /// `delta.stop_reason` from the most recent `message_delta` that carried one.
+    pub stop_reason: Option<String>,
+    /// Becomes `true` once a `message_stop` event has been processed.
+    pub is_complete: bool,
+}
+
+impl StreamingAccumulator {
+    /// Create an accumulator with zeroed counters and nothing captured yet.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Fold one decoded SSE event (the JSON payload of a `data:` line) into
+    /// the running totals.
+    ///
+    /// - `message_start` overwrites the input/cache counters (a missing
+    ///   counter becomes 0) and records the model.
+    /// - `message_delta` updates the output counter and stop reason when the
+    ///   event carries them.
+    /// - `message_stop` marks the stream complete.
+    /// - Content-block and `ping` events are ignored; anything else is only
+    ///   traced.
+    pub fn process_event(&mut self, event: &Value) {
+        let event_type = event["type"].as_str().unwrap_or("");
+
+        match event_type {
+            "message_start" => {
+                let message = event.get("message");
+
+                // Initial usage: input tokens plus cache statistics.
+                if let Some(usage) = message.and_then(|m| m.get("usage")) {
+                    let count = |key: &str| usage[key].as_u64().unwrap_or(0);
+                    self.input_tokens = count("input_tokens");
+                    self.cache_creation_input_tokens = count("cache_creation_input_tokens");
+                    self.cache_read_input_tokens = count("cache_read_input_tokens");
+                }
+                if let Some(model) = message.and_then(|m| m["model"].as_str()) {
+                    self.model = Some(model.to_string());
+                }
+                trace!(
+                    input = self.input_tokens,
+                    cache_read = self.cache_read_input_tokens,
+                    cache_create = self.cache_creation_input_tokens,
+                    "SSE message_start: captured input usage"
+                );
+            }
+            "message_delta" => {
+                // Output usage; keep the previous value when the counter is absent.
+                let reported = event.get("usage").and_then(|u| u["output_tokens"].as_u64());
+                if let Some(tokens) = reported {
+                    self.output_tokens = tokens;
+                }
+                if let Some(reason) = event["delta"]["stop_reason"].as_str() {
+                    self.stop_reason = Some(reason.to_string());
+                }
+                trace!(output = self.output_tokens, "SSE message_delta: updated output tokens");
+            }
+            "message_stop" => {
+                self.is_complete = true;
+                debug!(
+                    input = self.input_tokens,
+                    output = self.output_tokens,
+                    cache_read = self.cache_read_input_tokens,
+                    model = ?self.model,
+                    "SSE message_stop: stream complete"
+                );
+            }
+            // Content events carry no usage data.
+            "content_block_start" | "content_block_delta" | "content_block_stop" | "ping" => {}
+            _ => trace!(event_type, "SSE: unknown event type"),
+        }
+    }
+
+    /// Consume the accumulator and build an `ApiUsage` record.
+    ///
+    /// The provider is fixed to `"anthropic"`, the thinking/response split is
+    /// left at 0, and `captured_at` is the current Unix time in seconds
+    /// (0 if the clock reads before the epoch).
+    pub fn into_usage(self) -> ApiUsage {
+        let captured_at = std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_secs();
+
+        ApiUsage {
+            input_tokens: self.input_tokens,
+            output_tokens: self.output_tokens,
+            cache_creation_input_tokens: self.cache_creation_input_tokens,
+            cache_read_input_tokens: self.cache_read_input_tokens,
+            thinking_output_tokens: 0,
+            response_output_tokens: 0,
+            total_cost_usd: None,
+            model: self.model,
+            stop_reason: self.stop_reason,
+            api_provider: Some("anthropic".to_string()),
+            grpc_method: None,
+            captured_at,
+        }
+    }
+}
+
+/// Build an `ApiUsage` from a complete Message JSON object.
+///
+/// Returns `None` when `msg` has no `usage` member. Counters missing from
+/// `usage` default to 0; `model` and `stop_reason` are copied when they are
+/// strings. The provider is fixed to `"anthropic"` and `captured_at` is the
+/// current Unix time in seconds.
+fn extract_usage_from_message(msg: &Value) -> Option<ApiUsage> {
+    msg.get("usage").map(|usage| {
+        let count = |key: &str| usage[key].as_u64().unwrap_or(0);
+        let text = |key: &str| msg[key].as_str().map(|s| s.to_string());
+        let captured_at = std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_secs();
+
+        ApiUsage {
+            input_tokens: count("input_tokens"),
+            output_tokens: count("output_tokens"),
+            cache_creation_input_tokens: count("cache_creation_input_tokens"),
+            cache_read_input_tokens: count("cache_read_input_tokens"),
+            thinking_output_tokens: 0,
+            response_output_tokens: 0,
+            total_cost_usd: None,
+            model: text("model"),
+            stop_reason: text("stop_reason"),
+            api_provider: Some("anthropic".to_string()),
+            grpc_method: None,
+            captured_at,
+        }
+    })
+}
+
+/// Try to identify a cascade ID from an API request body.
+///
+/// Two places are checked, in order:
+/// 1. `metadata.user_id` — returned verbatim when it is a string (the LS
+///    often sets it to the cascade ID).
+/// 2. The `system` prompt (a string, or an array of content blocks whose
+///    `text` members are joined with spaces) — the whitespace-separated token
+///    that follows the first token containing `workspace_id` is returned.
+///
+/// Returns `None` when the body is not JSON or neither source yields a value.
+pub fn extract_cascade_hint(request_body: &[u8]) -> Option<String> {
+    let json: Value = serde_json::from_slice(request_body).ok()?;
+
+    // Metadata field (only some API configurations include it).
+    let user_id = json.get("metadata").and_then(|meta| meta["user_id"].as_str());
+    if let Some(id) = user_id {
+        return Some(id.to_string());
+    }
+
+    // Flatten the system prompt into a single string.
+    let system_text = match json.get("system")? {
+        Value::String(text) => text.clone(),
+        Value::Array(blocks) => blocks
+            .iter()
+            .filter_map(|block| block["text"].as_str())
+            .collect::<Vec<_>>()
+            .join(" "),
+        _ => return None,
+    };
+
+    // Starting at the marker, token 0 holds `workspace_id` and token 1 is the value.
+    let marker_at = system_text.find("workspace_id")?;
+    let hint = system_text[marker_at..]
+        .split_whitespace()
+        .nth(1)
+        .map(|value| value.to_string());
+    hint
+}
+
+// Unit tests for the Anthropic JSON / SSE usage parsers.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // A complete Messages API body yields all four counters and the model.
+    #[test]
+    fn test_parse_non_streaming() {
+        let body = r#"{
+            "id": "msg_123",
+            "type": "message",
+            "model": "claude-sonnet-4-20250514",
+            "usage": {
+                "input_tokens": 100,
+                "output_tokens": 50,
+                "cache_creation_input_tokens": 10,
+                "cache_read_input_tokens": 30
+            },
+            "stop_reason": "end_turn"
+        }"#;
+
+        let usage = parse_non_streaming_response(body.as_bytes()).unwrap();
+        assert_eq!(usage.input_tokens, 100);
+        assert_eq!(usage.output_tokens, 50);
+        assert_eq!(usage.cache_creation_input_tokens, 10);
+        assert_eq!(usage.cache_read_input_tokens, 30);
+        assert_eq!(usage.model.as_deref(), Some("claude-sonnet-4-20250514"));
+    }
+
+    // Drives the accumulator through start -> delta -> stop and checks that
+    // input counters come from the start event and output from the delta.
+    #[test]
+    fn test_streaming_accumulator() {
+        let mut acc = StreamingAccumulator::new();
+
+        // message_start
+        let start = serde_json::json!({
+            "type": "message_start",
+            "message": {
+                "model": "claude-sonnet-4-20250514",
+                "usage": {
+                    "input_tokens": 200,
+                    "cache_creation_input_tokens": 5,
+                    "cache_read_input_tokens": 50
+                }
+            }
+        });
+        acc.process_event(&start);
+        assert_eq!(acc.input_tokens, 200);
+        assert_eq!(acc.cache_read_input_tokens, 50);
+
+        // message_delta
+        let delta = serde_json::json!({
+            "type": "message_delta",
+            "delta": { "stop_reason": "end_turn" },
+            "usage": { "output_tokens": 75 }
+        });
+        acc.process_event(&delta);
+        assert_eq!(acc.output_tokens, 75);
+
+        // message_stop
+        let stop = serde_json::json!({ "type": "message_stop" });
+        acc.process_event(&stop);
+        assert!(acc.is_complete);
+
+        // Conversion carries the accumulated counters over unchanged.
+        let usage = acc.into_usage();
+        assert_eq!(usage.input_tokens, 200);
+        assert_eq!(usage.output_tokens, 75);
+    }
+}
diff --git a/src/mitm/mod.rs b/src/mitm/mod.rs
new file mode 100644
index 0000000..c1c3d10
--- /dev/null
+++ b/src/mitm/mod.rs
@@ -0,0 +1,19 @@
+//! MITM proxy module: intercepts LS ↔ Google/Anthropic API traffic.
+//!
+//! The LS (Go binary with BoringCrypto) respects `HTTPS_PROXY` and `SSL_CERT_FILE`.
+//! By setting these env vars via the wrapper script, we route all outbound HTTPS
+//! traffic through our local MITM proxy, which:
+//!
+//! 1. Terminates TLS using dynamically-generated per-domain certificates
+//! 2. Detects protocol: HTTP/1.1 (REST) or HTTP/2 (gRPC)
+//! 3. For HTTP/1.1: parses JSON/SSE responses (Anthropic format)
+//! 4. For HTTP/2: decodes gRPC protobuf responses (Google format)
+//! 5. Captures token usage data (input, output, thinking, cache)
+//! 6. Forwards everything transparently to real upstream servers
+
+pub mod ca;
+pub mod h2_handler;
+pub mod intercept;
+pub mod proto;
+pub mod proxy;
+pub mod store;
diff --git a/src/mitm/proto.rs b/src/mitm/proto.rs
new file mode 100644
index 0000000..a3ed81c
--- /dev/null
+++ b/src/mitm/proto.rs
@@ -0,0 +1,584 @@
+//! Raw protobuf decoder for extracting ModelUsageStats from gRPC responses.
+//!
+//! We don't have the .proto schema, so we decode protobuf messages generically
+//! and search for usage-like structures by matching field patterns.
+//!
+//! gRPC wire format:
+//!   - 1 byte: compression flag (0 = uncompressed, 1 = compressed)
+//!   - 4 bytes: message length (big-endian u32)
+//!   - N bytes: protobuf message
+//!
+//! Protobuf wire format:
+//!   - Each field: (field_number << 3 | wire_type) as varint, then value
+//!   - Wire type 0: varint
+//!   - Wire type 1: 64-bit fixed
+//!   - Wire type 2: length-delimited (string, bytes, embedded message)
+//!   - Wire type 5: 32-bit fixed
+//!
+//! ## ModelUsageStats schema (reverse-engineered from LS binary):
+//!
+//! ```protobuf
+//! message ModelUsageStats {
+//!     Model model = 1;                          // enum (varint)
+//!     uint64 input_tokens = 2;
+//!     uint64 output_tokens = 3;
+//!     uint64 cache_write_tokens = 4;
+//!     uint64 cache_read_tokens = 5;
+//!     APIProvider api_provider = 6;             // enum (varint)
+//!     string message_id = 7;
+//!     map<string,string> response_header = 8;   // repeated message
+//!     uint64 thinking_output_tokens = 9;
+//!     uint64 response_output_tokens = 10;
+//!     string response_id = 11;
+//! }
+//! ```
+
+use flate2::read::GzDecoder;
+use std::io::Read;
+use tracing::{debug, trace, warn};
+
+/// The value of a decoded protobuf field, tagged by how it appeared on the wire.
+#[derive(Debug, Clone)]
+pub enum ProtoValue {
+    /// Wire type 0: a base-128 varint.
+    Varint(u64),
+    /// Wire type 1: eight bytes, little-endian.
+    // NOTE(review): `allow(dead_code)` presumably because the payload is stored
+    // but never read — confirm against callers.
+    #[allow(dead_code)]
+    Fixed64(u64),
+    /// Wire type 5: four bytes, little-endian.
+    #[allow(dead_code)]
+    Fixed32(u32),
+    /// Wire type 2 payload that was not accepted as a nested message.
+    Bytes(Vec<u8>),
+    /// Nested message (parsed recursively)
+    Message(Vec<ProtoField>),
+}
+
+/// A single protobuf field with its number and value.
+#[derive(Debug, Clone)]
+pub struct ProtoField {
+    /// Field number taken from the tag (`tag >> 3`).
+    pub number: u32,
+    /// Decoded payload; the variant reflects the tag's wire type.
+    pub value: ProtoValue,
+}
+
+/// Extracted usage data from a gRPC response.
+///
+/// Populated by `try_extract_usage` from the `ModelUsageStats` field numbers
+/// noted on each member.
+#[derive(Debug, Default)]
+pub struct GrpcUsage {
+    /// Field 2.
+    pub input_tokens: u64,
+    /// Field 3.
+    pub output_tokens: u64,
+    /// Field 9.
+    pub thinking_output_tokens: u64,
+    /// Field 10.
+    pub response_output_tokens: u64,
+    /// Field 5.
+    pub cache_read_tokens: u64,
+    /// Field 4.
+    pub cache_write_tokens: u64,
+    /// Field 1 enum value rendered as `model_enum_<n>` (no name mapping yet).
+    pub model: Option<String>,
+    /// Field 6 enum value mapped to a provider name, or `provider_<n>`.
+    pub api_provider: Option<String>,
+    /// Field 7.
+    pub message_id: Option<String>,
+    /// Field 11.
+    pub response_id: Option<String>,
+}
+
+/// Extract gRPC message frames from a buffer.
+///
+/// A gRPC message is:
+///   [1 byte compressed flag] [4 bytes length BE] [N bytes protobuf]
+///
+/// Multiple messages can be concatenated in a single buffer.
+/// If compressed flag is 1, the message is gzip-decompressed; a frame that
+/// fails to decompress is logged and skipped. Any other flag value is treated
+/// as uncompressed.
+///
+/// Parsing stops at the first frame whose declared length exceeds the bytes
+/// that remain (a truncated trailing frame); frames before it are returned.
+pub fn extract_grpc_messages(data: &[u8]) -> Vec<Vec<u8>> {
+    const HEADER_LEN: usize = 5;
+
+    let mut messages = Vec::new();
+    let mut rest = data;
+
+    // Walk the buffer by re-slicing instead of `offset + length` arithmetic,
+    // so a huge declared length can never overflow `usize` (32-bit targets).
+    while rest.len() >= HEADER_LEN {
+        let (header, body) = rest.split_at(HEADER_LEN);
+        let compressed = header[0];
+        let length = u32::from_be_bytes([header[1], header[2], header[3], header[4]]) as usize;
+
+        if length > body.len() {
+            break;
+        }
+        let (payload, tail) = body.split_at(length);
+
+        if compressed == 1 {
+            // gzip-compressed frame
+            // NOTE(review): decompressed size is unbounded; consider capping it
+            // if this ever sees hostile peers.
+            let mut decoder = GzDecoder::new(payload);
+            let mut decompressed = Vec::new();
+            match decoder.read_to_end(&mut decompressed) {
+                Ok(_) => messages.push(decompressed),
+                Err(e) => {
+                    warn!(error = %e, "Proto: failed to decompress gRPC frame");
+                }
+            }
+        } else {
+            messages.push(payload.to_vec());
+        }
+
+        rest = tail;
+    }
+
+    messages
+}
+
+/// Decode a protobuf message into a list of fields.
+///
+/// This is a best-effort, schema-less decoder for wire types 0, 1, 2 and 5.
+/// Decoding stops at the first malformed or unsupported element and returns
+/// the fields read up to that point; it never panics on hostile input.
+///
+/// A length-delimited payload (wire type 2) becomes a nested
+/// `ProtoValue::Message` only when the *entire* payload decodes cleanly and
+/// passes `looks_like_valid_message`; otherwise it is kept as
+/// `ProtoValue::Bytes`. Requiring a full decode keeps ordinary strings (which
+/// often start with bytes that look like a valid tag) from being
+/// misclassified as messages and losing their raw bytes.
+pub fn decode_proto(data: &[u8]) -> Vec<ProtoField> {
+    decode_proto_fields(data, 0).0
+}
+
+/// Decode fields from `data` at nesting level `depth`.
+///
+/// Returns the fields decoded so far plus a flag that is `true` only when
+/// every byte of `data` was consumed without hitting a malformed element.
+fn decode_proto_fields(data: &[u8], depth: usize) -> (Vec<ProtoField>, bool) {
+    // Bound recursion so deeply nested (or adversarial) input cannot exhaust
+    // the stack; payloads below this depth are kept as raw bytes.
+    const MAX_NESTING_DEPTH: usize = 64;
+
+    let mut fields = Vec::new();
+    let mut offset = 0;
+
+    while offset < data.len() {
+        // Read tag (varint)
+        let (tag, tag_len) = match read_varint(&data[offset..]) {
+            Some(v) => v,
+            None => return (fields, false),
+        };
+        offset += tag_len;
+
+        let wire_type = (tag & 0x07) as u8;
+        // Field numbers that are 0 or do not fit in `u32` are invalid.
+        let field_number = match u32::try_from(tag >> 3) {
+            Ok(n) if n != 0 => n,
+            _ => return (fields, false),
+        };
+
+        let value = match wire_type {
+            0 => {
+                // Varint
+                let (val, val_len) = match read_varint(&data[offset..]) {
+                    Some(v) => v,
+                    None => return (fields, false),
+                };
+                offset += val_len;
+                ProtoValue::Varint(val)
+            }
+            1 => {
+                // 64-bit fixed, little-endian
+                let raw = data
+                    .get(offset..offset + 8)
+                    .and_then(|s| <[u8; 8]>::try_from(s).ok());
+                match raw {
+                    Some(bytes) => {
+                        offset += 8;
+                        ProtoValue::Fixed64(u64::from_le_bytes(bytes))
+                    }
+                    None => return (fields, false),
+                }
+            }
+            2 => {
+                // Length-delimited
+                let (len, len_len) = match read_varint(&data[offset..]) {
+                    Some(v) => v,
+                    None => return (fields, false),
+                };
+                offset += len_len;
+
+                // `len` comes straight off the wire and may be as large as
+                // u64::MAX: convert and add with overflow checks.
+                let end = usize::try_from(len)
+                    .ok()
+                    .and_then(|l| offset.checked_add(l))
+                    .filter(|&e| e <= data.len());
+                let end = match end {
+                    Some(e) => e,
+                    None => return (fields, false),
+                };
+
+                let payload = &data[offset..end];
+                offset = end;
+
+                // Try to parse as a nested message; accept only a full decode.
+                let (nested, complete) = if depth < MAX_NESTING_DEPTH {
+                    decode_proto_fields(payload, depth + 1)
+                } else {
+                    (Vec::new(), false)
+                };
+                if complete && looks_like_valid_message(&nested, payload.len()) {
+                    ProtoValue::Message(nested)
+                } else {
+                    ProtoValue::Bytes(payload.to_vec())
+                }
+            }
+            5 => {
+                // 32-bit fixed, little-endian
+                let raw = data
+                    .get(offset..offset + 4)
+                    .and_then(|s| <[u8; 4]>::try_from(s).ok());
+                match raw {
+                    Some(bytes) => {
+                        offset += 4;
+                        ProtoValue::Fixed32(u32::from_le_bytes(bytes))
+                    }
+                    None => return (fields, false),
+                }
+            }
+            // Unknown or unsupported wire type (groups, reserved) — stop parsing
+            _ => return (fields, false),
+        };
+
+        fields.push(ProtoField {
+            number: field_number,
+            value,
+        });
+    }
+
+    (fields, true)
+}
+
+/// Heuristic filter: do these decoded fields plausibly form a real protobuf
+/// message, as opposed to arbitrary bytes that happened to decode?
+///
+/// Rejects an empty field list and any field number of 10000 or above. A
+/// payload longer than 100 bytes that produced exactly one field is accepted
+/// only when that field is bytes or a nested message (a long string with a
+/// lucky first-field prefix would otherwise slip through).
+fn looks_like_valid_message(fields: &[ProtoField], original_len: usize) -> bool {
+    let (first, rest) = match fields.split_first() {
+        Some(parts) => parts,
+        None => return false,
+    };
+
+    if fields.iter().any(|f| f.number >= 10000) {
+        return false;
+    }
+
+    let lone_field_in_large_payload = rest.is_empty() && original_len > 100;
+    !lone_field_in_large_payload
+        || matches!(first.value, ProtoValue::Bytes(_) | ProtoValue::Message(_))
+}
+
+/// Decode one base-128 varint from the front of `data`.
+///
+/// Returns `(value, bytes_consumed)`, or `None` when `data` ends before a
+/// terminating byte (high bit clear) is found or no such byte occurs within
+/// the first ten bytes.
+pub fn read_varint(data: &[u8]) -> Option<(u64, usize)> {
+    let mut value = 0u64;
+
+    // At most ten bytes can contribute to a 64-bit varint.
+    for (index, byte) in data.iter().copied().take(10).enumerate() {
+        value |= u64::from(byte & 0x7F) << (7 * index);
+        if byte & 0x80 == 0 {
+            return Some((value, index + 1));
+        }
+    }
+
+    None
+}
+
+/// Walk a decoded protobuf tree and return the first usage-like structure.
+///
+/// The current level is tested first; if it does not qualify, each nested
+/// message is searched depth-first in field order. Matching relies on the
+/// field numbers of the reverse-engineered ModelUsageStats schema:
+///
+///   field  1: model (enum/varint)
+///   field  2: input_tokens (uint64)
+///   field  3: output_tokens (uint64)
+///   field  4: cache_write_tokens (uint64)
+///   field  5: cache_read_tokens (uint64)
+///   field  6: api_provider (enum/varint)
+///   field  7: message_id (string)
+///   field  8: response_header (map, repeated message)
+///   field  9: thinking_output_tokens (uint64)
+///   field 10: response_output_tokens (uint64)
+///   field 11: response_id (string)
+pub fn extract_usage_from_proto(fields: &[ProtoField]) -> Option<GrpcUsage> {
+    try_extract_usage(fields).or_else(|| {
+        fields.iter().find_map(|field| match &field.value {
+            ProtoValue::Message(children) => extract_usage_from_proto(children),
+            _ => None,
+        })
+    })
+}
+
+/// Try to extract usage from this specific set of fields.
+///
+/// Uses verified field numbers from the binary's embedded proto descriptor.
+///
+/// The field list is accepted as a usage record only when all of these hold:
+/// - it has at least two varint fields, and at least two of them are
+///   non-zero and no larger than 10,000,000;
+/// - it either shows a strong signal (varints at both field 2 and field 3,
+///   or a string that looks like a model name) or has three or more varints;
+/// - after mapping, `input_tokens` and `output_tokens` are not both zero.
+///
+/// Returns `None` otherwise. Nested messages are not inspected here.
+fn try_extract_usage(fields: &[ProtoField]) -> Option<GrpcUsage> {
+    // We need:
+    // - At least 2 varint fields with values in token range
+    // - Ideally field 2 (input_tokens) or field 3 (output_tokens) present
+    let varint_fields: Vec<_> = fields
+        .iter()
+        .filter(|f| matches!(f.value, ProtoValue::Varint(_)))
+        .collect();
+
+    // UTF-8 `Bytes` fields as (field number, text); used only for the
+    // model-name heuristic below.
+    let string_fields: Vec<_> = fields
+        .iter()
+        .filter_map(|f| {
+            if let ProtoValue::Bytes(ref b) = f.value {
+                std::str::from_utf8(b).ok().map(|s| (f.number, s.to_string()))
+            } else {
+                None
+            }
+        })
+        .collect();
+
+    // Need at least 2 varint fields to be a candidate
+    if varint_fields.len() < 2 {
+        return None;
+    }
+
+    // Check if the varint values make sense as token counts
+    let plausible_token_count = |v: u64| v <= 10_000_000;
+    let plausible_varints = varint_fields
+        .iter()
+        .filter(|f| {
+            if let ProtoValue::Varint(v) = f.value {
+                plausible_token_count(v) && v > 0
+            } else {
+                false
+            }
+        })
+        .count();
+
+    // Need at least 2 non-zero plausible values
+    if plausible_varints < 2 {
+        return None;
+    }
+
+    // Check if there's a model-like string (field 7 = message_id or field 11 = response_id
+    // can contain model names, or model enum values map to known names)
+    // NOTE(review): the "pro" substring is very permissive (it also matches
+    // e.g. "prompt" or "provider") — confirm this looseness is intended.
+    let has_model_string = string_fields.iter().any(|(_, s)| {
+        s.contains("claude") || s.contains("gemini") || s.contains("gpt")
+            || s.starts_with("models/") || s.contains("sonnet") || s.contains("opus")
+            || s.contains("flash") || s.contains("pro")
+    });
+
+    // Check for fields at the known ModelUsageStats field numbers
+    let has_field_2 = fields.iter().any(|f| f.number == 2 && matches!(f.value, ProtoValue::Varint(_)));
+    let has_field_3 = fields.iter().any(|f| f.number == 3 && matches!(f.value, ProtoValue::Varint(_)));
+
+    // Strong signal: has both input and output token fields
+    let is_likely_usage = (has_field_2 && has_field_3) || has_model_string;
+
+    if !is_likely_usage && varint_fields.len() < 3 {
+        // Without strong signal, need more fields
+        return None;
+    }
+
+    // Build usage from exact field numbers (verified from binary)
+    let mut usage = GrpcUsage::default();
+
+    for field in fields {
+        match &field.value {
+            ProtoValue::Varint(v) => {
+                let v = *v;
+                // Values above the plausibility bound are ignored, leaving
+                // the corresponding counter at its default of 0.
+                if !plausible_token_count(v) {
+                    continue;
+                }
+                match field.number {
+                    // field 1 = model enum (varint, not string!)
+                    2 => usage.input_tokens = v,
+                    3 => usage.output_tokens = v,
+                    4 => usage.cache_write_tokens = v,    // VERIFIED: field 4
+                    5 => usage.cache_read_tokens = v,     // VERIFIED: field 5
+                    // field 6 = api_provider enum (varint)
+                    9 => usage.thinking_output_tokens = v, // VERIFIED: field 9
+                    10 => usage.response_output_tokens = v, // VERIFIED: field 10
+                    _ => {}
+                }
+            }
+            ProtoValue::Bytes(ref b) => {
+                // Only valid UTF-8 payloads are recorded as IDs.
+                if let Ok(s) = std::str::from_utf8(b) {
+                    match field.number {
+                        7 => usage.message_id = Some(s.to_string()),
+                        11 => usage.response_id = Some(s.to_string()),
+                        _ => {}
+                    }
+                }
+            }
+            _ => {}
+        }
+    }
+
+    // Model and api_provider are enums (varints), not strings
+    // We can map known enum values later if needed
+    // For now, extract the enum value as a string representation
+    // (this pass applies no plausibility bound to the enum values)
+    for field in fields {
+        if let ProtoValue::Varint(v) = &field.value {
+            match field.number {
+                1 => {
+                    // Model enum — we don't have the mapping, store as number
+                    usage.model = Some(format!("model_enum_{v}"));
+                }
+                6 => {
+                    // APIProvider enum
+                    usage.api_provider = Some(match *v {
+                        0 => "unknown".to_string(),
+                        1 => "google".to_string(),
+                        2 => "anthropic".to_string(),
+                        _ => format!("provider_{v}"),
+                    });
+                }
+                _ => {}
+            }
+        }
+    }
+
+    // Validate — we should have at least input OR output tokens
+    if usage.input_tokens == 0 && usage.output_tokens == 0 {
+        return None;
+    }
+
+    debug!(
+        input = usage.input_tokens,
+        output = usage.output_tokens,
+        thinking = usage.thinking_output_tokens,
+        response = usage.response_output_tokens,
+        cache_read = usage.cache_read_tokens,
+        cache_write = usage.cache_write_tokens,
+        model = ?usage.model,
+        api_provider = ?usage.api_provider,
+        "Proto: extracted ModelUsageStats from protobuf"
+    );
+
+    Some(usage)
+}
+
+/// Scan a gRPC response body for usage data.
+///
+/// The body may hold several frames, compressed or not. Frames are examined
+/// last-to-first (usage normally arrives in the final message) and the first
+/// one that yields a usage record wins. Returns `None` when no frame does.
+pub fn parse_grpc_response_for_usage(body: &[u8]) -> Option<GrpcUsage> {
+    let frames = extract_grpc_messages(body);
+
+    trace!(count = frames.len(), "Proto: extracted gRPC messages");
+
+    let usage = frames
+        .iter()
+        .rev()
+        .find_map(|frame| extract_usage_from_proto(&decode_proto(frame)));
+    usage
+}
+
+// Unit tests for gRPC framing, the generic protobuf decoder and usage extraction.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Single- and two-byte varints decode to (value, bytes consumed).
+    #[test]
+    fn test_read_varint() {
+        assert_eq!(read_varint(&[0x00]), Some((0, 1)));
+        assert_eq!(read_varint(&[0x01]), Some((1, 1)));
+        assert_eq!(read_varint(&[0x96, 0x01]), Some((150, 2)));
+        assert_eq!(read_varint(&[0xAC, 0x02]), Some((300, 2)));
+    }
+
+    // One uncompressed frame comes back as one message of the declared length.
+    #[test]
+    fn test_extract_grpc_messages_uncompressed() {
+        // Construct a test gRPC frame: [0x00] [0x00, 0x00, 0x00, 0x05] [5 bytes data]
+        let mut buf = vec![0u8]; // not compressed
+        buf.extend_from_slice(&5u32.to_be_bytes());
+        buf.extend_from_slice(&[0x08, 0x96, 0x01, 0x10, 0x42]); // field 1 varint 150, field 2 varint 66
+
+        let messages = extract_grpc_messages(&buf);
+        assert_eq!(messages.len(), 1);
+        assert_eq!(messages[0].len(), 5);
+    }
+
+    // A frame flagged as compressed is gunzipped back to the original payload.
+    #[test]
+    fn test_extract_grpc_messages_compressed() {
+        use flate2::write::GzEncoder;
+        use flate2::Compression;
+        use std::io::Write;
+
+        // Create a payload
+        let payload = vec![0x08, 0x96, 0x01, 0x10, 0x42];
+
+        // Compress it
+        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
+        encoder.write_all(&payload).unwrap();
+        let compressed = encoder.finish().unwrap();
+
+        // Build gRPC frame with compressed flag
+        let mut buf = vec![1u8]; // compressed
+        buf.extend_from_slice(&(compressed.len() as u32).to_be_bytes());
+        buf.extend_from_slice(&compressed);
+
+        let messages = extract_grpc_messages(&buf);
+        assert_eq!(messages.len(), 1);
+        assert_eq!(messages[0], payload);
+    }
+
+    // Two consecutive varint fields decode with the right numbers and values.
+    #[test]
+    fn test_decode_proto_varints() {
+        // field 1 = 150, field 2 = 66
+        let data = [0x08, 0x96, 0x01, 0x10, 0x42];
+        let fields = decode_proto(&data);
+        assert_eq!(fields.len(), 2);
+        assert_eq!(fields[0].number, 1);
+        assert!(matches!(fields[0].value, ProtoValue::Varint(150)));
+        assert_eq!(fields[1].number, 2);
+        assert!(matches!(fields[1].value, ProtoValue::Varint(66)));
+    }
+
+    // NOTE(review): only the field count and the first field number are
+    // asserted; the value variant decoded for the string field is not checked.
+    #[test]
+    fn test_decode_proto_with_string() {
+        // field 1 = "hello" (string), field 2 = varint 42
+        let mut data = Vec::new();
+        // field 1, wire type 2 (length-delimited)
+        data.push(0x0A); // (1 << 3) | 2
+        data.push(0x05); // length 5
+        data.extend_from_slice(b"hello");
+        // field 2, wire type 0 (varint)
+        data.push(0x10); // (2 << 3) | 0
+        data.push(0x2A); // 42
+
+        let fields = decode_proto(&data);
+        assert!(fields.len() >= 2);
+        assert_eq!(fields[0].number, 1);
+    }
+
+    // End-to-end check that each token counter is read from its field number.
+    #[test]
+    fn test_extract_usage_correct_field_numbers() {
+        // Build a mock ModelUsageStats with the correct field numbers:
+        //   field 1 (model enum) = 5 (some model)
+        //   field 2 (input_tokens) = 1000
+        //   field 3 (output_tokens) = 500
+        //   field 4 (cache_write_tokens) = 100
+        //   field 5 (cache_read_tokens) = 200
+        //   field 9 (thinking_output_tokens) = 300
+        //   field 10 (response_output_tokens) = 200
+        let mut data = Vec::new();
+
+        // Helper: encode varint field
+        fn encode_varint_field(data: &mut Vec<u8>, field_num: u32, value: u64) {
+            // Tag
+            let tag = (field_num << 3) | 0; // wire type 0
+            let mut t = tag;
+            while t >= 0x80 {
+                data.push((t as u8) | 0x80);
+                t >>= 7;
+            }
+            data.push(t as u8);
+            // Value
+            let mut v = value;
+            while v >= 0x80 {
+                data.push((v as u8) | 0x80);
+                v >>= 7;
+            }
+            data.push(v as u8);
+        }
+
+        encode_varint_field(&mut data, 1, 5);      // model enum
+        encode_varint_field(&mut data, 2, 1000);    // input_tokens
+        encode_varint_field(&mut data, 3, 500);     // output_tokens
+        encode_varint_field(&mut data, 4, 100);     // cache_write_tokens
+        encode_varint_field(&mut data, 5, 200);     // cache_read_tokens
+        encode_varint_field(&mut data, 9, 300);     // thinking_output_tokens
+        encode_varint_field(&mut data, 10, 200);    // response_output_tokens
+
+        let fields = decode_proto(&data);
+        let usage = try_extract_usage(&fields).expect("should extract usage");
+
+        assert_eq!(usage.input_tokens, 1000);
+        assert_eq!(usage.output_tokens, 500);
+        assert_eq!(usage.cache_write_tokens, 100);
+        assert_eq!(usage.cache_read_tokens, 200);
+        assert_eq!(usage.thinking_output_tokens, 300);
+        assert_eq!(usage.response_output_tokens, 200);
+    }
+}
diff --git a/src/mitm/proxy.rs b/src/mitm/proxy.rs
new file mode 100644
index 0000000..9a3993a
--- /dev/null
+++ b/src/mitm/proxy.rs
@@ -0,0 +1,591 @@
+//! MITM proxy server: handles CONNECT tunnels and TLS interception.
+//!
+//! Listens on a local port for HTTP CONNECT requests from the LS.
+//! For intercepted domains, it terminates TLS with our CA-signed cert,
+//! reads/modifies the request, forwards to the real upstream, and captures
+//! the response (especially usage data).
+//!
+//! For non-intercepted domains, it acts as a transparent TCP tunnel.
+
+use super::ca::MitmCa;
+use super::intercept::{
+    extract_cascade_hint, parse_non_streaming_response, parse_streaming_chunk,
+    StreamingAccumulator,
+};
+use super::store::MitmStore;
+use std::sync::Arc;
+use tokio::io::{AsyncReadExt, AsyncWriteExt};
+use tokio::net::{TcpListener, TcpStream};
+use tokio_rustls::TlsAcceptor;
+use tracing::{debug, error, info, trace, warn};
+
+/// Base domains whose traffic is intercepted (TLS terminated and inspected).
+/// `should_intercept_domain` accepts an exact match, a `.`-separated subdomain,
+/// or a `-`-joined regional host (e.g., us-central1-aiplatform.googleapis.com).
+const INTERCEPT_DOMAINS: &[&str] = &[
+    "cloudcode-pa.googleapis.com",
+    "aiplatform.googleapis.com",
+    "api.anthropic.com",
+    "speech.googleapis.com",
+    "modelarmor.googleapis.com",
+];
+
+/// Domains that are never intercepted; an exact match here always means a transparent tunnel.
+const PASSTHROUGH_DOMAINS: &[&str] = &[
+    "oauth2.googleapis.com",
+    "accounts.google.com",
+    "storage.googleapis.com",
+    "www.googleapis.com",
+    "firebaseinstallations.googleapis.com",
+    "crashlyticsreports-pa.googleapis.com",
+    "play.googleapis.com",
+    "update.googleapis.com",
+    "dl.google.com",
+];
+
+/// Configuration for the MITM proxy.
+///
+/// `Default` yields `port: 0` and `modify_requests: false`. Both are the zero
+/// values of their types, so the impl is derived instead of written by hand.
+#[derive(Debug, Clone, Default)]
+pub struct MitmConfig {
+    /// Port to listen on.
+    ///
+    /// `0` asks the OS to auto-assign one; `run` returns the port actually bound.
+    pub port: u16,
+    /// Whether to enable request modification.
+    ///
+    /// NOTE(review): the HTTP/1.1 handler in this file takes this flag as
+    /// `_modify_requests` and does not act on it yet.
+    pub modify_requests: bool,
+}
+
+/// Run the MITM proxy server.
+///
+/// Returns (port, task_handle) — port it's listening on, handle to abort on shutdown.
+/// Fails with a message if the listener cannot be bound or its address read.
+pub async fn run(
+    ca: Arc<MitmCa>,
+    store: MitmStore,
+    config: MitmConfig,
+) -> Result<(u16, tokio::task::JoinHandle<()>), String> {
+    let listener = TcpListener::bind(format!("127.0.0.1:{}", config.port))
+        .await
+        .map_err(|e| format!("MITM bind failed: {e}"))?;
+    let port = listener
+        .local_addr()
+        .map_err(|e| format!("MITM local_addr failed: {e}"))?
+        .port();
+    info!(port, "MITM proxy listening");
+
+    // Back off after a failed accept (e.g. fd exhaustion) instead of spinning in a hot loop.
+    const ACCEPT_BACKOFF: std::time::Duration = std::time::Duration::from_millis(100);
+    let modify_requests = config.modify_requests;
+    let handle = tokio::spawn(async move {
+        loop {
+            let (stream, addr) = match listener.accept().await {
+                Ok(accepted) => accepted,
+                Err(e) => {
+                    error!(error = %e, "MITM accept error");
+                    tokio::time::sleep(ACCEPT_BACKOFF).await;
+                    continue;
+                }
+            };
+            trace!(?addr, "MITM: new connection");
+            let (ca, store) = (ca.clone(), store.clone());
+            tokio::spawn(async move {
+                if let Err(e) = handle_connection(stream, ca, store, modify_requests).await {
+                    debug!(error = %e, "MITM connection error");
+                }
+            });
+        }
+    });
+
+    Ok((port, handle))
+}
+
+/// Serve one client connection accepted by the proxy listener.
+///
+/// The first read must contain an HTTP `CONNECT host:port` request line; anything
+/// else is answered with `400 Bad Request`. A valid CONNECT is acknowledged with
+/// `200 Connection Established`, after which the stream is either TLS-intercepted
+/// or tunnelled verbatim, as decided by `should_intercept_domain`.
+///
+/// Returns `Err` for I/O failures on the CONNECT exchange or from the chosen handler.
+async fn handle_connection(
+    mut stream: TcpStream,
+    ca: Arc<MitmCa>,
+    store: MitmStore,
+    modify_requests: bool,
+) -> Result<(), String> {
+    // A single read is assumed to carry the whole CONNECT preamble.
+    let mut preamble = vec![0u8; 8192];
+    let len = stream
+        .read(&mut preamble)
+        .await
+        .map_err(|e| format!("Read CONNECT: {e}"))?;
+    if len == 0 {
+        // Peer hung up without sending anything.
+        return Ok(());
+    }
+
+    let text = String::from_utf8_lossy(&preamble[..len]);
+    let mut tokens = text.lines().next().unwrap_or("").split_whitespace();
+
+    // Request line shape: "CONNECT host:port HTTP/1.1" (at least three tokens).
+    let target = match (tokens.next(), tokens.next(), tokens.next()) {
+        (Some("CONNECT"), Some(target), Some(_version)) => target,
+        _ => {
+            // Not a CONNECT request — return 400 (best effort, write errors ignored)
+            let _ = stream.write_all(b"HTTP/1.1 400 Bad Request\r\n\r\n").await;
+            return Ok(());
+        }
+    };
+
+    // Split at the last ':'; a missing or unparsable port falls back to 443.
+    let (domain, port) = target
+        .rsplit_once(':')
+        .map_or((target, 443), |(host, p)| (host, p.parse::<u16>().unwrap_or(443)));
+
+    debug!(domain, "MITM: CONNECT request");
+
+    // The routing decision is made first, but the 200 is sent in both cases.
+    let intercept = should_intercept_domain(domain);
+    stream
+        .write_all(b"HTTP/1.1 200 Connection Established\r\n\r\n")
+        .await
+        .map_err(|e| format!("Write 200: {e}"))?;
+
+    if !intercept {
+        return handle_passthrough(stream, domain, port).await;
+    }
+    handle_intercepted(stream, domain, ca, store, modify_requests).await
+}
+
+/// Check if a domain should be intercepted.
+///
+/// Entries of `PASSTHROUGH_DOMAINS` (exact match) always win and are never
+/// intercepted. Otherwise the domain is intercepted when it equals an entry
+/// of `INTERCEPT_DOMAINS`, is a subdomain of one (`foo.<entry>`), or is a
+/// regional variant of one (`us-central1-<entry>`). Everything else is
+/// passed through untouched.
+///
+/// Matching is case-sensitive and works on string slices, so no temporary
+/// strings are allocated per lookup.
+fn should_intercept_domain(domain: &str) -> bool {
+    if PASSTHROUGH_DOMAINS.contains(&domain) {
+        return false;
+    }
+    INTERCEPT_DOMAINS.iter().any(|&base| {
+        // `prefix` is whatever precedes the base domain: empty for an exact
+        // match, otherwise it must end at a '.' or '-' boundary.
+        domain.strip_suffix(base).map_or(false, |prefix| {
+            prefix.is_empty() || prefix.ends_with('.') || prefix.ends_with('-')
+        })
+    })
+}
+
+/// Terminate the client's TLS session and dispatch on the negotiated protocol.
+///
+/// A server config for `domain` is obtained from the CA and used to complete
+/// the handshake with the client (LS). The ALPN result then selects the handler:
+/// - `h2` → HTTP/2 handler (for gRPC traffic to Google APIs)
+/// - `http/1.1` or none → HTTP/1.1 handler (for REST/SSE traffic)
+///
+/// Note that `modify_requests` is only forwarded to the HTTP/1.1 handler.
+///
+/// # Errors
+/// Propagates CA, handshake and handler failures as strings.
+async fn handle_intercepted(
+    stream: TcpStream,
+    domain: &str,
+    ca: Arc<MitmCa>,
+    store: MitmStore,
+    modify_requests: bool,
+) -> Result<(), String> {
+    info!(domain, "MITM: intercepting TLS");
+
+    // The CA supplies the TLS server config to present for this domain.
+    let tls_config = ca.server_config_for_domain(domain).await?;
+
+    // Perform the TLS handshake with the client (LS).
+    let tls_stream = match TlsAcceptor::from(tls_config).accept(stream).await {
+        Ok(accepted) => accepted,
+        Err(e) => return Err(format!("TLS handshake with client failed for {domain}: {e}")),
+    };
+
+    // Copy the ALPN id out so the borrow of the session ends before dispatch.
+    let (_, session) = tls_stream.get_ref();
+    let alpn = session
+        .alpn_protocol()
+        .map(|proto| String::from_utf8_lossy(proto).to_string());
+
+    debug!(domain, alpn = ?alpn, "MITM: TLS handshake successful");
+
+    if alpn.as_deref() == Some("h2") {
+        // HTTP/2 — use the hyper-based gRPC handler
+        info!(domain, "MITM: routing to HTTP/2 handler (gRPC)");
+        return super::h2_handler::handle_h2_connection(
+            tls_stream,
+            domain.to_string(),
+            store,
+        )
+        .await;
+    }
+
+    // HTTP/1.1 or no ALPN — use the existing handler
+    debug!(domain, "MITM: routing to HTTP/1.1 handler");
+    handle_http_over_tls(tls_stream, domain, store, modify_requests).await
+}
+
+/// Handle HTTP/1.1 traffic over the decrypted TLS connection.
+///
+/// Loops to handle multiple requests on the same connection (HTTP keep-alive).
+/// Reads a full request, forwards it to the lazily connected, reused upstream,
+/// and relays the response to the client while capturing usage data.
+///
+/// The relay ends once the response is complete (`Content-Length` reached or the
+/// final chunk `0\r\n\r\n` seen), not only on upstream close or the 5-minute timeout.
+async fn handle_http_over_tls(
+    mut client: tokio_rustls::server::TlsStream<TcpStream>,
+    domain: &str,
+    store: MitmStore,
+    _modify_requests: bool,
+) -> Result<(), String> {
+    /// Final bytes of a chunked body (heuristic: assumes no trailer fields).
+    const CHUNKED_END: &[u8] = b"0\r\n\r\n";
+    // Per-request timeout: 5 minutes (covers large context API calls)
+    const READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(300);
+    let mut tmp = vec![0u8; 32768];
+    // Build upstream TLS connector once for this connection
+    let mut root_store = rustls::RootCertStore::empty();
+    let native_certs = rustls_native_certs::load_native_certs();
+    for cert in native_certs.certs {
+        let _ = root_store.add(cert);
+    }
+    let upstream_config = Arc::new(
+        rustls::ClientConfig::builder()
+            .with_root_certificates(root_store)
+            .with_no_client_auth(),
+    );
+
+    // Reusable upstream connection — created lazily, reconnected if stale
+    let mut upstream: Option<tokio_rustls::client::TlsStream<TcpStream>> = None;
+
+    /// Connect (or reconnect) to the real upstream via TLS.
+    async fn connect_upstream(
+        domain: &str,
+        config: &Arc<rustls::ClientConfig>,
+    ) -> Result<tokio_rustls::client::TlsStream<TcpStream>, String> {
+        let connector = tokio_rustls::TlsConnector::from(config.clone());
+        let tcp = TcpStream::connect(format!("{domain}:443"))
+            .await
+            .map_err(|e| format!("Connect to upstream {domain}: {e}"))?;
+        let server_name = rustls::pki_types::ServerName::try_from(domain.to_string())
+            .map_err(|e| format!("Invalid server name: {e}"))?;
+        connector
+            .connect(server_name, tcp)
+            .await
+            .map_err(|e| format!("TLS connect to upstream {domain}: {e}"))
+    }
+
+    /// Append `data` to `tail`, keeping only the last `CHUNKED_END.len()` bytes.
+    fn remember_tail(tail: &mut Vec<u8>, data: &[u8]) {
+        tail.extend_from_slice(&data[data.len().saturating_sub(CHUNKED_END.len())..]);
+        let excess = tail.len().saturating_sub(CHUNKED_END.len());
+        tail.drain(..excess);
+    }
+
+    // Keep-alive loop: handle multiple requests on this connection
+    loop {
+        // ── Read the HTTP request from the client ─────────────────────────
+        let mut request_buf = Vec::with_capacity(1024 * 64);
+        loop {
+            let n = match client.read(&mut tmp).await {
+                Ok(0) => return Ok(()), // Client closed connection cleanly
+                Ok(n) => n,
+                Err(e) => {
+                    // Connection reset / broken pipe is normal for keep-alive end
+                    debug!(domain, error = %e, "MITM: client read finished");
+                    return Ok(());
+                }
+            };
+            request_buf.extend_from_slice(&tmp[..n]);
+            if has_complete_http_request(&request_buf) {
+                break;
+            }
+        }
+
+        // Parse the HTTP request; try to extract the cascade hint from its body
+        let (headers_end, content_length, is_streaming_request) = parse_http_request_meta(&request_buf);
+        let cascade_hint = if headers_end < request_buf.len() {
+            extract_cascade_hint(&request_buf[headers_end..])
+        } else {
+            None
+        };
+        debug!(
+            domain,
+            content_length,
+            streaming = is_streaming_request,
+            cascade = ?cascade_hint,
+            "MITM: forwarding request to upstream"
+        );
+
+        // ── Ensure upstream: connect lazily, or reconnect if the previous one died ──
+        let conn = match upstream.as_mut() {
+            Some(c) => c,
+            None => {
+                let c = connect_upstream(domain, &upstream_config).await?;
+                upstream.insert(c)
+            }
+        };
+
+        // Forward the request — if write fails, reconnect and retry once
+        if let Err(e) = conn.write_all(&request_buf).await {
+            debug!(domain, error = %e, "MITM: upstream write failed, reconnecting");
+            let c = connect_upstream(domain, &upstream_config).await?;
+            let conn = upstream.insert(c);
+            conn.write_all(&request_buf)
+                .await
+                .map_err(|e| format!("Write to upstream (retry): {e}"))?;
+        }
+        let conn = upstream.as_mut().unwrap();
+
+        // ── Relay the response back to the client ───────────────────────────
+        let mut streaming_acc = StreamingAccumulator::new();
+        let mut is_streaming_response = false;
+        let mut headers_parsed = false;
+        // Response head; for non-streaming responses also the body (for usage parsing)
+        let mut non_streaming_buf: Option<Vec<u8>> = None;
+        // Track if upstream connection is still usable after this response
+        let mut upstream_ok = true;
+        // Body framing, known once the response headers have been parsed
+        let mut expected_body: Option<usize> = None; // Content-Length, if declared
+        let mut chunked = false; // final Transfer-Encoding is "chunked"
+        let mut body_received = 0usize;
+        let mut body_tail: Vec<u8> = Vec::new();
+
+        loop {
+            let n = match tokio::time::timeout(READ_TIMEOUT, conn.read(&mut tmp)).await {
+                Ok(Ok(0)) => {
+                    // Upstream closed — connection is no longer reusable
+                    upstream_ok = false;
+                    break;
+                }
+                Ok(Ok(n)) => n,
+                Ok(Err(e)) => {
+                    debug!(domain, error = %e, "MITM: upstream read finished");
+                    upstream_ok = false;
+                    break;
+                }
+                Err(_) => {
+                    warn!(domain, "MITM: upstream read timed out after 5 minutes");
+                    upstream_ok = false;
+                    break;
+                }
+            };
+            let chunk = &tmp[..n];
+            if !headers_parsed {
+                // Buffer until the end of the response headers has arrived
+                let buf = non_streaming_buf.get_or_insert_with(|| Vec::with_capacity(1024 * 64));
+                buf.extend_from_slice(chunk);
+                let manual_end = match find_headers_end(buf) {
+                    Some(end) => end,
+                    None => continue,
+                };
+                let mut resp_headers = [httparse::EMPTY_HEADER; 64];
+                let mut resp = httparse::Response::new(&mut resp_headers);
+                let hdr_end = match resp.parse(buf) {
+                    Ok(httparse::Status::Complete(n)) => n,
+                    _ => manual_end, // Fallback to manual detection
+                };
+                // Detect content type, connection handling and body framing
+                for header in resp.headers.iter() {
+                    let val = match std::str::from_utf8(header.value) {
+                        Ok(v) => v.trim(),
+                        Err(_) => continue,
+                    };
+                    if header.name.eq_ignore_ascii_case("content-type") {
+                        is_streaming_response |= val.contains("text/event-stream");
+                    } else if header.name.eq_ignore_ascii_case("connection") {
+                        upstream_ok &= !val.eq_ignore_ascii_case("close");
+                    } else if header.name.eq_ignore_ascii_case("content-length") {
+                        expected_body = val.parse().ok();
+                    } else if header.name.eq_ignore_ascii_case("transfer-encoding") {
+                        // Only the final coding decides the framing ("gzip, chunked")
+                        let last = val.rsplit(',').next().unwrap_or("");
+                        chunked = last.trim().eq_ignore_ascii_case("chunked");
+                    }
+                }
+                headers_parsed = true;
+                let body = &buf[hdr_end..];
+                body_received = body.len();
+                remember_tail(&mut body_tail, body);
+                if is_streaming_response {
+                    // Parse SSE data that came with the headers, forward it all, stop buffering
+                    let body_so_far = String::from_utf8_lossy(body);
+                    if !body_so_far.is_empty() {
+                        parse_streaming_chunk(&body_so_far, &mut streaming_acc);
+                    }
+                    if let Err(e) = client.write_all(buf).await {
+                        warn!(error = %e, "MITM: write to client failed");
+                        upstream_ok = false; // unread response data may remain
+                        break;
+                    }
+                    non_streaming_buf = None;
+                }
+            } else {
+                body_received += n;
+                remember_tail(&mut body_tail, chunk);
+                if is_streaming_response {
+                    let chunk_str = String::from_utf8_lossy(chunk);
+                    parse_streaming_chunk(&chunk_str, &mut streaming_acc);
+                    if let Err(e) = client.write_all(chunk).await {
+                        warn!(error = %e, "MITM: write to client failed (client disconnected?)");
+                        upstream_ok = false; // unread response data may remain
+                        break;
+                    }
+                } else if let Some(ref mut buf) = non_streaming_buf {
+                    buf.extend_from_slice(chunk);
+                }
+            }
+
+            // Stop as soon as the declared framing says the response is complete
+            let complete = (chunked && body_tail.ends_with(CHUNKED_END))
+                || (!chunked && expected_body.map_or(false, |len| body_received >= len));
+            if complete {
+                break;
+            }
+        }
+
+        // Forward non-streaming response all at once
+        if !is_streaming_response {
+            if let Some(ref buf) = non_streaming_buf {
+                if let Err(e) = client.write_all(buf).await {
+                    warn!(error = %e, "MITM: write to client failed");
+                }
+            }
+        }
+        // Capture usage data
+        if is_streaming_response {
+            if streaming_acc.is_complete || streaming_acc.output_tokens > 0 {
+                let usage = streaming_acc.into_usage();
+                store.record_usage(cascade_hint.as_deref(), usage).await;
+            }
+        } else if let Some(ref buf) = non_streaming_buf {
+            if let Some(body_start) = find_headers_end(buf) {
+                let body = &buf[body_start..];
+                if let Some(usage) = parse_non_streaming_response(body) {
+                    store.record_usage(cascade_hint.as_deref(), usage).await;
+                }
+            }
+        }
+        // If upstream is no longer reusable, drop it so next iteration reconnects
+        if !upstream_ok {
+            upstream = None;
+        }
+    } // end keep-alive loop
+}
+
+/// Tunnel bytes verbatim between the client and `domain:port`, with no TLS termination.
+///
+/// Only a failed upstream connect is returned as an error. However the copy ends
+/// afterwards, the outcome is logged at trace level and reported as success.
+async fn handle_passthrough(
+    mut client: TcpStream,
+    domain: &str,
+    port: u16,
+) -> Result<(), String> {
+    trace!(domain, port, "MITM: transparent tunnel");
+    let target = format!("{domain}:{port}");
+    let mut upstream = match TcpStream::connect(target).await {
+        Ok(socket) => socket,
+        Err(e) => return Err(format!("Connect to {domain}:{port}: {e}")),
+    };
+
+    match tokio::io::copy_bidirectional(&mut client, &mut upstream).await {
+        Err(e) => trace!(domain, error = %e, "MITM: tunnel error (likely clean close)"),
+        Ok((client_to_server, server_to_client)) => {
+            trace!(domain, client_to_server, server_to_client, "MITM: tunnel closed");
+        }
+    }
+    Ok(())
+}
+
+/// Check if buffer contains a complete HTTP request (headers + full body).
+/// Uses `httparse` for zero-copy, case-insensitive header parsing.
+///
+/// A body whose final `Transfer-Encoding` is `chunked` is complete once it ends
+/// with `0\r\n\r\n` (heuristic; takes precedence over `Content-Length`). Otherwise
+/// `Content-Length` bytes are awaited; with neither header no body is expected.
+fn has_complete_http_request(buf: &[u8]) -> bool {
+    let mut headers = [httparse::EMPTY_HEADER; 64];
+    let mut req = httparse::Request::new(&mut headers);
+    let headers_end = match req.parse(buf) {
+        Ok(httparse::Status::Complete(n)) => n,
+        _ => return false, // Incomplete or parse error — need more data
+    };
+    let body = &buf[headers_end..];
+
+    let mut declared_len: Option<usize> = None;
+    for header in req.headers.iter() {
+        let val = match std::str::from_utf8(header.value) {
+            Ok(v) => v.trim(),
+            Err(_) => continue,
+        };
+        if header.name.eq_ignore_ascii_case("transfer-encoding") {
+            let last = val.rsplit(',').next().unwrap_or("").trim();
+            if last.eq_ignore_ascii_case("chunked") {
+                return body.ends_with(b"0\r\n\r\n");
+            }
+        } else if header.name.eq_ignore_ascii_case("content-length") && declared_len.is_none() {
+            declared_len = val.parse().ok();
+        }
+    }
+
+    // Compare lengths directly: `headers_end + len` could overflow on a hostile Content-Length.
+    declared_len.map_or(true, |len| body.len() >= len)
+}
+
+/// Locate the header terminator and return the offset just past `\r\n\r\n`.
+fn find_headers_end(buf: &[u8]) -> Option<usize> {
+    const TERMINATOR: &[u8] = b"\r\n\r\n";
+    let start = buf.windows(TERMINATOR.len()).position(|w| w == TERMINATOR)?;
+    Some(start + TERMINATOR.len())
+}
+
+/// Extract request metadata from raw bytes using `httparse`.
+///
+/// Returns (headers_end, content_length, is_streaming_request):
+/// `headers_end` is the offset where the body starts, `content_length` is the
+/// `Content-Length` value (0 when absent or unparsable; the last such header
+/// wins), and `is_streaming_request` is true when the body text contains a
+/// `"stream": true` JSON flag. If `httparse` cannot parse the head, the header
+/// end is located manually and the other two values are `0` / `false`.
+fn parse_http_request_meta(buf: &[u8]) -> (usize, usize, bool) {
+    let mut header_slots = [httparse::EMPTY_HEADER; 64];
+    let mut parsed = httparse::Request::new(&mut header_slots);
+    let headers_end = if let Ok(httparse::Status::Complete(end)) = parsed.parse(buf) {
+        end
+    } else {
+        // Fallback if httparse can't parse
+        return (find_headers_end(buf).unwrap_or(buf.len()), 0, false);
+    };
+
+    // Last Content-Length header with a UTF-8 value decides; junk counts as 0.
+    let content_length = parsed
+        .headers
+        .iter()
+        .filter(|h| h.name.eq_ignore_ascii_case("content-length"))
+        .filter_map(|h| std::str::from_utf8(h.value).ok())
+        .last()
+        .map_or(0usize, |raw| raw.trim().parse().unwrap_or(0));
+
+    // Plain substring probe on the lossily decoded body; an empty body never matches.
+    let body_text = String::from_utf8_lossy(&buf[headers_end..]);
+    let is_streaming = ["\"stream\":true", "\"stream\": true"]
+        .iter()
+        .any(|&flag| body_text.contains(flag));
+    (headers_end, content_length, is_streaming)
+}
+
diff --git a/src/mitm/store.rs b/src/mitm/store.rs
new file mode 100644
index 0000000..0dd1ed4
--- /dev/null
+++ b/src/mitm/store.rs
@@ -0,0 +1,163 @@
+//! Shared store for intercepted API usage data.
+//!
+//! The MITM proxy writes usage data here; the API handlers read from it.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+use tokio::sync::RwLock;
+use serde::{Deserialize, Serialize};
+use tracing::debug;
+
+/// Token usage from an intercepted API response.
+/// Covers both Anthropic JSON/SSE responses and Google gRPC protobuf responses;
+/// fields map to the superset of Anthropic's `usage` object and Google's
+/// `ModelUsageStats` proto. `Default` gives zero counters and `None` options.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct ApiUsage {
+    pub input_tokens: u64,
+    pub output_tokens: u64,
+    /// Anthropic: cache_creation_input_tokens / Google: cache_write_tokens
+    pub cache_creation_input_tokens: u64,
+    /// Anthropic: cache_read_input_tokens / Google: cache_read_tokens
+    pub cache_read_input_tokens: u64,
+    /// Google-specific: thinking/reasoning output tokens (extended thinking)
+    pub thinking_output_tokens: u64,
+    /// Google-specific: response output tokens (non-thinking portion)
+    pub response_output_tokens: u64,
+    /// Total cost in USD (if provided by the API).
+    pub total_cost_usd: Option<f64>,
+    /// The actual model that served the request; keys the per-model stats in `MitmStats`.
+    pub model: Option<String>,
+    /// Stop reason / finish reason from the API.
+    pub stop_reason: Option<String>,
+    /// API provider (e.g. "anthropic", "google")
+    pub api_provider: Option<String>,
+    /// gRPC method path (e.g. "/google.internal.cloud.code.v1internal.PredictionService/GenerateContent")
+    pub grpc_method: Option<String>,
+    /// Capture timestamp (unit not visible here); `MitmStore` evicts the smallest value first.
+    pub captured_at: u64,
+}
+
+/// Thread-safe store for intercepted data.
+///
+/// Clones share state: both fields are `Arc`-wrapped async `RwLock`s.
+/// Usage is keyed by cascade ID, or by `"_latest"` when no cascade is known.
+#[derive(Clone)]
+pub struct MitmStore {
+    /// Most recent usage per cascade ID.
+    latest_usage: Arc<RwLock<HashMap<String, ApiUsage>>>,
+    /// Global aggregate stats.
+    stats: Arc<RwLock<MitmStats>>,
+}
+
+/// Running totals over every usage record passed to `MitmStore::record_usage`.
+#[derive(Debug, Clone, Default, Serialize)]
+pub struct MitmStats {
+    pub total_requests: u64,
+    pub total_input_tokens: u64,
+    pub total_output_tokens: u64,
+    pub total_cache_read_tokens: u64,
+    pub total_cache_creation_tokens: u64,
+    pub total_thinking_output_tokens: u64,
+    pub total_response_output_tokens: u64,
+    /// Per-model usage breakdown (model name → stats); only records that name a model count.
+    pub per_model: HashMap<String, ModelStats>,
+}
+
+/// Per-model usage counters (no thinking/response token split, unlike `MitmStats`).
+#[derive(Debug, Clone, Default, Serialize)]
+pub struct ModelStats {
+    pub requests: u64,
+    pub input_tokens: u64,
+    pub output_tokens: u64,
+    pub cache_read_tokens: u64,
+    pub cache_creation_tokens: u64,
+}
+
+impl MitmStore {
+    /// Create an empty store with zeroed aggregate stats.
+    pub fn new() -> Self {
+        Self {
+            latest_usage: Arc::new(RwLock::new(HashMap::new())),
+            stats: Arc::new(RwLock::new(MitmStats::default())),
+        }
+    }
+
+    /// Record a completed API exchange with usage data.
+    /// Updates the aggregate/per-model stats, then stores `usage` as the latest
+    /// record for `cascade_id` (or under `"_latest"` when the cascade is unknown).
+    pub async fn record_usage(&self, cascade_id: Option<&str>, usage: ApiUsage) {
+        debug!(
+            input = usage.input_tokens,
+            output = usage.output_tokens,
+            cache_read = usage.cache_read_input_tokens,
+            cache_create = usage.cache_creation_input_tokens,
+            thinking = usage.thinking_output_tokens,
+            response = usage.response_output_tokens,
+            model = ?usage.model,
+            provider = ?usage.api_provider,
+            grpc = ?usage.grpc_method,
+            "MITM captured API usage"
+        );
+        // Update aggregate stats
+        {
+            let mut stats = self.stats.write().await;
+            stats.total_requests += 1;
+            stats.total_input_tokens += usage.input_tokens;
+            stats.total_output_tokens += usage.output_tokens;
+            stats.total_cache_read_tokens += usage.cache_read_input_tokens;
+            stats.total_cache_creation_tokens += usage.cache_creation_input_tokens;
+            stats.total_thinking_output_tokens += usage.thinking_output_tokens;
+            stats.total_response_output_tokens += usage.response_output_tokens;
+            // Per-model breakdown
+            if let Some(ref model_name) = usage.model {
+                let model_stats = stats.per_model.entry(model_name.clone()).or_default();
+                model_stats.requests += 1;
+                model_stats.input_tokens += usage.input_tokens;
+                model_stats.output_tokens += usage.output_tokens;
+                model_stats.cache_read_tokens += usage.cache_read_input_tokens;
+                model_stats.cache_creation_tokens += usage.cache_creation_input_tokens;
+            }
+        }
+
+        // Store latest usage for the cascade (if we can identify it)
+        let key = cascade_id.unwrap_or("_latest");
+        let mut latest = self.latest_usage.write().await;
+        latest.insert(key.to_string(), usage);
+
+        // Evict the oldest *other* entry once over the cap to bound memory; the record
+        // just written is never the victim, even if its `captured_at` is the smallest.
+        const MAX_ENTRIES: usize = 500;
+        if latest.len() > MAX_ENTRIES {
+            let oldest_key = latest
+                .iter()
+                .filter(|(k, _)| k.as_str() != key)
+                .min_by_key(|(_, v)| v.captured_at)
+                .map(|(k, _)| k.clone());
+            if let Some(victim) = oldest_key {
+                latest.remove(&victim);
+            }
+        }
+    }
+
+    /// Get the latest usage for a cascade, consuming it (one-shot read).
+    /// Only returns exact cascade_id matches — no cross-cascade fallback.
+    /// The `_latest` key is only consumed when the caller explicitly requests it
+    /// (i.e., when the MITM couldn't identify the cascade).
+    pub async fn take_usage(&self, cascade_id: &str) -> Option<ApiUsage> {
+        let mut latest = self.latest_usage.write().await;
+        latest.remove(cascade_id)
+    }
+
+    /// Peek at the latest usage without consuming it.
+    #[allow(dead_code)]
+    pub async fn peek_usage(&self, cascade_id: &str) -> Option<ApiUsage> {
+        let latest = self.latest_usage.read().await;
+        latest.get(cascade_id).cloned()
+    }
+
+    /// Get aggregate stats.
+    pub async fn stats(&self) -> MitmStats {
+        self.stats.read().await.clone()
+    }
+}
diff --git a/src/proto.rs b/src/proto.rs
new file mode 100644
index 0000000..bb558c9
--- /dev/null
+++ b/src/proto.rs
@@ -0,0 +1,233 @@
+//! Protobuf wire-format encoder — byte-exact match to the real Antigravity webview.
+//!
+//! This is a minimal, hand-rolled encoder. We do NOT use prost or any codegen
+//! because we need precise control over field ordering and encoding to produce
+//! byte-identical output to the captured webview traffic.
+
+use crate::constants::{client_version, CLIENT_NAME};
+
+// ─── Wire primitives ────────────────────────────────────────────────────────
+
+/// Encode `val` as a protobuf varint (7-bit groups, low group first, MSB = "more").
+pub fn varint(mut val: u64) -> Vec<u8> {
+    let mut out = Vec::with_capacity(10); // a u64 needs at most 10 groups
+    loop {
+        let low7 = (val & 0x7F) as u8;
+        val >>= 7;
+        if val == 0 {
+            out.push(low7);
+            return out;
+        }
+        out.push(low7 | 0x80);
+    }
+}
+
+/// Encode a field key: the varint of `(field_number << 3) | wire_type`.
+pub fn tag(field: u32, wire: u8) -> Vec<u8> {
+    varint((u64::from(field) << 3) | u64::from(wire))
+}
+
+/// Wire type 2 (LEN): `val` as a length-prefixed string/bytes field.
+pub fn proto_string(field: u32, val: &[u8]) -> Vec<u8> {
+    let key = tag(field, 2);
+    let len = varint(val.len() as u64);
+    // Layout on the wire: key, payload length, payload bytes.
+    [key.as_slice(), len.as_slice(), val].concat()
+}
+
+/// Wire type 2 (LEN): `inner`, an already-encoded message, as a sub-message field.
+pub fn proto_message(field: u32, inner: &[u8]) -> Vec<u8> {
+    // Header = field key, then the payload length as a varint.
+    let mut header = tag(field, 2);
+    header.extend(varint(inner.len() as u64));
+    [header.as_slice(), inner].concat()
+}
+
+/// Wire type 0 (VARINT): boolean field, encoded as varint 1 (`true`) or 0 (`false`).
+pub fn bool_field(field: u32, val: bool) -> Vec<u8> {
+    let key = tag(field, 0);
+    let flag = varint(u64::from(val));
+    [key, flag].concat()
+}
+
+/// Wire type 0 (VARINT): the field key followed by `val` encoded as a varint.
+pub fn varint_field(field: u32, val: u64) -> Vec<u8> {
+    let key = tag(field, 0);
+    let payload = varint(val);
+    key.into_iter().chain(payload).collect()
+}
+
+// ─── SendUserCascadeMessageRequest builder ───────────────────────────────────
+
+/// Build the `SendUserCascadeMessageRequest` protobuf binary.
+///
+/// The output is meant to be byte-identical to real Antigravity webview traffic (verified
+/// against a Chrome DevTools network capture, 2026-02-12), so the field order must not change.
+///
+/// Field layout:
+///   1: cascade_id (string)
+///   2: { 1: text } (message)
+///   3: metadata { 1: client_name, 3: oauth_token, 4: "en", 7: version, 12: client_name }
+///   5: PlannerConfig { 1: inner_config, 7: { 1: 1 } }
+///      inner_config contains: f2 (conv mode), f13 (tool config), f15 (model), f21 (ephemeral), f32 (knowledge)
+///  11: conversation_history = true
+pub fn build_request(cascade_id: &str, text: &str, oauth_token: &str, model_enum: u32) -> Vec<u8> {
+    let mut msg = Vec::with_capacity(256); // starting capacity only; the Vec grows as needed
+
+    // Field 1: cascade_id (length-delimited string)
+    msg.extend(proto_string(1, cascade_id.as_bytes()));
+
+    // Field 2: sub-message { field 1: text }
+    msg.extend(proto_message(2, &proto_string(1, text.as_bytes())));
+
+    // Field 3: Metadata (Auth + Client ID); CLIENT_NAME is written twice, as fields 1 and 12
+    let mut meta = Vec::new();
+    meta.extend(proto_string(1, CLIENT_NAME.as_bytes()));
+    meta.extend(proto_string(3, oauth_token.as_bytes()));
+    meta.extend(proto_string(4, b"en"));
+    meta.extend(proto_string(7, client_version().as_bytes()));
+    meta.extend(proto_string(12, CLIENT_NAME.as_bytes()));
+    msg.extend(proto_message(3, &meta));
+
+    // Field 5: PlannerConfig — `inner` accumulates the payload of its field 1
+    let mut inner = Vec::new();
+
+    // field 2: conversational mode { f4: 1, f14: 0 } (the zero is written explicitly)
+    let conv_mode = [varint_field(4, 1), varint_field(14, 0)].concat();
+    inner.extend(proto_message(2, &conv_mode));
+
+    // field 13: toolConfig
+    //   field 8 (runCommand): field 3 (autoCommandConfig) -> field 6 (policy) = 3 (EAGER)
+    //   field 33 (artifactReviewPolicy): field 1 = 2 (TURBO)
+    let run_cmd = proto_message(3, &varint_field(6, 3));
+    let tool_config = [
+        proto_message(8, &run_cmd),
+        proto_message(33, &varint_field(1, 2)),
+    ]
+    .concat();
+    inner.extend(proto_message(13, &tool_config));
+
+    // field 15: requested model { f1: model_enum }, widened to u64 for varint encoding
+    inner.extend(proto_message(15, &varint_field(1, model_enum as u64)));
+
+    // field 21: ephemeral messages config { f1: 1 }
+    inner.extend(proto_message(21, &varint_field(1, 1)));
+
+    // field 32: knowledge config { f1: true }
+    inner.extend(proto_message(32, &bool_field(1, true)));
+
+    // Field 5 wraps: field 1 (inner config) + field 7 { f1: 1 }
+    let f5_payload = [
+        proto_message(1, &inner),
+        proto_message(7, &varint_field(1, 1)),
+    ]
+    .concat();
+    msg.extend(proto_message(5, &f5_payload));
+
+    // Field 11: conversation history flag — always the last two bytes (0x58, 0x01)
+    msg.extend(bool_field(11, true));
+
+    msg
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_varint_zero() {
+        assert_eq!(varint(0), vec![0x00]); // zero still occupies one byte
+    }
+
+    #[test]
+    fn test_varint_small() {
+        assert_eq!(varint(1), vec![0x01]);
+        assert_eq!(varint(127), vec![0x7F]); // largest value that fits in a single byte
+    }
+
+    #[test]
+    fn test_varint_multibyte() {
+        assert_eq!(varint(128), vec![0x80, 0x01]); // smallest two-byte value
+        assert_eq!(varint(300), vec![0xAC, 0x02]);
+    }
+
+    #[test]
+    fn test_varint_1026() {
+        // model_enum 1026 = 0x402 → varint [0x82, 0x08]
+        assert_eq!(varint(1026), vec![0x82, 0x08]);
+    }
+
+    #[test]
+    fn test_tag() {
+        // field 1, wire type 2 (LEN) = (1 << 3) | 2 = 0x0A
+        assert_eq!(tag(1, 2), vec![0x0A]);
+        // field 3, wire type 0 (VARINT) = (3 << 3) | 0 = 0x18
+        assert_eq!(tag(3, 0), vec![0x18]);
+        // field 33, wire type 2 = (33 << 3) | 2 = 266 → two-byte varint [0x8A, 0x02]
+        assert_eq!(tag(33, 2), vec![0x8A, 0x02]);
+    }
+
+    #[test]
+    fn test_proto_string() {
+        let result = proto_string(1, b"hi");
+        // tag(1,2) = 0x0A, len=2, 'h'=0x68, 'i'=0x69
+        assert_eq!(result, vec![0x0A, 0x02, 0x68, 0x69]);
+    }
+
+    #[test]
+    fn test_build_request_deterministic() {
+        let a = build_request("cid", "hello", "ya29.tok", 1026);
+        let b = build_request("cid", "hello", "ya29.tok", 1026);
+        assert_eq!(a, b, "build_request must be deterministic");
+    }
+
+    #[test]
+    fn test_build_request_structure() {
+        let msg = build_request("test-cascade-id", "hello", "ya29.test-token", 1026);
+
+        assert_eq!(msg[0], 0x0A, "first byte must be field 1 tag");
+
+        let cascade_bytes = b"test-cascade-id";
+        assert!(
+            msg.windows(cascade_bytes.len())
+                .any(|w| w == cascade_bytes),
+            "cascade_id must appear in output"
+        );
+
+        assert!(
+            msg.windows(5).any(|w| w == b"hello"),
+            "text must appear in output"
+        );
+
+        let token_bytes = b"ya29.test-token";
+        assert!(
+            msg.windows(token_bytes.len()).any(|w| w == token_bytes),
+            "oauth token must appear in output"
+        );
+
+        // model enum 1026 varint [0x82, 0x08] — substring check only, position is not verified
+        assert!(
+            msg.windows(2).any(|w| w == [0x82, 0x08]),
+            "model enum 1026 varint must appear in output"
+        );
+
+        // field 11 bool true at end: tag(11,0)=0x58, varint(1)=0x01
+        let len = msg.len();
+        assert_eq!(msg[len - 2], 0x58);
+        assert_eq!(msg[len - 1], 0x01);
+    }
+
+    /// Cross-verified against Python output: 127/127 bytes identical. The expected hex embeds the version string "1.16.5" (`3a06312e31362e35`), so this test passes only while `client_version()` returns that value.
+    #[test]
+    fn test_byte_exact_match_with_python() {
+        let msg = build_request("test-cascade-id", "hello", "ya29.test-token", 1026);
+        let hex: String = msg.iter().map(|b| format!("{:02x}", b)).collect(); // lowercase, two digits per byte
+        let expected = "0a0f746573742d636173636164652d696412070a0568656c6c6f\
+            1a370a0b616e7469677261766974791a0f796132392e746573742d746f6b656e\
+            2202656e3a06312e31362e35620b616e7469677261766974792a280a22120420\
+            0170006a0b42041a0230038a020208027a03088208aa010208018202020801\
+            3a0208015801";
+        assert_eq!(hex, expected, "must be byte-exact match with Python");
+        assert_eq!(msg.len(), 127);
+    }
+}
diff --git a/src/quota.rs b/src/quota.rs
new file mode 100644
index 0000000..650e293
--- /dev/null
+++ b/src/quota.rs
@@ -0,0 +1,218 @@
+//! Quota monitor — polls the local LS `GetUserStatus` to track
+//! prompt/flow credits and per-model rate limits without touching Google servers.
+
+use serde::Serialize;
+use std::sync::Arc;
+use tokio::sync::RwLock;
+use tracing::{debug, warn};
+
+/// Seconds between successive `GetUserStatus` polls of the LS (the first poll runs at startup).
+const POLL_INTERVAL_SECS: u64 = 60;
+
+// ─── Public types ────────────────────────────────────────────────────────────
+
+#[derive(Debug, Clone, Serialize, Default)]
+pub struct QuotaSnapshot {
+    /// Time at which this snapshot was built (RFC 3339, UTC); "" until the first successful poll.
+    pub last_updated: String,
+    /// Plan and tier names reported by the LS.
+    pub plan: PlanInfo,
+    /// Monthly prompt/flow credit balances.
+    pub credits: CreditInfo,
+    /// Per-model remaining quota and reset times, one entry per reported model config.
+    pub models: Vec<ModelQuota>,
+}
+
+#[derive(Debug, Clone, Serialize, Default)]
+pub struct PlanInfo { // plan/tier labels; each is "" when the LS reply lacks the value
+    pub plan_name: String, // from `userStatus.planStatus.planInfo.planName`
+    pub tier_id: String,   // from `userStatus.userTier.id`
+    pub tier_name: String, // from `userStatus.userTier.name`
+}
+
+#[derive(Debug, Clone, Serialize, Default)]
+pub struct CreditInfo { // JSON paths below are relative to `userStatus.planStatus`; missing values become 0 / false
+    pub prompt_available: i64, // `availablePromptCredits`
+    pub prompt_total: i64,     // `planInfo.monthlyPromptCredits`
+    pub prompt_used_pct: f64,  // (total - available) / total * 100; 0.0 when total <= 0
+    pub flow_available: i64,   // `availableFlowCredits`
+    pub flow_total: i64,       // `planInfo.monthlyFlowCredits`
+    pub flow_used_pct: f64,    // same formula as `prompt_used_pct`, applied to flow credits
+    pub flex_purchasable: i64, // `planInfo.monthlyFlexCreditPurchaseAmount`
+    pub can_buy_more: bool,    // `planInfo.canBuyMoreCredits`
+}
+
+#[derive(Debug, Clone, Serialize, Default)]
+pub struct ModelQuota {
+    pub label: String,    // `label` of the model config entry ("" if absent)
+    pub model_id: String, // `modelOrAlias.model` of the entry ("" if absent)
+    /// 0.0–1.0 remaining fraction (1.0 = full quota); 0.0 when the LS reports no value.
+    pub remaining_fraction: f64,
+    /// Percentage remaining (nominally 0–100), computed as `remaining_fraction * 100`.
+    pub remaining_pct: f64,
+    /// Reset time string exactly as reported by the LS (parsed as RFC 3339); "" if absent.
+    pub reset_time: String,
+    /// Seconds until reset (negative = already reset); 0 if the reset time is absent or unparseable.
+    pub reset_in_secs: i64,
+    /// Human-readable countdown: "{h}h {m}m" while the reset is in the future, otherwise "available".
+    pub reset_in_human: String,
+}
+
+// ─── Quota Store ─────────────────────────────────────────────────────────────
+
+#[derive(Clone)]
+pub struct QuotaStore {
+    inner: Arc<RwLock<QuotaSnapshot>>, // latest snapshot; clones of the store share the same Arc
+}
+
+impl QuotaStore {
+    /// Create a store holding an empty (default) snapshot.
+    pub fn new() -> Self {
+        let inner = Arc::new(RwLock::new(QuotaSnapshot::default()));
+        Self { inner }
+    }
+
+    /// Return a clone of the most recently stored snapshot.
+    pub async fn snapshot(&self) -> QuotaSnapshot {
+        QuotaSnapshot::clone(&*self.inner.read().await)
+    }
+
+    /// Spawn the background polling task (one poll now, then one per period). Call once at startup.
+    pub fn start_polling(self, backend: Arc<crate::backend::Backend>) {
+        let period = std::time::Duration::from_secs(POLL_INTERVAL_SECS);
+        let poller = async move {
+            // Populate the snapshot right away instead of waiting a full period.
+            self.poll_once(&backend).await;
+
+            // The interval's first tick completes immediately; consume it.
+            let mut ticker = tokio::time::interval(period);
+            ticker.tick().await;
+            loop {
+                ticker.tick().await;
+                self.poll_once(&backend).await;
+            }
+        };
+        tokio::spawn(poller);
+    }
+
+    /// Call `GetUserStatus` once. A 200 reply is parsed and stored; any other
+    /// status or a transport error is logged and the previous snapshot is kept.
+    async fn poll_once(&self, backend: &crate::backend::Backend) {
+        let request = serde_json::json!({});
+        let (status, data) = match backend.call_json("GetUserStatus", &request).await {
+            Ok(reply) => reply,
+            Err(e) => {
+                warn!("GetUserStatus poll failed: {e}");
+                return;
+            }
+        };
+        if status != 200 {
+            warn!("GetUserStatus returned {status}: {data}");
+            return;
+        }
+        let snapshot = parse_user_status(&data);
+        let credits = &snapshot.credits;
+        debug!(
+            "Quota poll: prompt {}/{} flow {}/{}",
+            credits.prompt_available, credits.prompt_total,
+            credits.flow_available, credits.flow_total,
+        );
+        *self.inner.write().await = snapshot;
+    }
+}
+
+// ─── Parsing ─────────────────────────────────────────────────────────────────
+
+/// Convert a `GetUserStatus` JSON reply into a [`QuotaSnapshot`].
+///
+/// Lookups are lenient: a missing or wrongly-typed value falls back to `0`,
+/// `0.0`, `""`, `false`, or an empty model list instead of producing an error.
+/// `last_updated` and every `reset_in_secs` are relative to the UTC time at
+/// which this function runs.
+fn parse_user_status(data: &serde_json::Value) -> QuotaSnapshot {
+    // Owned copy of a JSON string, or "" when the value is not a string.
+    let text = |v: &serde_json::Value| v.as_str().unwrap_or("").to_string();
+    // Value of a JSON integer, or 0 when it is absent or not an i64.
+    let int = |v: &serde_json::Value| v.as_i64().unwrap_or(0);
+    // Share of `total` already consumed, in percent; 0 without an allowance.
+    let used_pct = |total: i64, avail: i64| {
+        if total > 0 {
+            ((total - avail) as f64 / total as f64) * 100.0
+        } else {
+            0.0
+        }
+    };
+
+    let now = chrono::Utc::now();
+    let user_status = &data["userStatus"];
+    let plan_status = &user_status["planStatus"];
+    let plan_info = &plan_status["planInfo"];
+    let user_tier = &user_status["userTier"];
+
+    let prompt_total = int(&plan_info["monthlyPromptCredits"]);
+    let prompt_available = int(&plan_status["availablePromptCredits"]);
+    let flow_total = int(&plan_info["monthlyFlowCredits"]);
+    let flow_available = int(&plan_status["availableFlowCredits"]);
+
+    // Builds one `ModelQuota` from one entry of `clientModelConfigs`.
+    let to_model_quota = |entry: &serde_json::Value| {
+        let quota = &entry["quotaInfo"];
+        let remaining_fraction = quota["remainingFraction"].as_f64().unwrap_or(0.0);
+        let reset_time = text(&quota["resetTime"]);
+
+        // Signed seconds until the reset; 0 if absent or not valid RFC 3339.
+        let reset_in_secs = if reset_time.is_empty() {
+            0
+        } else {
+            chrono::DateTime::parse_from_rfc3339(&reset_time)
+                .map(|dt| (dt.with_timezone(&chrono::Utc) - now).num_seconds())
+                .unwrap_or(0)
+        };
+        // A countdown is shown only while the reset is still in the future.
+        let reset_in_human = if reset_in_secs > 0 {
+            let h = reset_in_secs / 3600;
+            let m = (reset_in_secs % 3600) / 60;
+            format!("{h}h {m}m")
+        } else {
+            "available".to_string()
+        };
+
+        ModelQuota {
+            label: text(&entry["label"]),
+            model_id: text(&entry["modelOrAlias"]["model"]),
+            remaining_fraction,
+            remaining_pct: remaining_fraction * 100.0,
+            reset_time,
+            reset_in_secs,
+            reset_in_human,
+        }
+    };
+
+    // A missing or non-array `clientModelConfigs` yields an empty list.
+    let models: Vec<ModelQuota> = user_status["cascadeModelConfigData"]["clientModelConfigs"]
+        .as_array()
+        .into_iter()
+        .flatten()
+        .map(to_model_quota)
+        .collect();
+
+    QuotaSnapshot {
+        last_updated: now.to_rfc3339(),
+        plan: PlanInfo {
+            plan_name: text(&plan_info["planName"]),
+            tier_id: text(&user_tier["id"]),
+            tier_name: text(&user_tier["name"]),
+        },
+        credits: CreditInfo {
+            prompt_available,
+            prompt_total,
+            prompt_used_pct: used_pct(prompt_total, prompt_available),
+            flow_available,
+            flow_total,
+            flow_used_pct: used_pct(flow_total, flow_available),
+            flex_purchasable: int(&plan_info["monthlyFlexCreditPurchaseAmount"]),
+            can_buy_more: plan_info["canBuyMoreCredits"].as_bool().unwrap_or(false),
+        },
+        models,
+    }
+}
diff --git a/src/session.rs b/src/session.rs
new file mode 100644
index 0000000..2daeba0
--- /dev/null
+++ b/src/session.rs
@@ -0,0 +1,152 @@
+//! Cascade session manager — maps session IDs to cascade IDs for reuse.
+//!
+//! Mimics real webview behavior: one chat tab = one cascade with many messages.
+//! Without this, every API call creates a new cascade — an obvious automation
+//! fingerprint (100 calls = 100 single-message cascades).
+
+use std::collections::HashMap;
+use std::time::Instant;
+use tokio::sync::RwLock;
+
+const DEFAULT_SESSION: &str = "__default__"; // session key used when the caller passes no session_id
+const SESSION_TTL_SECS: u64 = 3600 * 4; // 4 hours of idle time (since last use) before a session is dropped
+
+#[derive(Clone)]
+struct Session {
+    cascade_id: String, // cascade this session resolves to
+    created: Instant,   // when the session entry was inserted
+    last_used: Instant, // refreshed on every reuse; compared against SESSION_TTL_SECS for expiry
+    msg_count: u64,     // incremented each time the session is reused
+}
+
+pub struct SessionManager {
+    sessions: RwLock<HashMap<String, Session>>, // session id → session, behind an async lock
+}
+
+/// Result of session resolution, as returned by `SessionManager::get_or_create`.
+pub struct SessionResult {
+    pub cascade_id: String, // cascade to use for the request (reused or newly created)
+}
+
+impl SessionManager {
+    /// Create a manager with no sessions.
+    pub fn new() -> Self {
+        Self {
+            sessions: RwLock::new(HashMap::new()),
+        }
+    }
+
+    /// Resolve `session_id` to a cascade id, calling `create_fn` to create a
+    /// cascade when the session has none; a `create_fn` error is returned as-is.
+    ///
+    /// - `session_id = None` → use default session
+    /// - `session_id = Some("new")` → always create fresh cascade
+    /// - `session_id = Some("my-task")` → reuse cascade for that task
+    ///
+    /// Expired sessions are purged on every path (including `"new"`), and a
+    /// session starts with `msg_count` 1 however it was created. The lock is not
+    /// held across `create_fn().await`; afterwards the map is re-checked, and if a
+    /// concurrent request registered the session first, its cascade is used.
+    pub async fn get_or_create<F, Fut>(
+        &self,
+        session_id: Option<&str>,
+        create_fn: F,
+    ) -> Result<SessionResult, String>
+    where
+        F: FnOnce() -> Fut,
+        Fut: std::future::Future<Output = Result<String, String>>,
+    {
+        // "new" always creates a fresh cascade under a generated session id.
+        if session_id == Some("new") {
+            let cascade_id = create_fn().await?;
+            let new_sid = format!("s-{}", &uuid::Uuid::new_v4().to_string()[..8]);
+            let mut sessions = self.sessions.write().await;
+            // Purge here too, so a client that only sends "new" cannot grow the map unboundedly.
+            cleanup_expired(&mut sessions);
+            let now = Instant::now();
+            sessions.insert(
+                new_sid,
+                Session {
+                    cascade_id: cascade_id.clone(),
+                    created: now,
+                    last_used: now,
+                    msg_count: 1, // this request is the session's first message
+                },
+            );
+            return Ok(SessionResult { cascade_id });
+        }
+
+        let sid = session_id.unwrap_or(DEFAULT_SESSION).to_string();
+
+        // Existing session: a write lock is needed for cleanup + mutation.
+        {
+            let mut sessions = self.sessions.write().await;
+            cleanup_expired(&mut sessions);
+            if let Some(sess) = sessions.get_mut(&sid) {
+                sess.last_used = Instant::now();
+                sess.msg_count += 1;
+                return Ok(SessionResult {
+                    cascade_id: sess.cascade_id.clone(),
+                });
+            }
+        } // lock released here, before the (possibly slow) create_fn
+
+        let cascade_id = create_fn().await?;
+
+        // Re-check: another request may have registered `sid` while we awaited.
+        let mut sessions = self.sessions.write().await;
+        let now = Instant::now();
+        if let Some(existing) = sessions.get_mut(&sid) {
+            // Lost the race — use their cascade; ours is not recorded.
+            existing.last_used = now;
+            existing.msg_count += 1;
+            return Ok(SessionResult {
+                cascade_id: existing.cascade_id.clone(),
+            });
+        }
+        sessions.insert(
+            sid,
+            Session {
+                cascade_id: cascade_id.clone(),
+                created: now,
+                last_used: now,
+                msg_count: 1,
+            },
+        );
+        Ok(SessionResult { cascade_id })
+    }
+
+    /// List all active sessions as a JSON object keyed by session id.
+    /// Expired sessions are purged first, which is why a write lock is taken.
+    pub async fn list_sessions(&self) -> serde_json::Value {
+        let mut sessions = self.sessions.write().await;
+        cleanup_expired(&mut sessions);
+        let now = Instant::now();
+        let mut map = serde_json::Map::new();
+        for (sid, sess) in sessions.iter() {
+            map.insert(
+                sid.clone(),
+                serde_json::json!({
+                    "cascade_id": sess.cascade_id,
+                    "msg_count": sess.msg_count,
+                    "age_seconds": now.duration_since(sess.created).as_secs(),
+                    "idle_seconds": now.duration_since(sess.last_used).as_secs(),
+                }),
+            );
+        }
+        serde_json::Value::Object(map)
+    }
+
+    /// Delete a session. Returns true if it existed.
+    pub async fn delete_session(&self, session_id: &str) -> bool {
+        let mut sessions = self.sessions.write().await;
+        sessions.remove(session_id).is_some()
+    }
+}
+
+/// Remove every session that has been idle for `SESSION_TTL_SECS` or longer.
+fn cleanup_expired(sessions: &mut HashMap<String, Session>) {
+    let now = Instant::now();
+    let ttl = std::time::Duration::from_secs(SESSION_TTL_SECS);
+    sessions.retain(|_, sess| now.duration_since(sess.last_used) < ttl);
+}
diff --git a/src/warmup.rs b/src/warmup.rs
new file mode 100644
index 0000000..00bd109
--- /dev/null
+++ b/src/warmup.rs
@@ -0,0 +1,69 @@
+//! Startup warmup and periodic heartbeat — mimics real webview lifecycle.
+//!
+//! The real Electron webview calls these methods on startup and then sends
+//! Heartbeat every ~30 seconds. Without this, the LS sees a "user" that
+//! never initializes and never heartbeats — an obvious bot fingerprint.
+
+use crate::backend::Backend;
+use rand::Rng;
+use std::sync::Arc;
+use std::time::Duration;
+use tokio::task::JoinHandle;
+use tracing::{debug, info, warn};
+
+/// Replay the startup call sequence of the real webview against the LS.
+///
+/// Called BEFORE accepting any API requests. Calls are issued one at a time and
+/// awaited; a failure is only logged (we don't care if some fail — the LS might
+/// not support all methods).
+pub async fn warmup_sequence(backend: &Backend) {
+    // Every method is called with an empty JSON object as its body.
+    const METHODS: [&str; 13] = [
+        "GetStatus",
+        "Heartbeat",
+        "GetUserStatus",
+        "GetCascadeModelConfigs",
+        "GetCascadeModelConfigData",
+        "GetWorkspaceInfos",
+        "GetWorkingDirectories",
+        "GetAllCascadeTrajectories",
+        "GetMcpServerStates",
+        "GetWebDocsOptions",
+        "GetRepoInfos",
+        "GetAllSkills",
+        "InitializeCascadePanelState",
+    ];
+    info!("Running webview warmup sequence...");
+    let empty_body = serde_json::json!({});
+    for method in METHODS {
+        match backend.call_json(method, &empty_body).await {
+            Err(e) => warn!("Warmup {method} failed: {e}"),
+            Ok((status, _)) => debug!("Warmup {method}: {status}"),
+        }
+        // Random 50–199 ms pause — the real webview doesn't blast calls instantly.
+        let pause_ms = rand::thread_rng().gen_range(50..200);
+        tokio::time::sleep(Duration::from_millis(pause_ms)).await;
+    }
+    info!("Warmup complete");
+}
+
+/// Spawn a background task that sends `Heartbeat` roughly every 30 s.
+///
+/// The loop never exits by itself; the returned handle lets the caller abort it
+/// (on shutdown).
+pub fn start_heartbeat(backend: Arc<Backend>) -> JoinHandle<()> {
+    let heartbeat_loop = async move {
+        let empty_body = serde_json::json!({});
+        loop {
+            // ~30 s ± 500 ms, drawn afresh before every beat.
+            let wait_ms = rand::thread_rng().gen_range(29_500..30_500);
+            tokio::time::sleep(Duration::from_millis(wait_ms)).await;
+
+            match backend.call_json("Heartbeat", &empty_body).await {
+                Err(e) => warn!("Heartbeat failed: {e}"),
+                Ok((status, _)) => debug!("Heartbeat: {status}"),
+            }
+        }
+    };
+    tokio::spawn(heartbeat_loop)
+}