From 13459daf7adb4ed232039af22adb7ad245e13362 Mon Sep 17 00:00:00 2001
From: claw
Date: Sat, 28 Mar 2026 00:20:48 +0000
Subject: [PATCH] Add lightweight pre-commit secret scanning

---
Review note: scripts/scan-secrets.sh below was revised during review, so the
abbreviated blob id on its "index" line predates the content shown here.

 .githooks/pre-commit    |  4 ++
 README.md               |  5 +++
 docs/git-hooks.md       | 45 +++++++++++++++++++
 scripts/scan-secrets.sh | 95 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 149 insertions(+)
 create mode 100755 .githooks/pre-commit
 create mode 100644 docs/git-hooks.md
 create mode 100755 scripts/scan-secrets.sh

diff --git a/.githooks/pre-commit b/.githooks/pre-commit
new file mode 100755
index 0000000..3a28364
--- /dev/null
+++ b/.githooks/pre-commit
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+scripts/scan-secrets.sh
diff --git a/README.md b/README.md
index 1fd5b59..0c3f2cc 100644
--- a/README.md
+++ b/README.md
@@ -52,6 +52,11 @@ Default philosophy:
 
 See `docs/repo-conventions.md` for commit/push guidance and repo hygiene.
 
+## Git hygiene
+
+This repo includes a lightweight local pre-commit secret scan.
+See `docs/git-hooks.md` for details.
+
 ## Cron / automation
 
 See `cron/README.md` for the current recurring-job layout and documentation pattern.
diff --git a/docs/git-hooks.md b/docs/git-hooks.md
new file mode 100644
index 0000000..a248d94
--- /dev/null
+++ b/docs/git-hooks.md
@@ -0,0 +1,45 @@
+# Git Hooks / Secret Scan
+
+This repo uses a lightweight local pre-commit hook for obvious secret hygiene.
+
+## What it does
+
+On `git commit`, the hook runs:
+- `scripts/scan-secrets.sh`
+
+The scanner checks **staged content** for a small set of high-signal patterns, including:
+- private key blocks
+- common cloud/API token formats
+- suspicious inline assignments like `TOKEN=...` or `PASSWORD: ...`
+
+It is intentionally conservative and lightweight.
+
+## Why this exists
+
+Goal: catch obvious mistakes before they land in git.
+
+It is **not** meant to be a full secret management or DLP system.
+
+## Configuration
+
+This repo uses a repo-local hooks path:
+- `.githooks/`
+
+Configured via:
+```bash
+git config core.hooksPath .githooks
+```
+
+## Bypass
+
+If the scanner throws a false positive, you can bypass it once with:
+```bash
+git commit --no-verify
+```
+
+Use that sparingly and only after reviewing the staged diff.
+
+## Maintenance
+
+If the scanner is too noisy, tighten patterns.
+If it misses obvious mistakes, add narrowly targeted patterns rather than broad generic ones.
diff --git a/scripts/scan-secrets.sh b/scripts/scan-secrets.sh
new file mode 100755
index 0000000..32389b1
--- /dev/null
+++ b/scripts/scan-secrets.sh
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Lightweight staged-file secret scan for local git hygiene.
+# Focus: obvious high-risk mistakes, not exhaustive DLP.
+#
+# Usage: scripts/scan-secrets.sh   (normally run by .githooks/pre-commit)
+# Exit status: 0 = clean, 1 = potential secret found,
+#              2 = cannot scan (not in a work tree, or grep itself failed).
+
+if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+  echo "Not inside a git work tree" >&2
+  exit 2
+fi
+
+fail=0
+
+# High-signal patterns only. Avoid noisy generic matches.
+# A backslash is literal inside a POSIX bracket expression, so a literal
+# hyphen is written last ("_-"); "\-_" would be a range that omits "-".
+patterns=(
+  'AKIA[0-9A-Z]{16}'
+  'AIza[0-9A-Za-z_-]{35}'
+  'ghp_[0-9A-Za-z]{36}'
+  'github_pat_[0-9A-Za-z_]{20,}'
+  'xox[baprs]-[0-9A-Za-z-]{10,}'
+  '-----BEGIN (RSA|DSA|EC|OPENSSH|PGP)? ?PRIVATE KEY-----'
+  'AIzaSy[0-9A-Za-z_-]+'
+)
+
+# Heuristic: suspicious assignments for secret-ish variable names with inline values.
+assignment_pattern='(^|[[:space:]])(API_KEY|TOKEN|SECRET|PASSWORD|PASSWD|PRIVATE_KEY)[[:space:]]*[:=][[:space:]]*[^[:space:]]+'
+
+# Private scratch copy of the staged blob being scanned; removed on any exit.
+blob=$(mktemp "${TMPDIR:-/tmp}/scan-secrets.XXXXXX")
+trap 'rm -f -- "$blob"' EXIT
+
+#######################################
+# Report lines of the current staged blob that match a pattern.
+# Globals:   blob (read), fail (set to 1 on a match)
+# Arguments: $1 - message prefix, $2 - staged path, $3 - extended regex
+# Outputs:   prefix, path and numbered matching lines on stderr
+#######################################
+report_matches() {
+  local label=$1 shown_path=$2 regex=$3 hits status=0
+  # "--" keeps a pattern that begins with "-" from being parsed as options;
+  # -I treats binary blobs as non-matching; LC_ALL=C makes ranges byte-wise.
+  hits=$(LC_ALL=C grep -InE -- "$regex" "$blob") || status=$?
+  case "$status" in
+    0)
+      echo "${label}: ${shown_path}" >&2
+      sed 's/^/  /' <<<"$hits" >&2
+      fail=1
+      ;;
+    1) ;; # no match
+    *)
+      # Fail closed: a broken scan must not look like a clean one.
+      echo "scan-secrets: grep failed (status ${status}) for pattern: ${regex}" >&2
+      exit 2
+      ;;
+  esac
+}
+
+# -z yields raw NUL-terminated paths; the default output C-quotes unusual
+# names, which would then not resolve in the index.
+while IFS= read -r -d '' path; do
+  # Scan staged content, not working tree content. Entries without blob
+  # content in the index (e.g. submodule links) are skipped.
+  git show ":${path}" >"$blob" 2>/dev/null || continue
+  [[ -s "$blob" ]] || continue
+
+  for pattern in "${patterns[@]}"; do
+    report_matches "Potential secret detected in staged file" "$path" "$pattern"
+  done
+
+  report_matches "Suspicious inline credential assignment in staged file" \
+    "$path" "$assignment_pattern"
+done < <(git diff --cached --name-only -z --diff-filter=ACMR)
+
+if [[ "$fail" -ne 0 ]]; then
+  cat >&2 <<'EOF'
+Commit blocked by lightweight secret scan.
+
+If this is a false positive:
+- inspect the staged diff carefully
+- remove/redact the sensitive-looking value if needed
+- re-stage and commit again
+- or bypass once with: git commit --no-verify
+
+Use --no-verify sparingly.
+EOF
+  exit 1
+fi
+
+exit 0