Add lightweight pre-commit secret scanning

2026-03-28 00:20:48 +00:00
parent 1b339d5bce
commit 13459daf7a
4 changed files with 131 additions and 0 deletions
--- a/.githooks/pre-commit
+++ b/.githooks/pre-commit
@@ -0,0 +1,4 @@
 #!/usr/bin/env bash
 set -euo pipefail
 # Pre-commit hook: runs the repo's lightweight secret scan against the index.
 # A non-zero exit status from the scanner aborts the commit.
 #
 # Git normally starts hooks in the work tree root, but resolve the root
 # explicitly so the relative scanner path also works when this hook is run
 # by hand from a subdirectory.
 repo_root=$(git rev-parse --show-toplevel)
 cd -- "$repo_root"
 # exec so the scanner's exit status becomes the hook's exit status directly.
 exec scripts/scan-secrets.sh
--- a/README.md
+++ b/README.md
@@ -52,6 +52,11 @@ Default philosophy:
 See `docs/repo-conventions.md` for commit/push guidance and repo hygiene.
 ## Git hygiene
 This repo includes a lightweight local pre-commit secret scan.
 See `docs/git-hooks.md` for details.
 ## Cron / automation
 See `cron/README.md` for the current recurring-job layout and documentation pattern.
--- a/docs/git-hooks.md
+++ b/docs/git-hooks.md
@@ -0,0 +1,45 @@
 # Git Hooks / Secret Scan
 This repo uses a lightweight local pre-commit hook for obvious secret hygiene.
 ## What it does
 On `git commit`, the hook runs:
 - `scripts/scan-secrets.sh`
 The scanner checks **staged content** for a small set of high-signal patterns, including:
 - private key blocks
 - common cloud/API token formats
 - suspicious inline assignments like `TOKEN=...` or `PASSWORD: ...`
 It is intentionally conservative and lightweight.
 ## Why this exists
 Goal: catch obvious mistakes before they land in git.
 It is **not** meant to be a full secret management or DLP system.
 ## Configuration
 This repo uses a repo-local hooks path:
 - `.githooks/`
 Configured via (run once in each clone, because git config is not committed with the repo):
 ```bash
 git config core.hooksPath .githooks
 ```
 ## Bypass
 If the scanner throws a false positive, you can bypass it once with:
 ```bash
 git commit --no-verify
 ```
 Use that sparingly and only after reviewing the staged diff.
 ## Maintenance
 If the scanner is too noisy, tighten patterns.
 If it misses obvious mistakes, add narrowly targeted patterns rather than broad generic ones.
--- a/scripts/scan-secrets.sh
+++ b/scripts/scan-secrets.sh
@@ -0,0 +1,77 @@
 #!/usr/bin/env bash
 set -euo pipefail
 # Lightweight staged-file secret scan for local git hygiene.
 # Focus: obvious high-risk mistakes, not exhaustive DLP.
 #
 # Usage: scripts/scan-secrets.sh   (normally run by .githooks/pre-commit)
 # Scans the *staged* (index) version of every added/copied/modified/renamed
 # file; the working tree copy is never consulted.
 # Exit status:
 #   0  nothing suspicious found (or nothing staged)
 #   1  at least one potential secret found; commit should be blocked
 #   2  not inside a git work tree, or the scan itself failed
 if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
   echo "Not inside a git work tree" >&2
   exit 2
 fi
 # Private scratch directory: avoids predictable /tmp names and is removed on
 # every exit path by the trap below.
 scratch_dir=$(mktemp -d "${TMPDIR:-/tmp}/scan-secrets.XXXXXX") || {
   echo "scan-secrets: could not create a temporary directory" >&2
   exit 2
 }
 trap 'rm -rf -- "$scratch_dir"' EXIT
 blob_file="$scratch_dir/blob"
 match_file="$scratch_dir/matches"
 fail=0
 # High-signal patterns only. Avoid noisy generic matches.
 # Note: inside a POSIX bracket expression a backslash is literal, so a
 # literal '-' must be placed last in the set rather than written as '\-'.
 patterns=(
   'AKIA[0-9A-Z]{16}'
   'AIza[0-9A-Za-z_-]{35}'
   'ghp_[0-9A-Za-z]{36}'
   'github_pat_[0-9A-Za-z_]{20,}'
   'xox[baprs]-[0-9A-Za-z-]{10,}'
   '-----BEGIN (RSA|DSA|EC|OPENSSH|PGP)? ?PRIVATE KEY( BLOCK)?-----'
   'AIzaSy[0-9A-Za-z_-]+'
 )
 # Heuristic: suspicious assignments for secret-ish variable names with inline values.
 assignment_pattern='(^|[[:space:]])(API_KEY|TOKEN|SECRET|PASSWORD|PASSWD|PRIVATE_KEY)[[:space:]]*[:=][[:space:]]*[^[:space:]]+'
 #######################################
 # Greps the current staged blob for one pattern and reports any hits.
 # Globals:   blob_file (read), match_file (written)
 # Arguments: $1 - heading printed before the path
 #            $2 - staged path (for the report only)
 #            $3 - extended regular expression to look for
 # Outputs:   heading plus indented "line:content" matches on stderr
 # Returns:   0 if the pattern matched, 1 if it did not; exits 2 if grep fails
 #######################################
 report_matches() {
   local heading=$1 path=$2 pattern=$3 rc=0
   # -e keeps patterns that start with '-' from being parsed as options.
   grep -nE -e "$pattern" -- "$blob_file" >"$match_file" || rc=$?
   case "$rc" in
     0)
       echo "$heading: $path" >&2
       sed 's/^/  /' "$match_file" >&2 || true
       return 0
       ;;
     1)
       return 1
       ;;
     *)
       # A broken pattern must not silently disable a check.
       echo "scan-secrets: grep failed (status $rc) for pattern: $pattern" >&2
       exit 2
       ;;
   esac
 }
 # NUL-delimited so paths with spaces, newlines or non-ASCII bytes survive intact.
 while IFS= read -r -d '' path; do
   # Scan staged content, not working tree content. Entries with no readable
   # blob (e.g. submodule links) and empty files are skipped.
   if ! git show ":$path" >"$blob_file" 2>/dev/null; then
     continue
   fi
   [[ -s "$blob_file" ]] || continue
   # Skip common binary assets. -b omits the file name so the path itself can
   # never trigger a skip, and anything described as text (e.g. "ASCII text
   # executable" for scripts) is always scanned. If `file` is unavailable the
   # description is empty and the blob is scanned.
   kind=$(file -b "$blob_file" 2>/dev/null || true)
   if grep -qiE 'image|font|audio|video|compressed|archive|executable' <<<"$kind" \
     && ! grep -qi 'text' <<<"$kind"; then
     continue
   fi
   for pattern in "${patterns[@]}"; do
     if report_matches "Potential secret detected in staged file" "$path" "$pattern"; then
       fail=1
     fi
   done
   if report_matches "Suspicious inline credential assignment in staged file" "$path" "$assignment_pattern"; then
     fail=1
   fi
 done < <(git diff --cached --name-only --diff-filter=ACMR -z)
 if [[ "$fail" -ne 0 ]]; then
   printf '%s\n' \
     "Commit blocked by lightweight secret scan." \
     "If this is a false positive:" \
     "- inspect the staged diff carefully" \
     "- remove/redact the sensitive-looking value if needed" \
     "- re-stage and commit again" \
     "- or bypass once with: git commit --no-verify" \
     "Use --no-verify sparingly." >&2
   exit 1
 fi
 exit 0