#!/usr/bin/env bash
# Resolve human-readable names for the Discord IDs in the channel registry.
#
# Merges (1) the manually curated overrides file and (2) names observed in
# *.jsonl transcripts under the workspace roots into channel-registry.json,
# then re-renders channel-registry.md from the updated JSON.
set -euo pipefail

# Single source of truth for the file locations. Exported so the embedded
# Python program reads them from the environment instead of repeating them.
export REG_JSON="/home/node/.openclaw/memory/channel-registry.json"
export REG_MD="/home/node/.openclaw/memory/channel-registry.md"
export OVERRIDES_JSON="/home/node/.openclaw/memory/channel-name-overrides.json"

python3 - <<'PY'
import json
import os
import re
import sys
from datetime import datetime, timezone
from pathlib import Path

reg_path = Path(os.environ['REG_JSON'])
md_path = Path(os.environ['REG_MD'])
overrides_path = Path(os.environ['OVERRIDES_JSON'])

# Fields that overrides/observations are allowed to fill in on an entry.
NAME_FIELDS = ('guild_id', 'guild_name', 'channel_name', 'thread_name')

# One timestamp for the whole run so every touched record agrees.
now = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')

reg = json.loads(reg_path.read_text(encoding='utf-8'))
# setdefault (not get) so entries seeded below are part of `reg` when it is
# written back, even if the file had no 'entries' key yet.
entries = reg.setdefault('entries', [])
idx = {(e['platform'], e['kind'], e['id']): e for e in entries}

obs = {}        # id -> {field: observed value}
pinned = set()  # (id, field) pairs fixed by the overrides file


def note(cid, key, value, from_override=False):
    """Record an observed value for (cid, key).

    Values coming from the overrides file are pinned; later transcript
    observations for the same (cid, key) are ignored so overrides win.
    Empty ids and empty values are dropped.
    """
    if not cid or not value:
        return
    if from_override:
        pinned.add((cid, key))
    elif (cid, key) in pinned:
        return
    obs.setdefault(cid, {})[key] = value


def with_hash(name):
    """Return name with a leading '#', adding one if it is missing."""
    return name if name.startswith('#') else '#' + name


def load_discord_overrides(path):
    """Return the {id: dict} mapping under 'discord' in the overrides file.

    Best effort: a missing, unreadable or malformed file yields an empty
    mapping (with a warning on stderr) instead of aborting the run.
    Non-dict values are skipped.
    """
    if not path.exists():
        return {}
    try:
        raw = json.loads(path.read_text(encoding='utf-8'))
    except (OSError, ValueError) as exc:
        print(f'warning: ignoring overrides file {path}: {exc}', file=sys.stderr)
        return {}
    discord = raw.get('discord') if isinstance(raw, dict) else None
    if not isinstance(discord, dict):
        if discord:
            print(f'warning: ignoring overrides file {path}: '
                  "'discord' is not an object", file=sys.stderr)
        return {}
    return {cid: data for cid, data in discord.items() if isinstance(data, dict)}


# 1) Explicit overrides win (manual curated)
overrides = load_discord_overrides(overrides_path)
for cid, data in overrides.items():
    for k in NAME_FIELDS:
        if data.get(k):
            note(cid, k, data[k], from_override=True)

# Ensure override IDs are represented even if not referenced yet
seeded = set()  # id() of entries created in this run
for cid, data in overrides.items():
    has_channel_or_thread = data.get('channel_name') or data.get('thread_name')
    has_any_name = data.get('guild_name') or has_channel_or_thread
    # An override that names only a guild describes the guild itself.
    kind = 'guild' if data.get('guild_name') and not has_channel_or_thread else 'channel'
    key = ('discord', kind, cid)
    if key in idx:
        continue
    entry = {
        'platform': 'discord',
        'kind': kind,
        'id': cid,
        'guild_id': data.get('guild_id') or (cid if kind == 'guild' else None),
        'guild_name': data.get('guild_name'),
        'channel_name': data.get('channel_name'),
        'thread_name': data.get('thread_name'),
        'agent_owner': None,
        'used_by': ['override:manual'],
        'purpose': 'manual override registry seed',
        'status': 'active' if has_any_name else 'unresolved',
        'last_verified_utc': now,
    }
    entries.append(entry)
    idx[key] = entry
    seeded.add(id(entry))

# 2) Scan transcripts for embedded metadata (bounded to prevent huge-context blowups)
roots = [
    Path('/home/node/.openclaw/workspace'),
    Path('/home/node/.openclaw/workspace-home'),
    Path('/home/node/.openclaw/workspace-security'),
    Path('/home/node/.openclaw/workspace-research'),
]

MAX_JSONL_FILES = 400
MAX_FILE_SCAN_BYTES = 1_000_000
MAX_TOTAL_SCAN_BYTES = 25_000_000

pat_discord = re.compile(r'discord:(\d+)#([A-Za-z0-9_-]+)')
pat_conv = re.compile(r'channel id:(\d+)')
pat_group_channel = re.compile(r'"group_channel"\s*:\s*"(#?[^"]+)"')
pat_thread = re.compile(r'"thread_label"\s*:\s*"Discord thread\s+#([^›"]+)\s+›\s+([^"]+)"')
pat_subject = re.compile(r'"group_subject"\s*:\s*"(#?[^"]+)"')


def bounded_read_jsonl(path: Path, limit_bytes: int) -> str:
    """Read at most about limit_bytes of path and return it as text.

    Files within the limit are read whole. Larger files contribute their
    first and last limit_bytes // 2 bytes joined by a TRUNCATED marker.
    Undecodable bytes are dropped.
    """
    size = path.stat().st_size
    with path.open('rb') as fh:
        if size <= limit_bytes:
            data = fh.read(limit_bytes)
        else:
            half = limit_bytes // 2
            head = fh.read(half)
            fh.seek(max(0, size - half))
            tail = fh.read(half)
            data = head + b'\n...TRUNCATED...\n' + tail
    return data.decode('utf-8', errors='ignore')


jsonl_paths = []
for root in roots:
    if root.exists():
        jsonl_paths.extend(root.rglob('*.jsonl'))

bytes_scanned = 0
for p in sorted(jsonl_paths)[:MAX_JSONL_FILES]:
    per_file_cap = min(MAX_FILE_SCAN_BYTES, MAX_TOTAL_SCAN_BYTES - bytes_scanned)
    if per_file_cap <= 0:
        break
    try:
        txt = bounded_read_jsonl(p, per_file_cap)
    except Exception:
        # Best effort: one unreadable transcript must not stop the scan.
        continue
    bytes_scanned += len(txt.encode('utf-8', errors='ignore'))

    # pattern: discord:<id>#<name>
    for m in pat_discord.finditer(txt):
        note(m.group(1), 'channel_name', with_hash(m.group(2)))

    # conversation metadata blocks: look for name fields near each "channel id:"
    for m in pat_conv.finditer(txt):
        cid = m.group(1)
        window = txt[max(0, m.start() - 1200): m.end() + 1200]

        gm = pat_group_channel.search(window)
        if gm:
            note(cid, 'channel_name', with_hash(gm.group(1)))

        # group_subject is only a fallback when no channel name is known yet
        sm = pat_subject.search(window)
        if sm and not obs.get(cid, {}).get('channel_name'):
            note(cid, 'channel_name', with_hash(sm.group(1)))

        tm = pat_thread.search(window)
        if tm:
            # forum-ish parent and thread label
            forum = tm.group(1).strip()
            tname = tm.group(2).strip()
            if forum:
                note(cid, 'channel_name', with_hash(forum))
            note(cid, 'thread_name', tname)

# 3) Apply observations to registry
changed = 0
for e in entries:
    if e.get('platform') != 'discord':
        continue
    data = obs.get(e.get('id'), {})
    before = json.dumps(e, sort_keys=True)

    # Only fill gaps; values already present in the registry are kept.
    for k in NAME_FIELDS:
        if data.get(k) and not e.get(k):
            e[k] = data[k]

    # status rule
    if e.get('kind') == 'guild':
        resolved = e.get('guild_name')
    else:
        resolved = e.get('channel_name') or e.get('thread_name')
    e['status'] = 'active' if resolved else 'unresolved'

    # Compare before stamping the timestamp, otherwise every entry would
    # count as changed. Entries seeded in this run always count.
    if id(e) in seeded or json.dumps(e, sort_keys=True) != before:
        changed += 1
    e['last_verified_utc'] = now

reg['updated_utc'] = now
reg_path.write_text(json.dumps(reg, indent=2) + '\n', encoding='utf-8')

# 4) Render markdown table from JSON
lines = [
    '# Channel Registry',
    '',
    'Global ID→name registry for cron delivery targets and routing bindings.',
    '',
    '## Resolution Policy',
    '- IDs are canonical; names are metadata and may drift.',
    '- Auto-resolution uses transcript/session metadata + optional overrides file.',
    '- Any referenced entry with `status: unresolved` must be manually resolved.',
    '',
    '## Entries',
    '',
    '| Platform | Kind | ID | Guild ID | Guild Name | Channel Name | Thread Name | Agent Owner | Status | Used By |',
    '|---|---|---|---|---|---|---|---|---|---|',
]
for e in sorted(entries, key=lambda x: (x['platform'], x['kind'], x['id'])):
    # `or []` also covers an explicit null in the JSON.
    used_by = '; '.join(str(u) for u in (e.get('used_by') or []))
    lines.append(
        f"| {e.get('platform','')} | {e.get('kind','')} | `{e.get('id','')}` "
        f"| `{e.get('guild_id') or ''}` | {e.get('guild_name') or 'UNRESOLVED'} "
        f"| {e.get('channel_name') or 'UNRESOLVED'} | {e.get('thread_name') or ''} "
        f"| {e.get('agent_owner') or ''} | {e.get('status') or ''} | {used_by} |"
    )

lines.append('')
lines.append('## Unresolved IDs')
for e in entries:
    if e.get('status') == 'unresolved':
        lines.append(f"- `{e.get('kind')}:{e.get('id')}` (agent `{e.get('agent_owner')}`)")

lines.append('')
lines.append('## Manual Resolution')
lines.append('1. Add/patch explicit values in `/home/node/.openclaw/memory/channel-name-overrides.json`.')
lines.append('2. Re-run `scripts/resolve-channel-names.sh` to merge overrides + observations.')
lines.append('3. Run `scripts/validate-channel-registry.sh` and ensure it returns `OK`.')

# Explicit UTF-8: the header contains a non-ASCII arrow.
md_path.write_text('\n'.join(lines) + '\n', encoding='utf-8')
print(f'Updated registry. Changed entries: {changed}')
PY