# openclaw-ops/scripts/resolve-channel-names.sh

#!/usr/bin/env bash
set -euo pipefail

# Locations of the registry artefacts maintained by this script.
# The heredoc below is quoted, so the shell never expands these names inside
# the Python program; exporting them is what makes the values visible to child
# processes through the environment. They are readonly because nothing in this
# script reassigns them.
readonly REG_JSON="/home/node/.openclaw/memory/channel-registry.json"
readonly REG_MD="/home/node/.openclaw/memory/channel-registry.md"
readonly OVERRIDES_JSON="/home/node/.openclaw/memory/channel-name-overrides.json"
export REG_JSON REG_MD OVERRIDES_JSON

python3 - <<'PY'
import json, re
from pathlib import Path
from datetime import datetime, timezone

import os


def env_path(var_name, default):
    """Return the Path named by environment variable var_name, else default.

    An unset or empty variable falls back to default, so running the program
    without these variables in the environment uses the built-in locations.
    """
    return Path(os.environ.get(var_name) or default)


# Registry JSON, the markdown file rendered from it, and the manually curated
# overrides file.
reg_path = env_path('REG_JSON', '/home/node/.openclaw/memory/channel-registry.json')
md_path = env_path('REG_MD', '/home/node/.openclaw/memory/channel-registry.md')
overrides_path = env_path('OVERRIDES_JSON', '/home/node/.openclaw/memory/channel-name-overrides.json')

# Load the registry document (JSON text is read as UTF-8 rather than with the
# locale's default encoding).
reg = json.loads(reg_path.read_text(encoding='utf-8'))
# setdefault, not get: when the document has no "entries" key the new list is
# stored in `reg`, so entries appended to it later are part of `reg` when it
# is serialised again instead of living in a detached list.
entries = reg.setdefault('entries', [])
# Existing entries keyed by (platform, kind, id), used to detect duplicates.
idx = {(e['platform'], e['kind'], e['id']): e for e in entries}

import sys

# Observed metadata per ID: {id: {field: value}}.
obs = {}
# (id, field) pairs that were recorded with locked=True and must not be
# replaced by later observations.
locked_fields = set()


def note(cid, key, value, locked=False):
    """Record `value` as field `key` of ID `cid` in `obs`.

    Calls with an empty `cid` or `value` are ignored. A field recorded with
    locked=True is pinned: later calls for the same (cid, key) leave it
    untouched. Fields that are not pinned keep last-write-wins behaviour, so
    existing three-argument callers behave as before unless the field was
    pinned first.
    """
    if not cid or not value:
        return
    if (cid, key) in locked_fields:
        return
    obs.setdefault(cid, {})[key] = value
    if locked:
        locked_fields.add((cid, key))


# 1) Explicit overrides win (manual curated)
# Override values are recorded first and pinned, so anything noted afterwards
# for the same field cannot displace them.
if overrides_path.exists():
    try:
        ov = json.loads(overrides_path.read_text(encoding='utf-8'))
    except (OSError, ValueError) as exc:
        # Best effort: an unreadable or malformed overrides file must not stop
        # the run, but it is reported instead of being silently ignored.
        print(f'warning: ignoring overrides file {overrides_path}: {exc}', file=sys.stderr)
        ov = {}
    if not isinstance(ov, dict):
        print(f'warning: ignoring overrides file {overrides_path}: top level is not an object', file=sys.stderr)
        ov = {}
    if not isinstance(ov.get('discord'), dict):
        if ov.get('discord'):
            print(f'warning: ignoring "discord" overrides in {overrides_path}: not an object', file=sys.stderr)
        # Normalise so every later reader of `ov` sees a mapping here.
        ov['discord'] = {}
    for cid, data in ov['discord'].items():
        if not isinstance(data, dict):
            continue
        for k in ('guild_name', 'channel_name', 'thread_name', 'guild_id'):
            if data.get(k):
                note(cid, k, data[k], locked=True)

# Ensure override IDs are represented even if not referenced yet
# `ov` may be undefined (no overrides document was loaded) and, when defined,
# may hold any JSON value; validate its shape here so a malformed overrides
# file cannot raise while seeding.
_ov_doc = globals().get('ov')
_ov_discord = _ov_doc.get('discord') if isinstance(_ov_doc, dict) else None
if not isinstance(_ov_discord, dict):
    _ov_discord = {}

# One timestamp for every entry seeded in this run.
_seed_stamp = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')

for cid, data in _ov_discord.items():
    if not isinstance(data, dict):
        continue
    guild_name = data.get('guild_name')
    channel_name = data.get('channel_name')
    thread_name = data.get('thread_name')
    # An override carrying only a guild name describes a guild; anything else
    # is registered as a channel.
    kind = 'guild' if guild_name and not channel_name and not thread_name else 'channel'
    key = ('discord', kind, cid)
    if key in idx:
        continue
    entry = {
        'platform': 'discord',
        'kind': kind,
        'id': cid,
        # A guild entry without an explicit guild_id uses its own ID.
        'guild_id': data.get('guild_id') or (cid if kind == 'guild' else None),
        'guild_name': guild_name,
        'channel_name': channel_name,
        'thread_name': thread_name,
        'agent_owner': None,
        'used_by': ['override:manual'],
        'purpose': 'manual override registry seed',
        'status': 'active' if (guild_name or channel_name or thread_name) else 'unresolved',
        'last_verified_utc': _seed_stamp,
    }
    entries.append(entry)
    idx[key] = entry

# 2) Scan transcripts for embedded metadata (bounded to prevent huge-context blowups)
# Directories whose transcripts are scanned.
roots = [Path(location) for location in (
    '/home/node/.openclaw/workspace',
    '/home/node/.openclaw/workspace-home',
    '/home/node/.openclaw/workspace-security',
    '/home/node/.openclaw/workspace-research',
)]

# Scan budget: number of files, bytes taken from one file, bytes overall.
MAX_JSONL_FILES = 400
MAX_FILE_SCAN_BYTES = 10 ** 6
MAX_TOTAL_SCAN_BYTES = 25 * 10 ** 6

# "discord:<digits>#<name>" references.
pat_discord = re.compile(r'discord:(\d+)#([A-Za-z0-9_-]+)')
# "channel id:<digits>" markers.
pat_conv = re.compile(r'channel id:(\d+)')
# Quoted JSON-style fields carrying a channel name, a subject, or a thread label.
pat_group_channel = re.compile(r'"group_channel"\s*:\s*"(#?[^"]+)"')
pat_subject = re.compile(r'"group_subject"\s*:\s*"(#?[^"]+)"')
pat_thread = re.compile(r'"thread_label"\s*:\s*"Discord thread\s+#([^›"]+)\s+›\s+([^"]+)"')

def bounded_read_jsonl(path: Path, limit_bytes: int) -> str:
    """Return the text of `path`, reading at most about `limit_bytes` bytes.

    A file no larger than `limit_bytes` is returned whole. A larger file
    yields its first and last `limit_bytes // 2` bytes joined by a
    '...TRUNCATED...' marker line. Both pieces are trimmed to line boundaries
    when a newline is available, so a line cut by the byte limit is dropped
    rather than returned as a fragment (a fragment can end in a shortened ID
    or name that still looks complete to a pattern match). A piece containing
    no newline at all is kept as read. Bytes that are not valid UTF-8 are
    dropped during decoding.
    """
    marker = b'\n...TRUNCATED...\n'
    size = path.stat().st_size
    with path.open('rb') as fh:
        if size <= limit_bytes:
            data = fh.read(limit_bytes)
        else:
            half = max(0, limit_bytes // 2)
            head = tail = b''
            if half:
                head = fh.read(half)
                last_nl = head.rfind(b'\n')
                if last_nl != -1:
                    # Drop the partial line after the last complete one.
                    head = head[:last_nl + 1]
                # Read one extra byte before the tail window: if it is a
                # newline the window starts on a line boundary and is kept
                # whole; otherwise everything up to the first newline belongs
                # to a line whose start was skipped and is discarded.
                fh.seek(max(0, size - half - 1))
                tail = fh.read(half + 1)
                first_nl = tail.find(b'\n')
                tail = tail[first_nl + 1:] if first_nl != -1 else tail[1:]
            data = head + marker + tail
    return data.decode('utf-8', errors='ignore')

def _with_hash(label):
    """Return `label` prefixed with '#' unless it is empty or already has one."""
    if label and not label.startswith('#'):
        return '#' + label
    return label


# Every *.jsonl file below the roots that exist, in root order.
jsonl_paths = [found for root in roots if root.exists() for found in root.rglob('*.jsonl')]

bytes_scanned = 0
for transcript in sorted(jsonl_paths)[:MAX_JSONL_FILES]:
    # Each file gets the per-file cap or whatever is left of the total budget,
    # whichever is smaller; stop once nothing is left.
    per_file_cap = min(MAX_FILE_SCAN_BYTES, MAX_TOTAL_SCAN_BYTES - bytes_scanned)
    if per_file_cap <= 0:
        break
    try:
        txt = bounded_read_jsonl(transcript, per_file_cap)
    except Exception:
        # Unreadable file: skip it and carry on with the next one.
        continue
    bytes_scanned += len(txt.encode('utf-8', errors='ignore'))

    # pattern: discord:<id>#name
    for ref in pat_discord.finditer(txt):
        note(ref.group(1), 'channel_name', _with_hash(ref.group(2)))

    # conversation metadata blocks
    for conv in pat_conv.finditer(txt):
        cid = conv.group(1)
        # Look for name fields within 1200 characters either side of the ID.
        window_start = max(0, conv.start() - 1200)
        window = txt[window_start:conv.end() + 1200]

        channel_hit = pat_group_channel.search(window)
        if channel_hit:
            note(cid, 'channel_name', _with_hash(channel_hit.group(1)))

        # The subject is only a fallback for IDs with no channel name so far.
        subject_hit = pat_subject.search(window)
        if subject_hit and not obs.get(cid, {}).get('channel_name'):
            note(cid, 'channel_name', _with_hash(subject_hit.group(1)))

        thread_hit = pat_thread.search(window)
        if thread_hit:
            # forum-ish parent and thread label
            forum, tname = (part.strip() for part in thread_hit.groups())
            note(cid, 'channel_name', _with_hash(forum))
            note(cid, 'thread_name', tname)

# 3) Apply observations to registry
now = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
changed = 0
for e in entries:
    if e.get('platform') != 'discord':
        continue
    cid = e.get('id')
    data = obs.get(cid, {})
    before = json.dumps(e, sort_keys=True)

    # Observations only fill fields the entry has no value for yet.
    for k in ('guild_id', 'guild_name', 'channel_name', 'thread_name'):
        if data.get(k) and not e.get(k):
            e[k] = data[k]

    # status rule: a guild needs a guild name; anything else needs a channel
    # or thread name.
    if e.get('kind') == 'guild':
        e['status'] = 'active' if e.get('guild_name') else 'unresolved'
    else:
        e['status'] = 'active' if (e.get('channel_name') or e.get('thread_name')) else 'unresolved'

    # Compare before stamping the timestamp: the stamp differs on practically
    # every run, so including it would count every entry as changed.
    if json.dumps(e, sort_keys=True) != before:
        changed += 1
    e['last_verified_utc'] = now

reg['updated_utc'] = now
reg_path.write_text(json.dumps(reg, indent=2) + '\n')

# 4) Render markdown table from JSON
def md_cell(value):
    """Return `value` as text that is safe inside one markdown table cell.

    A literal '|' would end the cell and a line break would end the row, so
    pipes are backslash-escaped and line breaks become spaces.
    """
    return str(value).replace('|', '\\|').replace('\r', ' ').replace('\n', ' ')


lines = [
    '# Channel Registry',
    '',
    'Global ID→name registry for cron delivery targets and routing bindings.',
    '',
    '## Resolution Policy',
    '- IDs are canonical; names are metadata and may drift.',
    '- Auto-resolution uses transcript/session metadata + optional overrides file.',
    '- Any referenced entry with `status: unresolved` must be manually resolved.',
    '',
    '## Entries',
    '',
    '| Platform | Kind | ID | Guild ID | Guild Name | Channel Name | Thread Name | Agent Owner | Status | Used By |',
    '|---|---|---|---|---|---|---|---|---|---|',
]
for e in sorted(entries, key=lambda x: (x['platform'], x['kind'], x['id'])):
    # `used_by` may be missing or null, and its items are stringified so a
    # non-string item cannot break the join.
    used_by = '; '.join(str(user) for user in (e.get('used_by') or []))
    cells = [
        md_cell(e.get('platform', '')),
        md_cell(e.get('kind', '')),
        f"`{md_cell(e.get('id', ''))}`",
        f"`{md_cell(e.get('guild_id') or '')}`",
        md_cell(e.get('guild_name') or 'UNRESOLVED'),
        md_cell(e.get('channel_name') or 'UNRESOLVED'),
        md_cell(e.get('thread_name') or ''),
        md_cell(e.get('agent_owner') or ''),
        md_cell(e.get('status') or ''),
        md_cell(used_by),
    ]
    lines.append('| ' + ' | '.join(cells) + ' |')

lines.append('')
lines.append('## Unresolved IDs')
for e in entries:
    if e.get('status') == 'unresolved':
        lines.append(f"- `{e.get('kind')}:{e.get('id')}` (agent `{e.get('agent_owner')}`)")

lines.append('')
lines.append('## Manual Resolution')
lines.append('1. Add/patch explicit values in `/home/node/.openclaw/memory/channel-name-overrides.json`.')
lines.append('2. Re-run `scripts/resolve-channel-names.sh` to merge overrides + observations.')
lines.append('3. Run `scripts/validate-channel-registry.sh` and ensure it returns `OK`.')

# The document contains non-ASCII text ('→', and possibly names), so the
# encoding is fixed rather than taken from the locale.
md_path.write_text('\n'.join(lines) + '\n', encoding='utf-8')
print(f'Updated registry. Changed entries: {changed}')
PY