Files
openclaw-ops/scripts/resolve-channel-names.sh

209 lines
7.8 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# Runs an inline Python program (passed to python3 on stdin) that updates the
# channel registry JSON and re-renders its markdown view.

# Abort on any failing command, on use of an unset variable, and on a failure
# in any stage of a pipeline.
set -euo pipefail

# NOTE(review): these three variables are not exported and the heredoc below
# uses a quoted delimiter ('PY'), so the Python code cannot see them; it
# hard-codes the same three paths itself. Keep both sets in sync, or confirm
# whether these assignments are still needed.
REG_JSON="/home/node/.openclaw/memory/channel-registry.json"
REG_MD="/home/node/.openclaw/memory/channel-registry.md"
OVERRIDES_JSON="/home/node/.openclaw/memory/channel-name-overrides.json"

# Quoted delimiter: the shell performs no expansion inside the Python source.
python3 - <<'PY'
import json, re
from pathlib import Path
from datetime import datetime, timezone
# Fixed locations of the registry JSON, its rendered markdown view, and the
# optional overrides file.
reg_path = Path('/home/node/.openclaw/memory/channel-registry.json')
md_path = Path('/home/node/.openclaw/memory/channel-registry.md')
overrides_path = Path('/home/node/.openclaw/memory/channel-name-overrides.json')

# The registry file must exist and contain a JSON object; a missing or
# malformed file aborts the run here.
reg = json.loads(reg_path.read_text())

# setdefault (not get): when the registry has no 'entries' key yet, the new
# list must be attached to `reg`, otherwise entries added or updated later
# would be missing from the JSON that is written back.
entries = reg.setdefault('entries', [])

# Lookup of existing entries by (platform, kind, id). Every entry must carry
# all three keys; a missing key raises KeyError.
idx = {(e['platform'], e['kind'], e['id']): e for e in entries}
# Observations collected during this run: ID -> {field name -> value}.
obs = {}


def note(cid, key, value):
    """Record *value* for field *key* of ID *cid* in ``obs``.

    A later call for the same (cid, key) replaces the earlier value.
    Calls with a falsy *cid* or a falsy *value* are ignored.
    """
    if cid and value:
        fields = obs.setdefault(cid, {})
        fields[key] = value
# 1) Explicit overrides win (manual curated)
#
# Reads the optional overrides file, which is expected to look like
#   {"discord": {"<id>": {"guild_name": ..., "channel_name": ...,
#                         "thread_name": ..., "guild_id": ...}}}
# and, for every usable override:
#   * records its values as observations,
#   * fills the still-empty fields of existing discord entries with that ID
#     right away, so that a conflicting value observed in a transcript later
#     in the run cannot take the slot, and
#   * seeds a new registry entry when the ID is not in the registry yet.
# Values already present in a registry entry are never replaced.
import sys  # only needed for the warning below

OVERRIDE_FIELDS = ('guild_name', 'channel_name', 'thread_name', 'guild_id')

# id -> override dict; stays empty when the file is missing or unusable.
discord_overrides = {}
if overrides_path.exists():
    try:
        ov = json.loads(overrides_path.read_text(encoding='utf-8'))
    except (OSError, ValueError) as exc:
        # Best effort: an unreadable or malformed overrides file must not
        # abort the run, but it should not go unnoticed either.
        # (JSON and Unicode decode errors are both ValueError subclasses.)
        print(f'warning: ignoring overrides file {overrides_path}: {exc}', file=sys.stderr)
    else:
        # Tolerate a top level or a 'discord' section that is not an object,
        # and skip individual overrides that are not objects.
        section = ov.get('discord') if isinstance(ov, dict) else None
        if isinstance(section, dict):
            discord_overrides = {
                cid: data for cid, data in section.items() if isinstance(data, dict)
            }

# Existing discord entries grouped by ID, whatever their kind.
discord_by_id = {}
for e in entries:
    if e.get('platform') == 'discord':
        discord_by_id.setdefault(e.get('id'), []).append(e)

seeded_at = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
for cid, data in discord_overrides.items():
    for k in OVERRIDE_FIELDS:
        if data.get(k):
            note(cid, k, data[k])

    # Give the override first claim on any empty field of existing entries.
    for e in discord_by_id.get(cid, ()):
        for k in OVERRIDE_FIELDS:
            if data.get(k) and not e.get(k):
                e[k] = data[k]

    # Ensure override IDs are represented even if not referenced yet
    has_name = bool(data.get('guild_name') or data.get('channel_name') or data.get('thread_name'))
    # An override that names only a guild describes the guild itself;
    # anything else is treated as a channel.
    guild_only = data.get('guild_name') and not data.get('channel_name') and not data.get('thread_name')
    kind = 'guild' if guild_only else 'channel'
    key = ('discord', kind, cid)
    if key not in idx:
        seed = {
            'platform': 'discord',
            'kind': kind,
            'id': cid,
            # A guild entry without an explicit guild_id is its own guild.
            'guild_id': data.get('guild_id') or (cid if kind == 'guild' else None),
            'guild_name': data.get('guild_name'),
            'channel_name': data.get('channel_name'),
            'thread_name': data.get('thread_name'),
            'agent_owner': None,
            'used_by': ['override:manual'],
            'purpose': 'manual override registry seed',
            'status': 'active' if has_name else 'unresolved',
            'last_verified_utc': seeded_at,
        }
        entries.append(seed)
        idx[key] = seed
# 2) Scan transcripts for embedded metadata (bounded to prevent huge-context blowups)

# Scan budget: how many transcript files are considered, how many bytes are
# sampled from each one, and how many bytes are sampled over the whole run.
MAX_JSONL_FILES = 400
MAX_FILE_SCAN_BYTES = 10 ** 6
MAX_TOTAL_SCAN_BYTES = 25 * 10 ** 6

# Directory trees searched for *.jsonl transcripts.
roots = [
    Path(location)
    for location in (
        '/home/node/.openclaw/workspace',
        '/home/node/.openclaw/workspace-home',
        '/home/node/.openclaw/workspace-security',
        '/home/node/.openclaw/workspace-research',
    )
]

# "discord:<digits>#<name>" -> (id, name without the '#').
pat_discord = re.compile(r'discord:(\d+)#([A-Za-z0-9_-]+)')
# "channel id:<digits>" -> id; used as the anchor of a metadata block.
pat_conv = re.compile(r'channel id:(\d+)')
# JSON string fields; the captured value keeps a leading '#' if one is present.
pat_group_channel = re.compile(r'"group_channel"\s*:\s*"(#?[^"]+)"')
pat_subject = re.compile(r'"group_subject"\s*:\s*"(#?[^"]+)"')
# "thread_label": "Discord thread #<parent> <sep> <thread>" -> (parent, thread).
# NOTE(review): the two adjacent `\s+` suggest that a separator character
# between them may have been lost (e.g. in copy/paste) — confirm against real
# thread_label values.
pat_thread = re.compile(r'"thread_label"\s*:\s*"Discord thread\s+#([^"]+)\s+\s+([^"]+)"')
def bounded_read_jsonl(path: Path, limit_bytes: int) -> str:
    """Return a bounded sample of *path*, decoded as UTF-8 text.

    A file no larger than *limit_bytes* is returned whole. A larger file is
    sampled: its first and its last ``limit_bytes // 2`` bytes, joined by a
    ``...TRUNCATED...`` marker line, so the result can exceed the limit by
    the length of that marker. Bytes that do not decode as UTF-8 (including
    a multi-byte sequence cut by the sampling) are dropped.

    A non-positive *limit_bytes* returns ``''`` without touching the file.

    Raises:
        OSError: if the file cannot be stat'ed or opened.
    """
    # A negative size passed to read() means "read everything", which would
    # defeat the whole point of the bound.
    if limit_bytes <= 0:
        return ''
    half = limit_bytes // 2
    size = path.stat().st_size
    with path.open('rb') as fh:
        if size <= limit_bytes:
            # Still capped, in case the file grew after the stat() call.
            data = fh.read(limit_bytes)
        else:
            head = fh.read(half)
            fh.seek(max(0, size - half))
            tail = fh.read(half)
            data = head + b'\n...TRUNCATED...\n' + tail
    return data.decode('utf-8', errors='ignore')
def _hash_prefixed(name):
    """Return *name* with a leading '#'.

    Empty names and names that already start with '#' are returned as is.
    """
    if not name or name.startswith('#'):
        return name
    return '#' + name


# Every *.jsonl file below the roots that exist, in root order.
jsonl_paths = [
    found
    for root in roots
    if root.exists()
    for found in root.rglob('*.jsonl')
]

bytes_scanned = 0
for transcript in sorted(jsonl_paths)[:MAX_JSONL_FILES]:
    # Stop as soon as the run-wide budget leaves nothing for this file.
    per_file_cap = min(MAX_FILE_SCAN_BYTES, MAX_TOTAL_SCAN_BYTES - bytes_scanned)
    if per_file_cap <= 0:
        break
    try:
        txt = bounded_read_jsonl(transcript, per_file_cap)
    except Exception:
        # Unreadable transcripts are skipped and do not count against the budget.
        continue
    bytes_scanned += len(txt.encode('utf-8', errors='ignore'))

    # pattern: discord:<id>#name
    for hit in pat_discord.finditer(txt):
        note(hit.group(1), 'channel_name', _hash_prefixed(hit.group(2)))

    # conversation metadata blocks
    for hit in pat_conv.finditer(txt):
        cid = hit.group(1)
        # Look for name fields up to 1200 characters either side of the ID.
        window_start = max(0, hit.start() - 1200)
        window = txt[window_start:hit.end() + 1200]

        channel_hit = pat_group_channel.search(window)
        if channel_hit:
            note(cid, 'channel_name', _hash_prefixed(channel_hit.group(1)))

        # The subject is only a fallback for IDs that have no channel name yet.
        subject_hit = pat_subject.search(window)
        if subject_hit and not obs.get(cid, {}).get('channel_name'):
            note(cid, 'channel_name', _hash_prefixed(subject_hit.group(1)))

        thread_hit = pat_thread.search(window)
        if thread_hit:
            # forum-ish parent and thread label
            forum = thread_hit.group(1).strip()
            if forum:
                note(cid, 'channel_name', _hash_prefixed(forum))
            note(cid, 'thread_name', thread_hit.group(2).strip())
# 3) Apply observations to registry
#
# For every discord entry: fill fields that are still empty from the
# observations recorded for its ID (existing values are never replaced),
# recompute its status, and stamp it as verified now. `changed` counts the
# entries whose content actually changed; the verification timestamp is
# rewritten on every run and is deliberately left out of that comparison.
# The registry JSON is rewritten on every run.
now = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
changed = 0
for e in entries:
    if e.get('platform') != 'discord':
        continue
    data = obs.get(e.get('id'), {})
    before = json.dumps(e, sort_keys=True)
    for k in ('guild_id', 'guild_name', 'channel_name', 'thread_name'):
        if data.get(k) and not e.get(k):
            e[k] = data[k]
    # status rule: a guild needs a guild name; anything else needs a channel
    # or thread name.
    if e.get('kind') == 'guild':
        resolved = e.get('guild_name')
    else:
        resolved = e.get('channel_name') or e.get('thread_name')
    e['status'] = 'active' if resolved else 'unresolved'
    # Compare before stamping, otherwise the fresh timestamp alone would make
    # practically every entry count as changed.
    if json.dumps(e, sort_keys=True) != before:
        changed += 1
    e['last_verified_utc'] = now
reg['updated_utc'] = now
reg_path.write_text(json.dumps(reg, indent=2) + '\n')
# 4) Render markdown table from JSON
#
# Writes the markdown view of `entries` to `md_path` (always as UTF-8, since
# the text contains non-ASCII characters) and prints a one-line summary.
def _md_cell(value):
    """Return str(value) made safe for a single-line markdown table cell.

    Pipes are backslash-escaped and line breaks become spaces, so a name
    containing either cannot break the table layout.
    """
    return str(value).replace('|', '\\|').replace('\r', ' ').replace('\n', ' ')


lines = [
    '# Channel Registry',
    '',
    'Global ID→name registry for cron delivery targets and routing bindings.',
    '',
    '## Resolution Policy',
    '- IDs are canonical; names are metadata and may drift.',
    '- Auto-resolution uses transcript/session metadata + optional overrides file.',
    '- Any referenced entry with `status: unresolved` must be manually resolved.',
    '',
    '## Entries',
    '',
    '| Platform | Kind | ID | Guild ID | Guild Name | Channel Name | Thread Name | Agent Owner | Status | Used By |',
    '|---|---|---|---|---|---|---|---|---|---|',
]
for e in sorted(entries, key=lambda x: (x['platform'], x['kind'], x['id'])):
    cells = [
        _md_cell(e.get('platform', '')),
        _md_cell(e.get('kind', '')),
        f"`{_md_cell(e.get('id', ''))}`",
        f"`{_md_cell(e.get('guild_id') or '')}`",
        _md_cell(e.get('guild_name') or 'UNRESOLVED'),
        _md_cell(e.get('channel_name') or 'UNRESOLVED'),
        _md_cell(e.get('thread_name') or ''),
        _md_cell(e.get('agent_owner') or ''),
        _md_cell(e.get('status') or ''),
        # `or []` also covers an entry whose used_by is present but null.
        _md_cell('; '.join(e.get('used_by') or [])),
    ]
    lines.append('| ' + ' | '.join(cells) + ' |')
lines.append('')
lines.append('## Unresolved IDs')
# Listed in registry order, not in the sorted order of the table above.
lines.extend(
    f"- `{e.get('kind')}:{e.get('id')}` (agent `{e.get('agent_owner')}`)"
    for e in entries
    if e.get('status') == 'unresolved'
)
lines.extend([
    '',
    '## Manual Resolution',
    '1. Add/patch explicit values in `/home/node/.openclaw/memory/channel-name-overrides.json`.',
    '2. Re-run `scripts/resolve-channel-names.sh` to merge overrides + observations.',
    '3. Run `scripts/validate-channel-registry.sh` and ensure it returns `OK`.',
])
md_path.write_text('\n'.join(lines) + '\n', encoding='utf-8')
print(f'Updated registry. Changed entries: {changed}')
PY