209 lines
7.8 KiB
Bash
Executable File
209 lines
7.8 KiB
Bash
Executable File
#!/usr/bin/env bash
# Refresh the channel registry: the embedded Python program below merges the
# manual overrides file and names found in transcript files into the registry
# JSON, then regenerates the Markdown rendering of that registry.

# Stop on any failing command, on use of an unset variable, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Locations of the registry artifacts.
# NOTE(review): the heredoc delimiter below is quoted ('PY'), so these shell
# variables are not expanded inside the Python program; it spells out the same
# paths itself. Keep both in sync when a path changes.
REG_JSON="/home/node/.openclaw/memory/channel-registry.json"
REG_MD="/home/node/.openclaw/memory/channel-registry.md"
OVERRIDES_JSON="/home/node/.openclaw/memory/channel-name-overrides.json"

python3 - <<'PY'
|
||
import json, re
|
||
from pathlib import Path
|
||
from datetime import datetime, timezone
|
||
|
||
# Fixed locations of the registry JSON, its Markdown rendering, and the
# optional manual overrides file.
reg_path = Path('/home/node/.openclaw/memory/channel-registry.json')
md_path = Path('/home/node/.openclaw/memory/channel-registry.md')
overrides_path = Path('/home/node/.openclaw/memory/channel-name-overrides.json')

# JSON text is UTF-8; do not depend on the locale's default encoding.
reg = json.loads(reg_path.read_text(encoding='utf-8'))

# Keep `entries` attached to `reg`. With a plain reg.get('entries', []) a
# registry lacking the key would hand back a detached list, and entries added
# to it later would silently be missing from the document written back.
if reg.get('entries') is None:
    reg['entries'] = []
entries = reg['entries']

# Index of the existing entries by (platform, kind, id).
idx = {(e['platform'], e['kind'], e['id']): e for e in entries}
|
||
|
||
import sys

# Names gathered per ID: {id: {field: value}}.
obs = {}

# (id, field) pairs whose value came from the manual overrides file. Values
# recorded later without the override flag must not replace them, otherwise
# the "overrides win" rule below would not hold.
_override_keys = set()


def note(cid, key, value, *, from_override=False):
    """Record ``value`` as field ``key`` for ID ``cid`` in ``obs``.

    Calls with an empty ``cid`` or an empty ``value`` are ignored. A value
    recorded with ``from_override=True`` is pinned: later calls for the same
    ``(cid, key)`` that do not set the flag leave it untouched. Unpinned
    values keep last-write-wins behavior.
    """
    if not cid or not value:
        return
    if from_override:
        _override_keys.add((cid, key))
    elif (cid, key) in _override_keys:
        return
    obs.setdefault(cid, {})[key] = value


# 1) Explicit overrides win (manual curated)
# `ov` is always bound to a dict whose 'discord' value is a dict, so code
# later in the script can iterate it without re-validating the file's shape.
ov = {}
if overrides_path.exists():
    try:
        ov = json.loads(overrides_path.read_text(encoding='utf-8'))
    except (OSError, ValueError) as exc:
        # Best effort: a broken overrides file must not abort the run, but it
        # should not go unnoticed either. ValueError covers both invalid JSON
        # and invalid UTF-8.
        print(f'warning: ignoring overrides file {overrides_path}: {exc}', file=sys.stderr)
        ov = {}
    if not isinstance(ov, dict):
        print(f'warning: ignoring overrides file {overrides_path}: top level is not an object', file=sys.stderr)
        ov = {}
    if not isinstance(ov.get('discord'), dict):
        if ov.get('discord'):
            print(f"warning: ignoring 'discord' overrides in {overrides_path}: not an object", file=sys.stderr)
        ov['discord'] = {}
    for cid, data in ov['discord'].items():
        if not isinstance(data, dict):
            continue
        for k in ('guild_name', 'channel_name', 'thread_name', 'guild_id'):
            # note() itself skips empty values.
            note(cid, k, data.get(k), from_override=True)
|
||
|
||
# Ensure override IDs are represented even if not referenced yet
# `ov` is only bound when the overrides file was loaded, and its content is
# user-edited, so validate the shape here instead of probing locals() and
# assuming a dict of dicts.
_ov_doc = globals().get('ov')
_ov_discord = _ov_doc.get('discord') if isinstance(_ov_doc, dict) else None
if not isinstance(_ov_discord, dict):
    _ov_discord = {}

# One timestamp for every entry seeded in this run.
_seeded_at = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')

for cid, data in _ov_discord.items():
    if not isinstance(data, dict):
        continue
    guild_name = data.get('guild_name')
    channel_name = data.get('channel_name')
    thread_name = data.get('thread_name')
    # An override that names only a guild describes the guild itself;
    # everything else is registered as a channel.
    kind = 'guild' if guild_name and not channel_name and not thread_name else 'channel'
    key = ('discord', kind, cid)
    if key in idx:
        continue
    entry = {
        'platform': 'discord',
        'kind': kind,
        'id': cid,
        # A guild entry's own ID doubles as its guild_id when none is given.
        'guild_id': data.get('guild_id') or (cid if kind == 'guild' else None),
        'guild_name': guild_name,
        'channel_name': channel_name,
        'thread_name': thread_name,
        'agent_owner': None,
        'used_by': ['override:manual'],
        'purpose': 'manual override registry seed',
        'status': 'active' if (guild_name or channel_name or thread_name) else 'unresolved',
        'last_verified_utc': _seeded_at,
    }
    entries.append(entry)
    idx[key] = entry
|
||
|
||
# 2) Scan transcripts for embedded metadata (bounded to prevent huge-context blowups)

# Workspace directories searched for transcript files.
roots = [
    Path(location)
    for location in (
        '/home/node/.openclaw/workspace',
        '/home/node/.openclaw/workspace-home',
        '/home/node/.openclaw/workspace-security',
        '/home/node/.openclaw/workspace-research',
    )
]

# Scan limits: number of files, bytes read per file, bytes read overall.
MAX_JSONL_FILES = 400
MAX_FILE_SCAN_BYTES = 1_000_000
MAX_TOTAL_SCAN_BYTES = 25_000_000

# "discord:<digits>#<name>" references: group 1 is the ID, group 2 the name.
pat_discord = re.compile(r'discord:(\d+)#([A-Za-z0-9_-]+)')
# "channel id:<digits>" markers: group 1 is the ID.
pat_conv = re.compile(r'channel id:(\d+)')
# JSON string fields carrying a name, with an optional leading '#'.
pat_group_channel = re.compile(r'"group_channel"\s*:\s*"(#?[^"]+)"')
pat_subject = re.compile(r'"group_subject"\s*:\s*"(#?[^"]+)"')
# "Discord thread #<parent> › <thread>" labels: group 1 is the parent name,
# group 2 the thread name.
pat_thread = re.compile(r'"thread_label"\s*:\s*"Discord thread\s+#([^›"]+)\s+›\s+([^"]+)"')
|
||
|
||
def bounded_read_jsonl(path: Path, limit_bytes: int) -> str:
    """Return at most ``limit_bytes`` bytes of ``path`` as UTF-8 text.

    A file that fits within the limit is returned whole. A larger file is
    reduced to its head and its tail joined by a ``...TRUNCATED...`` marker
    line; the marker is counted against the limit, so the result never
    exceeds ``limit_bytes`` bytes. When the limit is too small to hold the
    marker plus some content, only a plain prefix of the file is returned.
    A non-positive limit yields an empty string.

    Bytes that are not valid UTF-8 (including a multi-byte character cut at
    a truncation point) are dropped rather than raising.

    Raises:
        OSError: if the file cannot be inspected or opened.
    """
    if limit_bytes <= 0:
        # read() treats a negative size as "read everything"; never let a
        # bad limit turn into an unbounded read.
        return ''

    marker = b'\n...TRUNCATED...\n'
    size = path.stat().st_size
    with path.open('rb') as fh:
        if size <= limit_bytes:
            data = fh.read(limit_bytes)
        else:
            room = limit_bytes - len(marker)
            if room < 2:
                # No space for head + marker + tail.
                data = fh.read(limit_bytes)
            else:
                half = room // 2
                head = fh.read(half)
                # size > limit_bytes > half, so this offset is positive.
                fh.seek(size - half)
                tail = fh.read(half)
                data = head + marker + tail
    return data.decode('utf-8', errors='ignore')
|
||
|
||
# Every *.jsonl file found, recursively, under the roots that exist.
jsonl_paths = [
    found
    for root in roots
    if root.exists()
    for found in root.rglob('*.jsonl')
]
|
||
|
||
def _hash_prefixed(name):
    """Return ``name`` with a leading '#', adding one only if it is missing."""
    if name and not name.startswith('#'):
        return '#' + name
    return name


# Walk the transcript files in sorted path order, stopping once either the
# file-count limit or the overall byte budget is used up.
bytes_scanned = 0
for transcript in sorted(jsonl_paths)[:MAX_JSONL_FILES]:
    # Each file gets the per-file cap or whatever is left of the total
    # budget, whichever is smaller.
    read_cap = min(MAX_FILE_SCAN_BYTES, MAX_TOTAL_SCAN_BYTES - bytes_scanned)
    if read_cap <= 0:
        break
    try:
        text = bounded_read_jsonl(transcript, read_cap)
    except Exception:
        # Unreadable file: skip it and carry on with the next one.
        continue
    bytes_scanned += len(text.encode('utf-8', errors='ignore'))

    # pattern: discord:<id>#name
    for hit in pat_discord.finditer(text):
        note(hit.group(1), 'channel_name', _hash_prefixed(hit.group(2)))

    # conversation metadata blocks
    for hit in pat_conv.finditer(text):
        cid = hit.group(1)
        # Look for name fields within 1200 characters on either side of the
        # ID marker.
        window_start = max(0, hit.start() - 1200)
        window = text[window_start:hit.end() + 1200]

        channel_hit = pat_group_channel.search(window)
        if channel_hit:
            note(cid, 'channel_name', _hash_prefixed(channel_hit.group(1)))

        # The subject is only a fallback for IDs without a channel name so far.
        subject_hit = pat_subject.search(window)
        if subject_hit and not obs.get(cid, {}).get('channel_name'):
            note(cid, 'channel_name', _hash_prefixed(subject_hit.group(1)))

        thread_hit = pat_thread.search(window)
        if not thread_hit:
            continue
        # forum-ish parent and thread label
        parent_name = thread_hit.group(1).strip()
        if parent_name:
            note(cid, 'channel_name', _hash_prefixed(parent_name))
        note(cid, 'thread_name', thread_hit.group(2).strip())
|
||
|
||
# 3) Apply observations to registry
now = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
changed = 0
for e in entries:
    if e.get('platform') != 'discord':
        continue
    cid = e.get('id')
    data = obs.get(cid, {})
    before = json.dumps(e, sort_keys=True)

    # Only fill fields that are still empty; values already in the registry
    # are left alone.
    for k in ('guild_id', 'guild_name', 'channel_name', 'thread_name'):
        if data.get(k) and not e.get(k):
            e[k] = data[k]

    # status rule: a guild needs a guild name, anything else needs a channel
    # or thread name.
    if e.get('kind') == 'guild':
        resolved = e.get('guild_name')
    else:
        resolved = e.get('channel_name') or e.get('thread_name')
    e['status'] = 'active' if resolved else 'unresolved'

    # Compare before stamping the verification time. The stamp is refreshed
    # on every run, so including it would report every entry as changed.
    if json.dumps(e, sort_keys=True) != before:
        changed += 1
    e['last_verified_utc'] = now

reg['updated_utc'] = now
reg_path.write_text(json.dumps(reg, indent=2) + '\n', encoding='utf-8')
|
||
|
||
# 4) Render markdown table from JSON
def _md_cell(value):
    """Format ``value`` as text that is safe inside a Markdown table cell.

    A literal '|' would end the cell and a line break would end the row, so
    pipes are backslash-escaped and line breaks are replaced by spaces.
    """
    return str(value).replace('|', '\\|').replace('\r', ' ').replace('\n', ' ')


lines = [
    '# Channel Registry',
    '',
    'Global ID→name registry for cron delivery targets and routing bindings.',
    '',
    '## Resolution Policy',
    '- IDs are canonical; names are metadata and may drift.',
    '- Auto-resolution uses transcript/session metadata + optional overrides file.',
    '- Any referenced entry with `status: unresolved` must be manually resolved.',
    '',
    '## Entries',
    '',
    '| Platform | Kind | ID | Guild ID | Guild Name | Channel Name | Thread Name | Agent Owner | Status | Used By |',
    '|---|---|---|---|---|---|---|---|---|---|',
]

# One table row per entry, ordered by platform, kind, and ID.
for e in sorted(entries, key=lambda x: (x['platform'], x['kind'], x['id'])):
    cells = (
        _md_cell(e.get('platform', '')),
        _md_cell(e.get('kind', '')),
        f"`{_md_cell(e.get('id', ''))}`",
        f"`{_md_cell(e.get('guild_id') or '')}`",
        _md_cell(e.get('guild_name') or 'UNRESOLVED'),
        _md_cell(e.get('channel_name') or 'UNRESOLVED'),
        _md_cell(e.get('thread_name') or ''),
        _md_cell(e.get('agent_owner') or ''),
        _md_cell(e.get('status') or ''),
        # A missing or null used_by renders as an empty cell.
        _md_cell('; '.join(str(user) for user in (e.get('used_by') or []))),
    )
    lines.append('| ' + ' | '.join(cells) + ' |')

lines.append('')
lines.append('## Unresolved IDs')
lines.extend(
    f"- `{e.get('kind')}:{e.get('id')}` (agent `{e.get('agent_owner')}`)"
    for e in entries
    if e.get('status') == 'unresolved'
)

lines.append('')
lines.append('## Manual Resolution')
lines.append('1. Add/patch explicit values in `/home/node/.openclaw/memory/channel-name-overrides.json`.')
lines.append('2. Re-run `scripts/resolve-channel-names.sh` to merge overrides + observations.')
lines.append('3. Run `scripts/validate-channel-registry.sh` and ensure it returns `OK`.')

# The document contains non-ASCII text (the arrow in the intro line, and
# possibly names), so write it as UTF-8 rather than in the locale's encoding.
md_path.write_text('\n'.join(lines) + '\n', encoding='utf-8')
print(f'Updated registry. Changed entries: {changed}')
|
||
PY
|