fix: switch to OpenAI client for llm-proxy compatibility

- Replace Anthropic SDK with OpenAI SDK (proxy uses OpenAI-compatible API)
- Add combined post fetching to include hidden/moderated posts
- Update system prompt to be more thorough at catching violations
- Add --debug flag to CLI for troubleshooting
This commit is contained in:
Ben
2026-01-06 03:58:08 +00:00
parent e9479e442a
commit e09f74578d
5 changed files with 215 additions and 59 deletions

View File

@@ -5,14 +5,13 @@ import os
from dataclasses import dataclass, field
from typing import Any, Callable
from anthropic import Anthropic
from anthropic.types import TextBlock
from openai import OpenAI
from .discourse import UserPost
from .rules import ALL_RULES, RULES_BY_ID, Rule, Severity, format_rules_for_prompt
# Default model to use
DEFAULT_MODEL = "claude-sonnet-4-5-20250514"
# Default model to use (OpenAI-compatible format for the proxy)
DEFAULT_MODEL = "antigravity/claude-sonnet-4-5"
# Batch size for analyzing posts
BATCH_SIZE = 5
@@ -73,18 +72,23 @@ SYSTEM_PROMPT = """You are an EVE Online forum moderator assistant. Your role is
## Important Context
EVE Online is a competitive PvP MMO where trash talk between rival corporations and alliances is part of the culture. However, there are limits:
- In-game rivalry and competitive banter is generally acceptable
- Personal attacks that go beyond the game are not acceptable
- Discrimination, hate speech, and harassment are never acceptable regardless of "roleplay"
- In-game rivalry and competitive banter about corporations/alliances is generally acceptable
- Personal attacks directed at real players (not just their in-game characters) are NOT acceptable
- Discrimination, hate speech, and harassment are NEVER acceptable regardless of "roleplay"
- Criticism of game mechanics or CCP decisions is allowed if constructive
- Excessive profanity, especially when directed at other players, should be flagged
- Trolling and baiting behavior should be identified even if individually posts seem mild
## Your Task
For each post provided, analyze whether it violates any forum rules. Consider:
1. The context of EVE Online's competitive culture
2. Whether the post is directed at in-game entities (acceptable) vs. real people (less acceptable)
3. The severity of the violation if one exists
4. Your confidence level in the assessment
For each post provided, analyze whether it violates any forum rules. Be thorough - moderators need to catch violations, not excuse them. Consider:
1. The tone and intent of the post
2. Whether insults are directed at players vs in-game entities
3. Patterns of behavior (repeated baiting, antagonizing)
4. The severity of the violation if one exists
5. Your confidence level in the assessment
Flag posts that are borderline - it's better to surface potential issues for human review than to miss violations.
## Output Format
@@ -108,7 +112,7 @@ Respond with valid JSON in this exact format:
}}
If a post has no violations, return an empty violations array and clean=true.
Be conservative - only flag clear violations. Borderline cases should have lower confidence scores.
For borderline cases, include them with lower confidence scores (0.3-0.5) so moderators can review.
"""
@@ -141,8 +145,8 @@ class Analyzer:
)
self.model = model
# Initialize Anthropic client with custom base URL
self.client = Anthropic(
# Initialize OpenAI client with custom base URL (proxy uses OpenAI-compatible API)
self.client = OpenAI(
api_key=self.api_key,
base_url=self.base_url,
)
@@ -273,21 +277,19 @@ CONTENT:
Analyze each post and return your findings in the specified JSON format."""
try:
response = self.client.messages.create(
response = self.client.chat.completions.create(
model=self.model,
max_tokens=4096,
system=system_prompt,
messages=[{"role": "user", "content": user_message}],
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_message},
],
)
# Extract text from the response (handle different content block types)
text_block = next(
(block for block in response.content if isinstance(block, TextBlock)),
None,
)
if text_block is None:
# Extract text from the response
response_text = response.choices[0].message.content
if response_text is None:
raise AnalyzerError("No text content in LLM response")
response_text = text_block.text
return self._parse_response(response_text, posts)
except Exception as e:

View File

@@ -61,6 +61,11 @@ def cli() -> None:
default=True,
help="Fetch full post content (slower but more accurate)",
)
@click.option(
"--debug",
is_flag=True,
help="Show debug output including sample post content",
)
def review(
username: str,
days: int,
@@ -69,6 +74,7 @@ def review(
verbose: bool,
max_posts: int,
enrich: bool,
debug: bool,
) -> None:
"""
Review a user's forum posts for potential rule violations.
@@ -94,6 +100,7 @@ def review(
verbose=verbose,
max_posts=max_posts,
enrich=enrich,
debug=debug,
)
)
@@ -118,6 +125,7 @@ async def _review_user(
verbose: bool,
max_posts: int,
enrich: bool,
debug: bool = False,
) -> None:
"""Async implementation of user review."""
try:
@@ -134,14 +142,16 @@ async def _review_user(
console.print(f"[red]User not found:[/red] {username}")
raise SystemExit(1)
# Fetch posts
# Fetch posts (using combined method to include hidden posts)
with console.status(f"[bold blue]Fetching posts from last {days} days..."):
posts = await client.get_user_posts(
posts = await client.get_user_posts_combined(
username=username,
days=days,
max_posts=max_posts,
)
console.print(f"[green]Found {len(posts)} posts[/green]")
console.print(
f"[green]Found {len(posts)} posts (including hidden)[/green]"
)
if not posts:
console.print(
@@ -155,6 +165,26 @@ async def _review_user(
posts = await client.enrich_posts(posts)
console.print("[green]Enriched posts with full content[/green]")
# Debug: show sample post content
if debug and posts:
console.print("\n[bold yellow]DEBUG: Sample post content[/bold yellow]")
for i, post in enumerate(posts[:3]): # Show first 3 posts
console.print(f"\n[cyan]--- Post {i + 1} ---[/cyan]")
console.print(f"[dim]Topic:[/dim] {post.topic_title}")
console.print(f"[dim]URL:[/dim] {post.url}")
console.print(f"[dim]Date:[/dim] {post.created_at}")
console.print(
f"[dim]Raw length:[/dim] {len(post.content_raw)} chars"
)
console.print(
f"[dim]Text length:[/dim] {len(post.content_text)} chars"
)
content_preview = post.content_text[:500]
if len(post.content_text) > 500:
content_preview += "..."
console.print(f"[dim]Content:[/dim]\n{content_preview}")
console.print("\n")
except DiscourseError as e:
console.print(f"[red]Forum Error:[/red] {e}")
raise SystemExit(1)

View File

@@ -338,3 +338,124 @@ class DiscourseClient:
enriched.append(post)
return enriched
async def get_user_posts_via_search(
    self,
    username: str,
    days: int = 30,
    max_posts: int = 200,
    include_hidden: bool = True,
) -> list[UserPost]:
    """
    Get a user's posts using the search API.

    This path exists to pick up posts that do not appear in the activity
    feed (e.g., hidden/moderated posts). It issues one request per result
    page, so it is slower than the activity feed.

    Note that the search result only carries a short ``blurb``, which is
    stored as both ``content_raw`` and ``content_cooked``; callers that
    need the full text must enrich the posts afterwards.

    Args:
        username: The forum username
        days: Number of days to look back
        max_posts: Maximum number of posts to fetch
        include_hidden: Currently not consulted by this method; it is
            accepted so callers can already pass it.

    Returns:
        List of UserPost objects in the order the search API returned
        them (the query asks for ``order:latest``).
    """
    posts: list[UserPost] = []
    seen_ids: set[int] = set()
    cutoff = datetime.now(timezone.utc) - timedelta(days=days)
    wanted_author = username.lower()

    # Safety limit on pages so a misbehaving endpoint cannot loop forever.
    max_pages = 20

    for page in range(1, max_pages + 1):
        if len(posts) >= max_posts:
            break

        # Use Discourse search API with @username filter
        data = await self._request(
            "GET",
            "/search.json",
            params={
                "q": f"@{username} order:latest",
                "page": page,
            },
        )

        # Treat a missing or null "posts" entry as the end of the results.
        search_posts = data.get("posts") or []
        if not search_posts:
            break

        for post_data in search_posts:
            # Only include posts by this user. The field may be present
            # but null, so normalise before lower-casing.
            author = post_data.get("username") or ""
            if author.lower() != wanted_author:
                continue

            created_str = post_data.get("created_at", "")
            if not created_str:
                continue

            created_at = parse_date(created_str)
            if created_at.tzinfo is None:
                # The cutoff is timezone-aware; naive timestamps are
                # interpreted as UTC so the comparison below is valid.
                created_at = created_at.replace(tzinfo=timezone.utc)

            # Results are requested newest-first, so the first post older
            # than the cutoff ends the whole scan.
            if created_at < cutoff:
                return posts

            post_id = post_data.get("id", 0)
            # Result pages can shift between requests when new posts
            # arrive; skip anything already collected. Posts without a
            # real id (falsy default) are never treated as duplicates.
            if post_id and post_id in seen_ids:
                continue
            seen_ids.add(post_id)

            blurb = post_data.get("blurb", "")
            posts.append(
                UserPost(
                    post_id=post_id,
                    post_number=post_data.get("post_number", 1),
                    topic_id=post_data.get("topic_id", 0),
                    topic_title=post_data.get("topic_title", "Unknown Topic"),
                    topic_slug=post_data.get("topic_slug", "topic"),
                    content_raw=blurb,
                    content_cooked=blurb,
                    created_at=created_at,
                    category_name=None,
                )
            )

            if len(posts) >= max_posts:
                break

    return posts
async def get_user_posts_combined(
    self,
    username: str,
    days: int = 30,
    max_posts: int = 200,
) -> list[UserPost]:
    """
    Get a user's posts from both the activity feed and the search API.

    The two sources are fetched one after the other, merged, deduplicated
    by ``post_id`` and sorted by creation time. When a post is returned by
    both sources, the activity-feed copy is kept because it is seen first.

    Args:
        username: The forum username
        days: Number of days to look back
        max_posts: Maximum number of posts to fetch from each source, and
            the maximum number of posts returned

    Returns:
        List of UserPost objects, newest first, deduplicated
    """
    # Get posts from activity feed
    activity_posts = await self.get_user_posts(username, days, max_posts)

    # Get posts from search
    search_posts = await self.get_user_posts_via_search(username, days, max_posts)

    # Deduplicate by post_id; setdefault keeps the first copy seen, and
    # dicts preserve insertion order, so activity-feed posts take priority.
    unique: dict[int, UserPost] = {}
    for post in (*activity_posts, *search_posts):
        unique.setdefault(post.post_id, post)

    def _created_utc(post):
        # Comparing naive and aware datetimes raises TypeError. The two
        # sources are parsed independently, so treat a naive timestamp as
        # UTC for ordering purposes only (the post itself is not modified).
        created = post.created_at
        if created.tzinfo is None:
            created = created.replace(tzinfo=timezone.utc)
        return created

    # Sort by date, newest first
    combined = sorted(unique.values(), key=_created_utc, reverse=True)

    return combined[:max_posts]