refactor: compact report format with 85% confidence threshold

- Update report format to match Gemini-style table output
- Add confidence threshold (85% default) to filter noise
- Add --show-all flag to display all violations
- Combine quote and explanation in single Description column
- Much cleaner output: 12 high-confidence vs 51 total violations
This commit is contained in:
Ben
2026-01-06 04:15:46 +00:00
parent e09f74578d
commit 6dd4e3b6db
2 changed files with 105 additions and 118 deletions

View File

@@ -66,6 +66,11 @@ def cli() -> None:
is_flag=True,
help="Show debug output including sample post content",
)
@click.option(
"--show-all",
is_flag=True,
help="Show all violations regardless of confidence (default: 85%+ only)",
)
def review(
username: str,
days: int,
@@ -75,6 +80,7 @@ def review(
max_posts: int,
enrich: bool,
debug: bool,
show_all: bool,
) -> None:
"""
Review a user's forum posts for potential rule violations.
@@ -101,6 +107,7 @@ def review(
max_posts=max_posts,
enrich=enrich,
debug=debug,
show_all=show_all,
)
)
@@ -126,6 +133,7 @@ async def _review_user(
max_posts: int,
enrich: bool,
debug: bool = False,
show_all: bool = False,
) -> None:
"""Async implementation of user review."""
try:
@@ -217,7 +225,8 @@ async def _review_user(
)
# Generate report
report_gen = ReportGenerator()
confidence_threshold = 0.0 if show_all else 0.85
report_gen = ReportGenerator(confidence_threshold=confidence_threshold)
# Print to terminal
report_gen.print_summary(user, analyses, days)

View File

@@ -11,41 +11,44 @@ from rich.text import Text
from .analyzer import PostAnalysis, Violation
from .discourse import UserProfile
# Default confidence threshold (85%)
DEFAULT_CONFIDENCE_THRESHOLD = 0.85
class ReportGenerator:
"""Generates moderation reports in terminal and markdown formats."""
def __init__(self, output_dir: Path | str = "reports"):
def __init__(
self,
output_dir: Path | str = "reports",
confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
):
"""
Initialize the report generator.
Args:
output_dir: Directory to save markdown reports
confidence_threshold: Minimum confidence to include violations (0.0-1.0)
"""
self.output_dir = Path(output_dir)
self.output_dir.mkdir(parents=True, exist_ok=True)
self.console = Console()
self.confidence_threshold = confidence_threshold
def _severity_color(self, severity: str) -> str:
"""Get color for severity level."""
colors = {
"low": "yellow",
"medium": "orange1",
"high": "red",
"critical": "bold red",
}
return colors.get(severity, "white")
def _severity_emoji(self, severity: str) -> str:
"""Get indicator for severity level (Discourse-friendly)."""
# Using text indicators instead of emoji for broader compatibility
indicators = {
"low": "[LOW]",
"medium": "[MED]",
"high": "[HIGH]",
"critical": "[CRITICAL]",
}
return indicators.get(severity, "[-]")
def _filter_violations(
self, analyses: list[PostAnalysis]
) -> list[tuple[PostAnalysis, list[Violation]]]:
"""Filter violations by confidence threshold."""
results = []
for analysis in analyses:
high_conf = [
v
for v in analysis.violations
if v.confidence >= self.confidence_threshold
]
if high_conf:
results.append((analysis, high_conf))
return results
def print_summary(
self,
@@ -61,8 +64,8 @@ class ReportGenerator:
analyses: List of post analyses
days: Number of days analyzed
"""
violations = [a for a in analyses if a.has_violations]
total_violations = sum(len(a.violations) for a in violations)
filtered = self._filter_violations(analyses)
total_violations = sum(len(v) for _, v in filtered)
# Header
self.console.print()
@@ -71,37 +74,47 @@ class ReportGenerator:
f"[bold]User Review: {user.username}[/bold]\n"
f"Period: Last {days} days\n"
f"Posts Analyzed: {len(analyses)}\n"
f"Posts with Violations: {len(violations)}\n"
f"Total Violations Found: {total_violations}",
f"Posts with Violations: {len(filtered)}\n"
f"Total Violations Found: {total_violations}\n"
f"[dim](confidence threshold: {self.confidence_threshold:.0%})[/dim]",
title="EVE Forum Moderation Report",
border_style="blue",
)
)
if not violations:
if not filtered:
self.console.print(
"\n[green]No rule violations detected in the analyzed posts.[/green]\n"
"\n[green]No rule violations detected above confidence threshold.[/green]\n"
)
return
# Summary table
table = Table(title="Violations Summary", show_header=True, header_style="bold")
# Compact table format
table = Table(show_header=True, header_style="bold", show_lines=True)
table.add_column("Date", style="dim", width=12)
table.add_column("Topic", width=40, overflow="ellipsis")
table.add_column("Rule", width=25)
table.add_column("Severity", width=10, justify="center")
table.add_column("Confidence", width=10, justify="center")
table.add_column("Post Link", width=50)
table.add_column("Rule", width=20)
table.add_column("Description", width=60)
for analysis, violations in filtered:
for v in violations:
# Create a compact post reference
post_ref = f"{analysis.post.topic_title[:35]}..."
if len(analysis.post.topic_title) <= 35:
post_ref = analysis.post.topic_title
# Combine explanation with quote for compact view
description = v.explanation
if v.quote:
quote_preview = (
v.quote[:80] + "..." if len(v.quote) > 80 else v.quote
)
description = f'"{quote_preview}" - {v.explanation}'
for analysis in violations:
for v in analysis.violations:
table.add_row(
analysis.post.created_at.strftime("%Y-%m-%d"),
analysis.post.topic_title[:38] + "..."
if len(analysis.post.topic_title) > 40
else analysis.post.topic_title,
f"{v.rule_id}: {v.rule_name}",
Text(v.severity.upper(), style=self._severity_color(v.severity)),
f"{v.confidence:.0%}",
analysis.post.created_at.strftime("%b %d, %Y"),
post_ref,
v.rule_name,
description,
)
self.console.print(table)
@@ -114,14 +127,14 @@ class ReportGenerator:
Args:
analyses: List of post analyses with violations
"""
violations = [a for a in analyses if a.has_violations]
filtered = self._filter_violations(analyses)
if not violations:
if not filtered:
return
self.console.print("[bold]Detailed Analysis[/bold]\n")
for analysis in violations:
for analysis, violations in filtered:
self.console.print(
Panel(
f"[bold]{analysis.post.topic_title}[/bold]\n"
@@ -131,16 +144,14 @@ class ReportGenerator:
)
)
for v in analysis.violations:
for v in violations:
self.console.print(
f" [{self._severity_color(v.severity)}]"
f"{v.severity.upper()}[/] - "
f"[bold]Rule {v.rule_id}: {v.rule_name}[/bold] "
f"(confidence: {v.confidence:.0%})"
f" [bold]Rule {v.rule_id}: {v.rule_name}[/bold] "
f"({v.confidence:.0%})"
)
self.console.print(f" [dim]Explanation:[/dim] {v.explanation}")
if v.quote:
self.console.print(f' [dim]Quote:[/dim] "{v.quote}"')
self.console.print(f' [dim]"{v.quote}"[/dim]')
self.console.print(f" {v.explanation}")
self.console.print()
def generate_markdown(
@@ -160,106 +171,71 @@ class ReportGenerator:
Returns:
Markdown string
"""
violations = [a for a in analyses if a.has_violations]
total_violations = sum(len(a.violations) for a in violations)
filtered = self._filter_violations(analyses)
total_violations = sum(len(v) for _, v in filtered)
lines = [
f"## User Review: {user.username}",
"",
f"**Period:** Last {days} days (ending {datetime.now().strftime('%Y-%m-%d')})",
f"**Posts Analyzed:** {len(analyses)}",
f"**Posts with Violations:** {len(violations)}",
f"**Posts with Violations:** {len(filtered)}",
f"**Total Violations Found:** {total_violations}",
"",
]
if not violations:
if not filtered:
lines.extend(
[
"---",
"",
"*No rule violations detected in the analyzed posts.*",
"*No rule violations detected above confidence threshold.*",
"",
]
)
return "\n".join(lines)
# Summary table
# Compact table format matching Gemini style
lines.extend(
[
"---",
"",
"### Summary",
"### Rule Violations & Combative Content",
"",
"| Date | Topic | Rule | Severity | Confidence |",
"|------|-------|------|----------|------------|",
"| Date | Post Link | Rule | Description / Reasoning |",
"|------|-----------|------|-------------------------|",
]
)
for analysis in violations:
for v in analysis.violations:
topic_link = f"[{self._escape_md(analysis.post.topic_title[:40])}]({analysis.post.url})"
lines.append(
f"| {analysis.post.created_at.strftime('%Y-%m-%d')} "
f"| {topic_link} "
f"| {v.rule_id}: {v.rule_name} "
f"| {self._severity_emoji(v.severity)} "
f"| {v.confidence:.0%} |"
)
for analysis, violations in filtered:
for v in violations:
date_str = analysis.post.created_at.strftime("%b %d, %Y")
# Detailed analysis
lines.extend(
[
"",
"---",
"",
"### Detailed Analysis",
"",
]
)
for i, analysis in enumerate(violations, 1):
lines.extend(
[
f"#### {i}. [{self._escape_md(analysis.post.topic_title)}]({analysis.post.url})",
f"**Date:** {analysis.post.created_at.strftime('%Y-%m-%d %H:%M UTC')} | "
f"**Category:** {analysis.post.category_name or 'Unknown'}",
"",
]
)
for v in analysis.violations:
lines.extend(
[
f"**{self._severity_emoji(v.severity)} Rule {v.rule_id} - {v.rule_name}** "
f"(Confidence: {v.confidence:.0%})",
"",
]
)
# Create post link with topic and post number
topic_short = analysis.post.topic_title[:40]
if len(analysis.post.topic_title) > 40:
topic_short = topic_short[:37] + "..."
post_link = f"[{self._escape_md(topic_short)} - Post {analysis.post.post_number}]({analysis.post.url})"
# Combine quote and explanation
description = v.explanation
if v.quote:
lines.extend(
[
f"> {self._escape_md(v.quote)}",
"",
]
)
quote_escaped = self._escape_md(v.quote[:100])
if len(v.quote) > 100:
quote_escaped += "..."
description = f'"{quote_escaped}" - {v.explanation}'
lines.extend(
[
f"*{v.explanation}*",
"",
]
lines.append(
f"| {date_str} | {post_link} | {v.rule_name} | {description} |"
)
lines.append("---")
lines.append("")
# Footer
lines.extend(
[
"*Report generated by EVE Forum Moderator Assistant*",
f"*Generated: {datetime.now().strftime('%Y-%m-%d %H:%M UTC')}*",
"",
"---",
"",
f"*Report generated by EVE Forum Moderator Assistant on {datetime.now().strftime('%Y-%m-%d %H:%M UTC')}*",
]
)
@@ -268,7 +244,9 @@ class ReportGenerator:
def _escape_md(self, text: str) -> str:
"""Escape markdown special characters."""
# Escape characters that could break markdown formatting
for char in ["[", "]", "|", "*", "_", "`"]:
for char in ["|", "\n"]:
text = text.replace(char, " ")
for char in ["[", "]", "*", "_", "`"]:
text = text.replace(char, f"\\{char}")
return text