refactor: compact report format with 85% confidence threshold

- Update report format to match Gemini-style table output
- Add confidence threshold (85% default) to filter noise
- Add --show-all flag to display all violations
- Combine quote and explanation in a single Description column
- Much cleaner output: 12 high-confidence vs 51 total violations
2026-01-06 04:15:46 +00:00
parent e09f74578d
commit 6dd4e3b6db
2 changed files with 105 additions and 118 deletions
--- a/src/eve_mod/cli.py
+++ b/src/eve_mod/cli.py
@@ -66,6 +66,11 @@ def cli() -> None:
    is_flag=True,
    help="Show debug output including sample post content",
 )
@click.option(
    "--show-all",
    is_flag=True,
    help="Show all violations regardless of confidence (default: 85%+ only)",
 )
 def review(
    username: str,
    days: int,
@@ -75,6 +80,7 @@ def review(
    max_posts: int,
    enrich: bool,
    debug: bool,
    show_all: bool,
 ) -> None:
    """
    Review a user's forum posts for potential rule violations.
@@ -101,6 +107,7 @@ def review(
                max_posts=max_posts,
                enrich=enrich,
                debug=debug,
                show_all=show_all,
            )
        )
@@ -126,6 +133,7 @@ async def _review_user(
    max_posts: int,
    enrich: bool,
    debug: bool = False,
    show_all: bool = False,
 ) -> None:
    """Async implementation of user review."""
    try:
@@ -217,7 +225,8 @@ async def _review_user(
        )
    # Generate report
-    report_gen = ReportGenerator()
+    confidence_threshold = 0.0 if show_all else 0.85
    report_gen = ReportGenerator(confidence_threshold=confidence_threshold)
    # Print to terminal
    report_gen.print_summary(user, analyses, days)
--- a/src/eve_mod/report.py
+++ b/src/eve_mod/report.py
@@ -11,41 +11,44 @@ from rich.text import Text
 from .analyzer import PostAnalysis, Violation
 from .discourse import UserProfile
 # Default confidence threshold (85%)
 DEFAULT_CONFIDENCE_THRESHOLD = 0.85
 class ReportGenerator:
    """Generates moderation reports in terminal and markdown formats."""
-    def __init__(self, output_dir: Path | str = "reports"):
+    def __init__(
        self,
        output_dir: Path | str = "reports",
        confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
    ):
        """
        Initialize the report generator.
        Args:
            output_dir: Directory to save markdown reports
            confidence_threshold: Minimum confidence to include violations (0.0-1.0)
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.console = Console()
        self.confidence_threshold = confidence_threshold
-    def _severity_color(self, severity: str) -> str:
+    def _filter_violations(
-        """Get color for severity level."""
+        self, analyses: list[PostAnalysis]
-        colors = {
+    ) -> list[tuple[PostAnalysis, list[Violation]]]:
-            "low": "yellow",
+        """Filter violations by confidence threshold."""
-            "medium": "orange1",
+        results = []
-            "high": "red",
+        for analysis in analyses:
-            "critical": "bold red",
+            high_conf = [
-        }
+                v
-        return colors.get(severity, "white")
+                for v in analysis.violations
-
+                if v.confidence >= self.confidence_threshold
-    def _severity_emoji(self, severity: str) -> str:
+            ]
-        """Get indicator for severity level (Discourse-friendly)."""
+            if high_conf:
-        # Using text indicators instead of emoji for broader compatibility
+                results.append((analysis, high_conf))
-        indicators = {
+        return results
            "low": "[LOW]",
            "medium": "[MED]",
            "high": "[HIGH]",
            "critical": "[CRITICAL]",
        }
        return indicators.get(severity, "[-]")
    def print_summary(
        self,
@@ -61,8 +64,8 @@ class ReportGenerator:
            analyses: List of post analyses
            days: Number of days analyzed
        """
-        violations = [a for a in analyses if a.has_violations]
+        filtered = self._filter_violations(analyses)
-        total_violations = sum(len(a.violations) for a in violations)
+        total_violations = sum(len(v) for _, v in filtered)
        # Header
        self.console.print()
@@ -71,37 +74,47 @@ class ReportGenerator:
                f"[bold]User Review: {user.username}[/bold]\n"
                f"Period: Last {days} days\n"
                f"Posts Analyzed: {len(analyses)}\n"
-                f"Posts with Violations: {len(violations)}\n"
+                f"Posts with Violations: {len(filtered)}\n"
-                f"Total Violations Found: {total_violations}",
+                f"Total Violations Found: {total_violations}\n"
                f"[dim](confidence threshold: {self.confidence_threshold:.0%})[/dim]",
                title="EVE Forum Moderation Report",
                border_style="blue",
            )
        )
-        if not violations:
+        if not filtered:
            self.console.print(
-                "\n[green]No rule violations detected in the analyzed posts.[/green]\n"
+                "\n[green]No rule violations detected above confidence threshold.[/green]\n"
            )
            return
-        # Summary table
+        # Compact table format
-        table = Table(title="Violations Summary", show_header=True, header_style="bold")
+        table = Table(show_header=True, header_style="bold", show_lines=True)
        table.add_column("Date", style="dim", width=12)
-        table.add_column("Topic", width=40, overflow="ellipsis")
+        table.add_column("Post Link", width=50)
-        table.add_column("Rule", width=25)
+        table.add_column("Rule", width=20)
-        table.add_column("Severity", width=10, justify="center")
+        table.add_column("Description", width=60)
-        table.add_column("Confidence", width=10, justify="center")
+
        for analysis, violations in filtered:
            for v in violations:
                # Create a compact post reference
                post_ref = f"{analysis.post.topic_title[:35]}..."
                if len(analysis.post.topic_title) <= 35:
                    post_ref = analysis.post.topic_title
                # Combine explanation with quote for compact view
                description = v.explanation
                if v.quote:
                    quote_preview = (
                        v.quote[:80] + "..." if len(v.quote) > 80 else v.quote
                    )
                    description = f'"{quote_preview}" - {v.explanation}'
        for analysis in violations:
            for v in analysis.violations:
                table.add_row(
-                    analysis.post.created_at.strftime("%Y-%m-%d"),
+                    analysis.post.created_at.strftime("%b %d, %Y"),
-                    analysis.post.topic_title[:38] + "..."
+                    post_ref,
-                    if len(analysis.post.topic_title) > 40
+                    v.rule_name,
-                    else analysis.post.topic_title,
+                    description,
                    f"{v.rule_id}: {v.rule_name}",
                    Text(v.severity.upper(), style=self._severity_color(v.severity)),
                    f"{v.confidence:.0%}",
                )
        self.console.print(table)
@@ -114,14 +127,14 @@ class ReportGenerator:
        Args:
            analyses: List of post analyses with violations
        """
-        violations = [a for a in analyses if a.has_violations]
+        filtered = self._filter_violations(analyses)
-        if not violations:
+        if not filtered:
            return
        self.console.print("[bold]Detailed Analysis[/bold]\n")
-        for analysis in violations:
+        for analysis, violations in filtered:
            self.console.print(
                Panel(
                    f"[bold]{analysis.post.topic_title}[/bold]\n"
@@ -131,16 +144,14 @@ class ReportGenerator:
                )
            )
-            for v in analysis.violations:
+            for v in violations:
                self.console.print(
                    f"  [{self._severity_color(v.severity)}]"
                    f"{v.severity.upper()}[/] - "
                    f"  [bold]Rule {v.rule_id}: {v.rule_name}[/bold] "
-                    f"(confidence: {v.confidence:.0%})"
+                    f"({v.confidence:.0%})"
                )
                self.console.print(f"  [dim]Explanation:[/dim] {v.explanation}")
                if v.quote:
-                    self.console.print(f'  [dim]Quote:[/dim] "{v.quote}"')
+                    self.console.print(f'  [dim]"{v.quote}"[/dim]')
                self.console.print(f"  {v.explanation}")
                self.console.print()
    def generate_markdown(
@@ -160,106 +171,71 @@ class ReportGenerator:
        Returns:
            Markdown string
        """
-        violations = [a for a in analyses if a.has_violations]
+        filtered = self._filter_violations(analyses)
-        total_violations = sum(len(a.violations) for a in violations)
+        total_violations = sum(len(v) for _, v in filtered)
        lines = [
            f"## User Review: {user.username}",
            "",
            f"**Period:** Last {days} days (ending {datetime.now().strftime('%Y-%m-%d')})",
            f"**Posts Analyzed:** {len(analyses)}",
-            f"**Posts with Violations:** {len(violations)}",
+            f"**Posts with Violations:** {len(filtered)}",
            f"**Total Violations Found:** {total_violations}",
            "",
        ]
-        if not violations:
+        if not filtered:
            lines.extend(
                [
                    "---",
                    "",
-                    "*No rule violations detected in the analyzed posts.*",
+                    "*No rule violations detected above confidence threshold.*",
                    "",
                ]
            )
            return "\n".join(lines)
-        # Summary table
+        # Compact table format matching Gemini style
        lines.extend(
            [
                "---",
                "",
-                "### Summary",
+                "### Rule Violations & Combative Content",
                "",
-                "| Date | Topic | Rule | Severity | Confidence |",
+                "| Date | Post Link | Rule | Description / Reasoning |",
-                "|------|-------|------|----------|------------|",
+                "|------|-----------|------|-------------------------|",
            ]
        )
-        for analysis in violations:
+        for analysis, violations in filtered:
-            for v in analysis.violations:
+            for v in violations:
-                topic_link = f"[{self._escape_md(analysis.post.topic_title[:40])}]({analysis.post.url})"
+                date_str = analysis.post.created_at.strftime("%b %d, %Y")
                lines.append(
                    f"| {analysis.post.created_at.strftime('%Y-%m-%d')} "
                    f"| {topic_link} "
                    f"| {v.rule_id}: {v.rule_name} "
                    f"| {self._severity_emoji(v.severity)} "
                    f"| {v.confidence:.0%} |"
                )
-        # Detailed analysis
+                # Create post link with topic and post number
-        lines.extend(
+                topic_short = analysis.post.topic_title[:40]
-            [
+                if len(analysis.post.topic_title) > 40:
-                "",
+                    topic_short = topic_short[:37] + "..."
-                "---",
+                post_link = f"[{self._escape_md(topic_short)} - Post {analysis.post.post_number}]({analysis.post.url})"
                "",
                "### Detailed Analysis",
                "",
            ]
        )
        for i, analysis in enumerate(violations, 1):
            lines.extend(
                [
                    f"#### {i}. [{self._escape_md(analysis.post.topic_title)}]({analysis.post.url})",
                    f"**Date:** {analysis.post.created_at.strftime('%Y-%m-%d %H:%M UTC')} | "
                    f"**Category:** {analysis.post.category_name or 'Unknown'}",
                    "",
                ]
            )
            for v in analysis.violations:
                lines.extend(
                    [
                        f"**{self._severity_emoji(v.severity)} Rule {v.rule_id} - {v.rule_name}** "
                        f"(Confidence: {v.confidence:.0%})",
                        "",
                    ]
                )
                # Combine quote and explanation
                description = v.explanation
                if v.quote:
-                    lines.extend(
+                    quote_escaped = self._escape_md(v.quote[:100])
-                        [
+                    if len(v.quote) > 100:
-                            f"> {self._escape_md(v.quote)}",
+                        quote_escaped += "..."
-                            "",
+                    description = f'"{quote_escaped}" - {v.explanation}'
                        ]
                    )
-                lines.extend(
+                lines.append(
-                    [
+                    f"| {date_str} | {post_link} | {v.rule_name} | {description} |"
                        f"*{v.explanation}*",
                        "",
                    ]
                )
            lines.append("---")
            lines.append("")
        # Footer
        lines.extend(
            [
-                "*Report generated by EVE Forum Moderator Assistant*",
+                "",
-                f"*Generated: {datetime.now().strftime('%Y-%m-%d %H:%M UTC')}*",
+                "---",
                "",
                f"*Report generated by EVE Forum Moderator Assistant on {datetime.now().strftime('%Y-%m-%d %H:%M UTC')}*",
            ]
        )
@@ -268,7 +244,9 @@ class ReportGenerator:
    def _escape_md(self, text: str) -> str:
        """Escape markdown special characters."""
        # Escape characters that could break markdown formatting
-        for char in ["[", "]", "|", "*", "_", "`"]:
+        for char in ["|", "\n"]:
            text = text.replace(char, " ")
        for char in ["[", "]", "*", "_", "`"]:
            text = text.replace(char, f"\\{char}")
        return text