refactor: compact report format with 85% confidence threshold

- Update report format to match Gemini-style table output
- Add confidence threshold (85% by default) to filter out low-confidence noise
- Add --show-all flag to display all violations regardless of confidence
- Combine quote and explanation in a single Description column
- Much cleaner output: 12 high-confidence violations shown vs. 51 total
2026-01-06 04:15:46 +00:00
parent e09f74578d
commit 6dd4e3b6db
2 changed files with 105 additions and 118 deletions
--- a/src/eve_mod/cli.py
+++ b/src/eve_mod/cli.py
@@ -66,6 +66,11 @@ def cli() -> None:
    is_flag=True,
    help="Show debug output including sample post content",
 )
+@click.option(
+    "--show-all",
+    is_flag=True,
+    help="Show all violations regardless of confidence (default: 85%+ only)",
+)
 def review(
    username: str,
    days: int,
@@ -75,6 +80,7 @@ def review(
    max_posts: int,
    enrich: bool,
    debug: bool,
+    show_all: bool,
 ) -> None:
    """
    Review a user's forum posts for potential rule violations.
@@ -101,6 +107,7 @@ def review(
                max_posts=max_posts,
                enrich=enrich,
                debug=debug,
+                show_all=show_all,
            )
        )

@@ -126,6 +133,7 @@ async def _review_user(
    max_posts: int,
    enrich: bool,
    debug: bool = False,
+    show_all: bool = False,
 ) -> None:
    """Async implementation of user review."""
    try:
@@ -217,7 +225,8 @@ async def _review_user(
        )

    # Generate report
-    report_gen = ReportGenerator()
+    confidence_threshold = 0.0 if show_all else 0.85
+    report_gen = ReportGenerator(confidence_threshold=confidence_threshold)

    # Print to terminal
    report_gen.print_summary(user, analyses, days)
--- a/src/eve_mod/report.py
+++ b/src/eve_mod/report.py
@@ -11,41 +11,44 @@ from rich.text import Text
 from .analyzer import PostAnalysis, Violation
 from .discourse import UserProfile

+# Default confidence threshold (85%)
+DEFAULT_CONFIDENCE_THRESHOLD = 0.85
+

 class ReportGenerator:
    """Generates moderation reports in terminal and markdown formats."""

-    def __init__(self, output_dir: Path | str = "reports"):
+    def __init__(
+        self,
+        output_dir: Path | str = "reports",
+        confidence_threshold: float = DEFAULT_CONFIDENCE_THRESHOLD,
+    ):
        """
        Initialize the report generator.

        Args:
            output_dir: Directory to save markdown reports
+            confidence_threshold: Minimum confidence to include violations (0.0-1.0)
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)
        self.console = Console()
+        self.confidence_threshold = confidence_threshold

-    def _severity_color(self, severity: str) -> str:
-        """Get color for severity level."""
-        colors = {
-            "low": "yellow",
-            "medium": "orange1",
-            "high": "red",
-            "critical": "bold red",
-        }
-        return colors.get(severity, "white")
-
-    def _severity_emoji(self, severity: str) -> str:
-        """Get indicator for severity level (Discourse-friendly)."""
-        # Using text indicators instead of emoji for broader compatibility
-        indicators = {
-            "low": "[LOW]",
-            "medium": "[MED]",
-            "high": "[HIGH]",
-            "critical": "[CRITICAL]",
-        }
-        return indicators.get(severity, "[-]")
+    def _filter_violations(
+        self, analyses: list[PostAnalysis]
+    ) -> list[tuple[PostAnalysis, list[Violation]]]:
+        """Filter violations by confidence threshold."""
+        results = []
+        for analysis in analyses:
+            high_conf = [
+                v
+                for v in analysis.violations
+                if v.confidence >= self.confidence_threshold
+            ]
+            if high_conf:
+                results.append((analysis, high_conf))
+        return results

    def print_summary(
        self,
@@ -61,8 +64,8 @@ class ReportGenerator:
            analyses: List of post analyses
            days: Number of days analyzed
        """
-        violations = [a for a in analyses if a.has_violations]
-        total_violations = sum(len(a.violations) for a in violations)
+        filtered = self._filter_violations(analyses)
+        total_violations = sum(len(v) for _, v in filtered)

        # Header
        self.console.print()
@@ -71,37 +74,47 @@ class ReportGenerator:
                f"[bold]User Review: {user.username}[/bold]\n"
                f"Period: Last {days} days\n"
                f"Posts Analyzed: {len(analyses)}\n"
-                f"Posts with Violations: {len(violations)}\n"
-                f"Total Violations Found: {total_violations}",
+                f"Posts with Violations: {len(filtered)}\n"
+                f"Total Violations Found: {total_violations}\n"
+                f"[dim](confidence threshold: {self.confidence_threshold:.0%})[/dim]",
                title="EVE Forum Moderation Report",
                border_style="blue",
            )
        )

-        if not violations:
+        if not filtered:
            self.console.print(
-                "\n[green]No rule violations detected in the analyzed posts.[/green]\n"
+                "\n[green]No rule violations detected above confidence threshold.[/green]\n"
            )
            return

-        # Summary table
-        table = Table(title="Violations Summary", show_header=True, header_style="bold")
+        # Compact table format
+        table = Table(show_header=True, header_style="bold", show_lines=True)
        table.add_column("Date", style="dim", width=12)
-        table.add_column("Topic", width=40, overflow="ellipsis")
-        table.add_column("Rule", width=25)
-        table.add_column("Severity", width=10, justify="center")
-        table.add_column("Confidence", width=10, justify="center")
+        table.add_column("Post Link", width=50)
+        table.add_column("Rule", width=20)
+        table.add_column("Description", width=60)
+
+        for analysis, violations in filtered:
+            for v in violations:
+                # Create a compact post reference
+                post_ref = f"{analysis.post.topic_title[:35]}..."
+                if len(analysis.post.topic_title) <= 35:
+                    post_ref = analysis.post.topic_title
+
+                # Combine explanation with quote for compact view
+                description = v.explanation
+                if v.quote:
+                    quote_preview = (
+                        v.quote[:80] + "..." if len(v.quote) > 80 else v.quote
+                    )
+                    description = f'"{quote_preview}" - {v.explanation}'

-        for analysis in violations:
-            for v in analysis.violations:
                table.add_row(
-                    analysis.post.created_at.strftime("%Y-%m-%d"),
-                    analysis.post.topic_title[:38] + "..."
-                    if len(analysis.post.topic_title) > 40
-                    else analysis.post.topic_title,
-                    f"{v.rule_id}: {v.rule_name}",
-                    Text(v.severity.upper(), style=self._severity_color(v.severity)),
-                    f"{v.confidence:.0%}",
+                    analysis.post.created_at.strftime("%b %d, %Y"),
+                    post_ref,
+                    v.rule_name,
+                    description,
                )

        self.console.print(table)
@@ -114,14 +127,14 @@ class ReportGenerator:
        Args:
            analyses: List of post analyses with violations
        """
-        violations = [a for a in analyses if a.has_violations]
+        filtered = self._filter_violations(analyses)

-        if not violations:
+        if not filtered:
            return

        self.console.print("[bold]Detailed Analysis[/bold]\n")

-        for analysis in violations:
+        for analysis, violations in filtered:
            self.console.print(
                Panel(
                    f"[bold]{analysis.post.topic_title}[/bold]\n"
@@ -131,16 +144,14 @@ class ReportGenerator:
                )
            )

-            for v in analysis.violations:
+            for v in violations:
                self.console.print(
-                    f"  [{self._severity_color(v.severity)}]"
-                    f"{v.severity.upper()}[/] - "
                    f"  [bold]Rule {v.rule_id}: {v.rule_name}[/bold] "
-                    f"(confidence: {v.confidence:.0%})"
+                    f"({v.confidence:.0%})"
                )
-                self.console.print(f"  [dim]Explanation:[/dim] {v.explanation}")
                if v.quote:
-                    self.console.print(f'  [dim]Quote:[/dim] "{v.quote}"')
+                    self.console.print(f'  [dim]"{v.quote}"[/dim]')
+                self.console.print(f"  {v.explanation}")
                self.console.print()

    def generate_markdown(
@@ -160,106 +171,71 @@ class ReportGenerator:
        Returns:
            Markdown string
        """
-        violations = [a for a in analyses if a.has_violations]
-        total_violations = sum(len(a.violations) for a in violations)
+        filtered = self._filter_violations(analyses)
+        total_violations = sum(len(v) for _, v in filtered)

        lines = [
            f"## User Review: {user.username}",
            "",
            f"**Period:** Last {days} days (ending {datetime.now().strftime('%Y-%m-%d')})",
            f"**Posts Analyzed:** {len(analyses)}",
-            f"**Posts with Violations:** {len(violations)}",
+            f"**Posts with Violations:** {len(filtered)}",
            f"**Total Violations Found:** {total_violations}",
            "",
        ]

-        if not violations:
+        if not filtered:
            lines.extend(
                [
                    "---",
                    "",
-                    "*No rule violations detected in the analyzed posts.*",
+                    "*No rule violations detected above confidence threshold.*",
                    "",
                ]
            )
            return "\n".join(lines)

-        # Summary table
+        # Compact table format matching Gemini style
        lines.extend(
            [
                "---",
                "",
-                "### Summary",
+                "### Rule Violations & Combative Content",
                "",
-                "| Date | Topic | Rule | Severity | Confidence |",
-                "|------|-------|------|----------|------------|",
+                "| Date | Post Link | Rule | Description / Reasoning |",
+                "|------|-----------|------|-------------------------|",
            ]
        )

-        for analysis in violations:
-            for v in analysis.violations:
-                topic_link = f"[{self._escape_md(analysis.post.topic_title[:40])}]({analysis.post.url})"
-                lines.append(
-                    f"| {analysis.post.created_at.strftime('%Y-%m-%d')} "
-                    f"| {topic_link} "
-                    f"| {v.rule_id}: {v.rule_name} "
-                    f"| {self._severity_emoji(v.severity)} "
-                    f"| {v.confidence:.0%} |"
-                )
+        for analysis, violations in filtered:
+            for v in violations:
+                date_str = analysis.post.created_at.strftime("%b %d, %Y")

-        # Detailed analysis
-        lines.extend(
-            [
-                "",
-                "---",
-                "",
-                "### Detailed Analysis",
-                "",
-            ]
-        )
-
-        for i, analysis in enumerate(violations, 1):
-            lines.extend(
-                [
-                    f"#### {i}. [{self._escape_md(analysis.post.topic_title)}]({analysis.post.url})",
-                    f"**Date:** {analysis.post.created_at.strftime('%Y-%m-%d %H:%M UTC')} | "
-                    f"**Category:** {analysis.post.category_name or 'Unknown'}",
-                    "",
-                ]
-            )
-
-            for v in analysis.violations:
-                lines.extend(
-                    [
-                        f"**{self._severity_emoji(v.severity)} Rule {v.rule_id} - {v.rule_name}** "
-                        f"(Confidence: {v.confidence:.0%})",
-                        "",
-                    ]
-                )
+                # Create post link with topic and post number
+                topic_short = analysis.post.topic_title[:40]
+                if len(analysis.post.topic_title) > 40:
+                    topic_short = topic_short[:37] + "..."
+                post_link = f"[{self._escape_md(topic_short)} - Post {analysis.post.post_number}]({analysis.post.url})"

+                # Combine quote and explanation
+                description = v.explanation
                if v.quote:
-                    lines.extend(
-                        [
-                            f"> {self._escape_md(v.quote)}",
-                            "",
-                        ]
-                    )
+                    quote_escaped = self._escape_md(v.quote[:100])
+                    if len(v.quote) > 100:
+                        quote_escaped += "..."
+                    description = f'"{quote_escaped}" - {v.explanation}'

-                lines.extend(
-                    [
-                        f"*{v.explanation}*",
-                        "",
-                    ]
+                lines.append(
+                    f"| {date_str} | {post_link} | {v.rule_name} | {description} |"
                )

-            lines.append("---")
-            lines.append("")
-
        # Footer
        lines.extend(
            [
-                "*Report generated by EVE Forum Moderator Assistant*",
-                f"*Generated: {datetime.now().strftime('%Y-%m-%d %H:%M UTC')}*",
+                "",
+                "---",
+                "",
+                f"*Report generated by EVE Forum Moderator Assistant on {datetime.now().strftime('%Y-%m-%d %H:%M UTC')}*",
            ]
        )

@@ -268,7 +244,9 @@ class ReportGenerator:
    def _escape_md(self, text: str) -> str:
        """Escape markdown special characters."""
        # Escape characters that could break markdown formatting
-        for char in ["[", "]", "|", "*", "_", "`"]:
+        for char in ["|", "\n"]:
+            text = text.replace(char, " ")
+        for char in ["[", "]", "*", "_", "`"]:
            text = text.replace(char, f"\\{char}")
        return text