"""
Report generator for bibliography check results.
"""
import json
import re
from dataclasses import asdict, dataclass, is_dataclass
from datetime import datetime
from typing import Any, Optional, List, Dict
from pathlib import Path

from ..parsers.bib_parser import BibEntry
from ..analyzers.metadata_comparator import ComparisonResult
from ..analyzers.usage_checker import UsageResult
from ..analyzers.llm_evaluator import EvaluationResult
from ..analyzers.duplicate_detector import DuplicateGroup
from ..checkers.base import CheckResult, CheckSeverity
from .html_report import render_standalone_html


@dataclass
class EntryReport:
    """Complete report for a single bib entry.

    Bundles one parsed entry with the outcome of each per-entry check so the
    report generator can render everything about that entry in one place.
    """
    # The bibliography entry this report describes.
    entry: BibEntry
    # Metadata comparison outcome; the report code skips metadata issues when this is None.
    comparison: Optional[ComparisonResult]
    # Citation-usage outcome; the summary shows "N/A" when no entry has one.
    usage: Optional[UsageResult]
    # Relevance evaluations for this entry; may be empty.
    evaluations: list[EvaluationResult]


def _json_default(o):
    """Convert an object JSON cannot encode natively into something it can.

    Judging by its name this is meant as a ``default=`` hook for
    ``json.dumps`` (the call site is not visible here).

    Conversion rules, tried in order:
      1. dataclass *instances* become a dict via ``asdict``;
      2. objects exposing a ``value`` attribute (e.g. enum members) yield it;
      3. anything else falls back to ``str(o)``.
    """
    # is_dataclass() is also True for the dataclass *type* itself, but
    # asdict() only accepts instances and raises TypeError on a class.
    if is_dataclass(o) and not isinstance(o, type):
        return asdict(o)
    if hasattr(o, "value"):
        return o.value
    return str(o)


class ReportGenerator:
    """Generates formatted markdown reports."""
    
    def __init__(self, minimal_verified: bool = False, check_preprint_ratio: bool = True, preprint_warning_threshold: float = 0.50):
        self.entries: list[EntryReport] = []
        self.missing_citations: list[str] = []
        self.duplicate_groups: list[DuplicateGroup] | None = None  # None means check not run
        self.bib_files: list[str] = []
        self.tex_files: list[str] = []
        self.bib_file: str = "" # Keep for backward compatibility/single file
        self.tex_file: str = "" # Keep for backward compatibility/single file
        self.minimal_verified = minimal_verified  # Whether to show minimal info for verified entries
        self.submission_results: List[CheckResult] = []  # Submission quality check results
        self.template = None  # Conference template if used
        self.check_preprint_ratio = check_preprint_ratio  # Whether to check preprint ratio
        self.preprint_warning_threshold = preprint_warning_threshold  # Threshold for preprint warning
        self.retraction_findings: list = []  # F1 results
        self.url_findings: list = []        # F2 results

    def set_retraction_findings(self, findings) -> None:
        self.retraction_findings = list(findings or [])

    def set_url_findings(self, findings) -> None:
        self.url_findings = list(findings or [])

    
    def add_entry_report(self, report: EntryReport):
        """Add an entry report."""
        self.entries.append(report)
    
    def set_metadata(self, bib_files: str | list[str], tex_files: str | list[str]):
        """Set source file information."""
        if isinstance(bib_files, str):
            self.bib_files = [bib_files]
            self.bib_file = bib_files
        else:
            self.bib_files = bib_files
            self.bib_file = bib_files[0] if bib_files else ""
            
        if isinstance(tex_files, str):
            self.tex_files = [tex_files]
            self.tex_file = tex_files
        else:
            self.tex_files = tex_files
            self.tex_file = tex_files[0] if tex_files else ""
    
    def set_missing_citations(self, missing: list[str]):
        """Set list of citations without bib entries."""
        self.missing_citations = missing
    
    def set_duplicate_groups(self, groups: list[DuplicateGroup]):
        """Set list of duplicate entry groups."""
        self.duplicate_groups = groups
    
    def set_submission_results(self, results: List[CheckResult], template=None):
        """Set submission quality check results."""
        self.submission_results = results
        self.template = template
    
    def generate(self) -> str:
        """Generate the full markdown report."""
        lines = []
        
        # Header
        lines.extend(self._generate_header())
        lines.append("")
        
        # Disclaimer
        lines.extend(self._generate_disclaimer())
        lines.append("")
        
        # Summary statistics
        lines.extend(self._generate_summary())
        lines.append("")
        
        # ⚠️ Critical Issues (Detailed) - Bibliography-related issues
        lines.extend(self._generate_issues_section())
        lines.append("")
        
        # ✅ Verified Entries (Clean)
        lines.extend(self._generate_verified_section())
        lines.append("")
        
        # 📋 Submission Quality Checks (LaTeX quality checks)
        if self.submission_results:
            lines.extend(self._generate_submission_section())
            lines.append("")
        
        # Footer
        lines.extend(self._generate_footer())
        
        return "\n".join(lines)

    def get_summary_stats(self) -> tuple[dict, dict]:
        """Get summary statistics as dictionaries for console display (Issues only)."""
        total = len(self.entries)
        
        # Bibliography issues breakdown
        title_mismatches = 0
        author_mismatches = 0
        year_mismatches = 0
        low_relevance = 0
        unable_to_verify = 0
        
        for e in self.entries:
            # Metadata issues
            if e.comparison:
                if e.comparison.has_issues:
                    # Categorize issues
                    has_title = False
                    has_author = False
                    has_year = False
                    
                    for issue in e.comparison.issues:
                        if "Title mismatch" in issue: has_title = True
                        elif "Author mismatch" in issue: has_author = True
                        elif "Year mismatch" in issue: has_year = True
                        elif "Unable to find" in issue: unable_to_verify += 1
                    
                    if has_title: title_mismatches += 1
                    if has_author: author_mismatches += 1
                    if has_year: year_mismatches += 1
            
            # Relevance issues
            if any(ev.relevance_score <= 2 for ev in e.evaluations):
                low_relevance += 1

        bib_stats = {}
        if title_mismatches > 0: bib_stats["Title Mismatches"] = title_mismatches
        if author_mismatches > 0: bib_stats["Author Mismatches"] = author_mismatches
        if year_mismatches > 0: bib_stats["Year Mismatches"] = year_mismatches
        if low_relevance > 0: bib_stats["Low Relevance"] = low_relevance
        if unable_to_verify > 0: bib_stats["Unable to Verify"] = unable_to_verify
        
        if self.duplicate_groups:
            bib_stats["Duplicate Groups"] = len(self.duplicate_groups)
        
        if self.missing_citations:
            bib_stats["Missing Bib Entries"] = len(self.missing_citations)
            
        unused = [e for e in self.entries if e.usage and not e.usage.is_used]
        if unused:
            bib_stats["Unused Entries"] = len(unused)
        
        # LaTeX stats - Group by precise Rule Names
        latex_stats = {}
        
        # Rule mapping for professional display names
        RULE_MAPPING = {
            "Very long sentence": "Sentence Length (Critical)",
            "Long sentence": "Sentence Length (Warning)",
            "Possible Markdown bullet point": "Markdown Bullet Point",
            "Possible Markdown numbered list": "Markdown Numbered List",
            "Possible Markdown italic": "Markdown Italic",
            "Possible Markdown bold": "Markdown Bold",
            "Inconsistent hyphenation": "Hyphenation Inconsistency",
            "Inconsistent spelling": "Spelling Inconsistency",
            "Unreferenced figure": "Unreferenced Figure",
            "Unreferenced table": "Unreferenced Table",
            "Unreferenced section": "Unreferenced Section",
            "Unreferenced label": "Unreferenced Label",
            "Citation from": "Old Citation (10+ years)",
            "Hedging language": "Hedging/Vague Language",
            "Redundant phrase": "Redundant Phrasing",
            "Weak start with": "Weak Sentence Starter",
            "Unescaped &": "Unescaped Special Character",
            "Citation without non-breaking space": "Missing Non-breaking Space (~)",
            "Mixed citation styles": "Mixed Citation Styles",
            "Mixed inline math": "Mixed Math Notation",
            "Appendix section": "Unreferenced Appendix",
            "Missing space before unit": "Unit Spacing Issue"
        }

        for r in self.submission_results:
            if r.passed:
                continue
            
            raw_msg = r.message
            rule_name = "Unknown Rule"
            
            # Match against our professional rule names
            matched = False
            for pattern, official_name in RULE_MAPPING.items():
                if pattern in raw_msg:
                    rule_name = official_name
                    matched = True
                    break
            
            if not matched:
                # Fallback: Clean the message (remove dynamic parts)
                clean_msg = re.sub(r"\(.*?\)", "", raw_msg)
                clean_msg = re.sub(r"'.*?'", "", clean_msg)
                clean_msg = re.sub(r"\d+", "", clean_msg)
                rule_name = clean_msg.split(":")[0].strip()
            
            if rule_name not in latex_stats:
                latex_stats[rule_name] = 0
            latex_stats[rule_name] += 1
        
        return bib_stats, latex_stats

    def generate_console_output(self) -> str:
        """Generate console-friendly output (Summary + Issues only)."""
        lines = []
        
        # Summary statistics
        lines.extend(self._generate_summary())
        lines.append("")
        
        # Critical Issues
        lines.extend(self._generate_issues_section())
        lines.append("")
        
        return "\n".join(lines)
    
    def _generate_header(self) -> list[str]:
        """Generate report header.

        File names are intentionally not printed — keep the report
        portable, and never expose local source paths to anyone the
        report is shared with.
        """
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        return [
            "# Bibliography Validation Report",
            "",
            f"**Generated:** {timestamp}",
            "",
            "| Inputs | Count |",
            "|--------|-------|",
            f"| **Bib File(s)** | {len(self.bib_files)} |",
            f"| **TeX File(s)** | {len(self.tex_files)} |",
        ]

    def _generate_disclaimer(self) -> list[str]:
        """Generate disclaimer section."""
        return [
            "> **⚠️ Disclaimer:** This report is generated by an automated tool. While BibGuard strives for accuracy, it may produce false positives or miss certain issues. **This tool cannot replace human review.** Please manually verify all reported issues before making changes to your bibliography."
        ]
    
    def _generate_summary(self) -> list[str]:
        """Generate summary statistics."""
        total = len(self.entries)
        
        # Check availability of results
        has_metadata = any(e.comparison is not None for e in self.entries)
        has_usage = any(e.usage is not None for e in self.entries)
        has_eval = any(len(e.evaluations) > 0 for e in self.entries)
        
        # Calculate Verified/Issues
        # Note: _is_verified depends on _has_issues. 
        # If a check wasn't run, it won't contribute to issues.
        verified = sum(1 for e in self.entries if self._is_verified(e))
        issues = sum(1 for e in self.entries if self._has_issues(e))
        
        # Usage stats
        if has_usage:
            used = sum(1 for e in self.entries if e.usage and e.usage.is_used)
            unused = total - used
            used_str = str(used)
            unused_str = str(unused)
            missing_str = str(len(self.missing_citations))
        else:
            used_str = "N/A"
            unused_str = "N/A"
            missing_str = "N/A"
            
        # Duplicate stats - show N/A if check wasn't run (duplicate_groups is None means not checked)
        if self.duplicate_groups is None:
            dup_str = "N/A"
        else:
            dup_str = str(len(self.duplicate_groups))
        
        # Preprint detection (only if enabled)
        preprint_str = "N/A"
        preprint_warning = []
        if self.check_preprint_ratio and has_usage:
            used_entries = [e for e in self.entries if e.usage and e.usage.is_used]
            if used_entries:
                preprint_count = sum(1 for e in used_entries if self._is_preprint(e.entry))
                preprint_ratio = preprint_count / len(used_entries)
                preprint_str = f"{preprint_count} ({preprint_ratio:.1%})"

                # Warning if exceeds threshold
                if preprint_ratio > self.preprint_warning_threshold:
                    preprint_warning = [
                        "",
                        f"> ⚠️ **High Preprint Ratio Warning:** {preprint_ratio:.1%} of your used references are preprints (arXiv, bioRxiv, etc.). Consider replacing some with peer-reviewed publications if available."
                    ]

        summary_lines = [
            "## 📊 Summary",
            "",
            "### 📚 Bibliography Statistics",
            "",
            "| Metric | Count |",
            "|--------|-------|",
            f"| **Total Entries** | {total} |",
            f"| ✅ **Verified (Clean)** | {verified} |",
            f"| ⚠️ **With Issues** | {issues} |",
            f"| 📝 **Used in TeX** | {used_str} |",
            f"| 🗑️ **Unused** | {unused_str} |",
            f"| 🔄 **Duplicate Groups** | {dup_str} |",
            f"| ❌ **Missing Bib Entries** | {missing_str} |",
            f"| 📄 **Preprints (Used)** | {preprint_str} |",
        ]
        
        # Add warning if needed
        if preprint_warning:
            summary_lines.extend(preprint_warning)
        
        summary_lines.extend([
            "",
            "### 📋 LaTeX Quality Checks",
            "",
            self._get_submission_summary()
        ])
        
        return summary_lines
    
    def _is_preprint(self, entry: BibEntry) -> bool:
        """Check if an entry is a preprint."""
        # Preprint indicators
        preprint_keywords = [
            'arxiv', 'biorxiv', 'medrxiv', 'ssrn', 'preprint', 
            'openreview', 'techreport', 'technical report', 'working paper',
            'tech report', 'tech. report'
        ]
        
        # Check entry type
        if entry.entry_type.lower() in ['techreport', 'unpublished', 'misc']:
            # Further check if it's actually a preprint
            text_to_check = ' '.join([
                entry.journal.lower(),
                entry.booktitle.lower(),
                entry.publisher.lower(),
                entry.entry_type.lower()
            ])
            
            if any(keyword in text_to_check for keyword in preprint_keywords):
                return True
        
        # Check if arXiv ID exists
        if entry.has_arxiv:
            return True
        
        # Check journal/booktitle/publisher fields
        venue_text = ' '.join([
            entry.journal.lower(),
            entry.booktitle.lower(),
            entry.publisher.lower()
        ])
        
        return any(keyword in venue_text for keyword in preprint_keywords)
    
    def _get_submission_summary(self) -> str:
        """Generate submission quality summary table."""
        if not self.submission_results:
            return "*No quality checks were performed.*"
        
        # Count by severity
        error_count = sum(1 for r in self.submission_results if r.severity == CheckSeverity.ERROR)
        warning_count = sum(1 for r in self.submission_results if r.severity == CheckSeverity.WARNING)
        info_count = sum(1 for r in self.submission_results if r.severity == CheckSeverity.INFO)
        
        lines = [
            "| Severity | Count |",
            "|----------|-------|",
            f"| 🔴 **Errors** | {error_count} |",
            f"| 🟡 **Warnings** | {warning_count} |",
            f"| 🔵 **Suggestions** | {info_count} |"
        ]
        return "\n".join(lines)
    
    def _is_verified(self, entry: EntryReport) -> bool:
        """Check if entry is clean (no issues)."""
        return not self._has_issues(entry)

    def _has_issues(self, entry: EntryReport) -> bool:
        """Check if entry has any issues."""
        # Metadata issues
        if entry.comparison and entry.comparison.has_issues:
            return True
        # LLM issues (low relevance)
        if any(ev.relevance_score <= 2 for ev in entry.evaluations):
            return True
        # NOTE: We don't include usage issues (unused) here because
        # unused entries are already shown in the "Unused Entries" section
        return False
    
    def _has_metadata_or_relevance_issues(self, entry: EntryReport) -> bool:
        """Check if entry has metadata or relevance issues (excluding duplicate/unused)."""
        # Metadata issues
        if entry.comparison and entry.comparison.has_issues:
            return True
        # LLM issues (low relevance)
        if any(ev.relevance_score <= 2 for ev in entry.evaluations):
            return True
        return False

    def _generate_issues_section(self) -> list[str]:
        """Generate detailed section for entries with issues."""
        lines = ["## ⚠️ Critical Issues Detected", ""]
        
        has_any_issues = False
        
        # 1. Missing Citations
        if self.missing_citations:
            has_any_issues = True
            lines.append("### ❌ Missing Bibliography Entries")
            lines.append("The following keys are cited in the TeX file but missing from the .bib file:")
            lines.append("")
            for key in self.missing_citations:
                lines.append(f"- `{key}`")
            lines.append("")

        # 2. Duplicate Entries
        if self.duplicate_groups:
            has_any_issues = True
            lines.append("### 🔄 Duplicate Entries")
            for i, group in enumerate(self.duplicate_groups, 1):
                lines.append(f"#### Group {i} (Similarity: {group.similarity_score:.0%})")
                lines.append(f"**Reason:** {group.reason}")
                lines.append("")
                lines.append("| Key | Title | Year |")
                lines.append("|-----|-------|------|")
                for entry in group.entries:
                    lines.append(f"| `{entry.key}` | {entry.title} | {entry.year} |")
                lines.append("")

        # 3. Unused Entries
        unused = [e for e in self.entries if e.usage and not e.usage.is_used]
        if unused:
            has_any_issues = True
            lines.append("### 🗑️ Unused Entries")
            lines.append("The following entries are in the .bib file but NOT cited in the TeX file:")
            lines.append("")
            for e in unused:
                lines.append(f"- `{e.entry.key}`: *{e.entry.title}*")
            lines.append("")

        # 4. Metadata Mismatches & Low Relevance
        issue_entries = [e for e in self.entries if self._has_metadata_or_relevance_issues(e)]
        
        if issue_entries:
            has_any_issues = True
            lines.append("### ⚠️ Metadata & Relevance Issues")
            
            for entry_report in issue_entries:
                lines.extend(self._format_entry_detail(entry_report, is_verified=False))

        if not has_any_issues:
            lines.append("🎉 **No critical issues found!**")

        return lines

    def _generate_verified_section(self) -> list[str]:
        """Generate section for verified entries."""
        lines = ["## ✅ Verified Entries", ""]
        
        verified = [e for e in self.entries if self._is_verified(e)]
        
        if not verified:
            lines.append("_No verified entries found._")
            return lines
            
        lines.append(f"Found **{len(verified)}** entries with correct metadata.")
        lines.append("")
        
        # Use a collapsible details block for clean UI
        lines.append("<details>")
        lines.append("<summary>Click to view verified entries</summary>")
        lines.append("")
        
        for entry_report in verified:
            lines.extend(self._format_entry_detail(entry_report, minimal=self.minimal_verified, is_verified=True))
            
        lines.append("</details>")
        return lines

    def _format_entry_detail(self, report: EntryReport, minimal: bool = False, is_verified: bool = False) -> list[str]:
        """Format a single entry report in Markdown."""
        entry = report.entry
        comp = report.comparison
        lines = []
        
        # Title header - use checkmark for verified entries, warning for issues
        icon = "✅" if is_verified else "⚠️"
        lines.append(f"#### {icon} `{entry.key}`")
        lines.append(f"**Title:** {entry.title}")
        lines.append("")
        
        # Metadata Status
        if comp:
            status_icon = "✅" if comp.is_match else "❌"
            lines.append(f"- **Metadata Status:** {status_icon} {comp.source.upper()} (Confidence: {comp.confidence:.1%})")
            
            if comp.has_issues and not minimal:
                lines.append("  - **Discrepancies:**")
                for issue in comp.issues:
                     # Format mismatch details nicely
                    if "Mismatch" in issue or "mismatch" in issue:
                        lines.append(f"    - 🔴 {issue}")
                        if "Title" in issue:
                            lines.append(f"      - **Bib:** `{comp.bib_title}`")
                            lines.append(f"      - **Fetched:** `{comp.fetched_title}`")
                        elif "Author" in issue:
                            lines.append(f"      - **Bib:** `{', '.join(comp.bib_authors)}`")
                            lines.append(f"      - **Fetched:** `{', '.join(comp.fetched_authors)}`")
                    else:
                        lines.append(f"    - 🔸 {issue}")

            # Positive notes (corroboration, year-tolerance) — separate from issues.
            notes = list(getattr(comp, "notes", []) or [])
            if notes and not minimal:
                lines.append("  - **Notes:**")
                for note in notes:
                    lines.append(f"    - 🟢 {note}")
        
        # Relevance Status
        if report.evaluations and not minimal:
            lines.append("- **Relevance Analysis:**")
            for eval_res in report.evaluations:
                score_icon = "🟢" if eval_res.relevance_score >= 4 else ("🟡" if eval_res.relevance_score == 3 else "🔴")
                lines.append(f"  - {score_icon} **Score {eval_res.relevance_score}/5** ({eval_res.score_label})")
                if eval_res.line_number:
                    lines.append(f"    - Line {eval_res.line_number}")
                lines.append(f"    - *\"{eval_res.explanation}\"*")

        lines.append("")
        lines.append("---")
        lines.append("")
        return lines
    
    def _generate_submission_section(self) -> list[str]:
        """Generate section for submission quality check results."""
        lines = ["## 📋 Submission Quality Checks", ""]
        
        # Template info
        if self.template:
            lines.append(f"**Conference Template:** {self.template.name}")
            lines.append(f"**Page Limit:** {self.template.page_limit_review} (review) / {self.template.page_limit_camera} (camera-ready)")
            if self.template.mandatory_sections:
                lines.append(f"**Required Sections:** {', '.join(self.template.mandatory_sections)}")
            lines.append("")
        
        # Count by severity
        errors = [r for r in self.submission_results if r.severity == CheckSeverity.ERROR and not r.passed]
        warnings = [r for r in self.submission_results if r.severity == CheckSeverity.WARNING and not r.passed]
        infos = [r for r in self.submission_results if r.severity == CheckSeverity.INFO and not r.passed]
        
        # Summary
        if errors or warnings or infos:
            lines.append("| Severity | Count |")
            lines.append("|----------|-------|")
            if errors:
                lines.append(f"| 🔴 **Errors** | {len(errors)} |")
            if warnings:
                lines.append(f"| 🟡 **Warnings** | {len(warnings)} |")
            if infos:
                lines.append(f"| 🔵 **Suggestions** | {len(infos)} |")
            lines.append("")
        else:
            lines.append("🎉 **No submission issues found!**")
            lines.append("")
            return lines
        
        # Group by checker
        by_checker = {}
        for result in self.submission_results:
            if result.passed:
                continue
            if result.checker_name not in by_checker:
                by_checker[result.checker_name] = []
            by_checker[result.checker_name].append(result)
        
        def _format_one(result) -> list[str]:
            """Render a single CheckResult — line number only, no file path,
            no truncation. The HTML report follows the same convention."""
            buf = [f"- {result.message}"]
            if result.line_number:
                buf.append(f"  - Line {result.line_number}")
            if result.line_content:
                # Highlight the offending span if the checker provided one.
                content = result.line_content
                if getattr(result, "match_text", None) and result.match_text in content:
                    idx = content.index(result.match_text)
                    content = (content[:idx]
                               + "**" + result.match_text + "**"
                               + content[idx + len(result.match_text):])
                buf.append(f"  - `{content}`")
            if result.suggestion:
                buf.append(f"  - 💡 *{result.suggestion}*")
            return buf

        # Display errors first
        if errors:
            lines.append("### 🔴 Critical Errors")
            lines.append("")
            for result in errors:
                lines.extend(_format_one(result))
            lines.append("")

        # Display warnings
        if warnings:
            lines.append("### 🟡 Warnings")
            lines.append("")
            for result in warnings:
                lines.extend(_format_one(result))
            lines.append("")

        # Display suggestions (collapsible)
        if infos:
            lines.append("### 🔵 Suggestions")
            lines.append("<details>")
            lines.append("<summary>Click to view suggestions</summary>")
            lines.append("")
            for result in infos:
                lines.extend(_format_one(result))
            lines.append("")
            lines.append("</details>")
            lines.append("")
        
        return lines

    def _generate_footer(self) -> list[str]:
        """Generate report footer."""
        return [
            "",
            "---",
            f"Report generated by **BibGuard** on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        ]
    
    def save(self, filepath: str):
        """Save report to file."""
        content = self.generate()
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(content)
    
    def save_bibliography_report(self, filepath: str):
        """Generate and save bibliography-only report (all bib-related checks)."""
        lines = []
        
        # Header
        lines.append("# Bibliography Validation Report")
        lines.append("")
        lines.append(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        lines.append("")
        lines.append("| Inputs | Count |")
        lines.append("|--------|-------|")
        lines.append(f"| **Bib File(s)** | {len(self.bib_files)} |")
        lines.append(f"| **TeX File(s)** | {len(self.tex_files)} |")
        lines.append("")
        
        # Disclaimer
        lines.extend(self._generate_disclaimer())
        lines.append("")
        
        # Summary - Bibliography only
        total = len(self.entries)
        verified = sum(1 for e in self.entries if self._is_verified(e))
        issues = sum(1 for e in self.entries if self._has_issues(e))
        
        has_usage = any(e.usage is not None for e in self.entries)
        if has_usage:
            used = sum(1 for e in self.entries if e.usage and e.usage.is_used)
            unused = total - used
            used_str = str(used)
            unused_str = str(unused)
            missing_str = str(len(self.missing_citations))
        else:
            used_str = "N/A"
            unused_str = "N/A"
            missing_str = "N/A"
        
        if self.duplicate_groups is None:
            dup_str = "N/A"
        else:
            dup_str = str(len(self.duplicate_groups))
        
        lines.append("## 📊 Summary")
        lines.append("")
        lines.append("| Metric | Count |")
        lines.append("|--------|-------|")
        lines.append(f"| **Total Entries** | {total} |")
        lines.append(f"| ✅ **Verified (Clean)** | {verified} |")
        lines.append(f"| ⚠️ **With Issues** | {issues} |")
        lines.append(f"| 📝 **Used in TeX** | {used_str} |")
        lines.append(f"| 🗑️ **Unused** | {unused_str} |")
        lines.append(f"| 🔄 **Duplicate Groups** | {dup_str} |")
        lines.append(f"| ❌ **Missing Bib Entries** | {missing_str} |")
        lines.append("")
        
        # Issues section
        lines.extend(self._generate_issues_section())
        lines.append("")
        
        # Verified entries
        lines.extend(self._generate_verified_section())
        lines.append("")
        
        # Footer
        lines.extend(self._generate_footer())
        
        content = "\n".join(lines)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(content)
    
    # ------------------------------------------------------------------
    # JSON + standalone HTML output
    # ------------------------------------------------------------------
    def build_payload(self) -> Dict[str, Any]:
        """Assemble the plain-data structure shared by the JSON and HTML writers.

        Returns:
            A dict with the top-level keys ``meta``, ``summary``, ``entries``,
            ``submission_results``, ``retractions``, ``url_findings``,
            ``duplicates`` and ``missing_citations``.  The ``meta`` section
            reports only how many bib/tex files were processed, not their
            names.
        """
        # Attributes copied one-to-one from each BibEntry, in output order.
        entry_fields = (
            "key", "entry_type", "title", "author", "year", "journal",
            "booktitle", "publisher", "doi", "arxiv_id", "url",
            "volume", "pages",
        )
        # Attributes copied one-to-one from a ComparisonResult, in output order.
        comparison_fields = (
            "is_match", "confidence", "title_match", "title_similarity",
            "author_match", "author_similarity", "year_match",
            "bib_title", "fetched_title", "bib_authors", "fetched_authors",
            "bib_year", "fetched_year",
        )
        # (attribute, fallback) pairs read from a retraction finding's ``result``.
        notice_fields = (
            ("is_retracted", False),
            ("update_type", ""),
            ("notice_doi", ""),
            ("notice_label", ""),
            ("notice_url", ""),
        )
        # (attribute, fallback) pairs read from each URL finding.
        url_fields = (
            ("entry_key", ""),
            ("url", ""),
            ("status", ""),
            ("status_code", None),
            ("detail", ""),
        )

        def _describe_entry(bib: BibEntry) -> dict:
            return {name: getattr(bib, name) for name in entry_fields}

        def _describe_comparison(result: Optional[ComparisonResult]) -> Optional[dict]:
            if result is None:
                return None
            described = {name: getattr(result, name) for name in comparison_fields}
            described["issues"] = list(result.issues)
            described["source"] = result.source
            # These two attributes are read defensively: they may be missing
            # (or, for notes, None) on the comparison object.
            described["notes"] = list(getattr(result, "notes", []) or [])
            described["published_version_hint"] = getattr(result, "published_version_hint", "")
            return described

        def _describe_usage(usage: Optional[UsageResult]) -> Optional[dict]:
            if usage is None:
                return None
            return {
                "is_used": usage.is_used,
                "usage_count": getattr(usage, "usage_count", 0),
            }

        def _describe_evaluation(evaluation: EvaluationResult) -> dict:
            # NOTE(review): ``file_path`` is emitted here even though the
            # submission results below drop it to avoid exposing local tex
            # paths — confirm this is intended.
            return {
                "entry_key": evaluation.entry_key,
                "relevance_score": evaluation.relevance_score,
                "is_relevant": evaluation.is_relevant,
                "explanation": evaluation.explanation,
                "citation_role": getattr(evaluation, "citation_role", ""),
                "line_number": evaluation.line_number,
                "file_path": evaluation.file_path,
                "error": evaluation.error,
            }

        def _severity_text(level) -> Any:
            # Enum-like severities expose ``.value``; anything else is stringified.
            if hasattr(level, "value"):
                return level.value
            return str(level)

        entry_rows = [
            {
                "entry": _describe_entry(report.entry),
                "comparison": _describe_comparison(report.comparison),
                "usage": _describe_usage(report.usage),
                "evaluations": [
                    _describe_evaluation(evaluation)
                    for evaluation in (report.evaluations or [])
                ],
            }
            for report in self.entries
        ]

        submission_rows = [
            {
                "checker": check.checker_name,
                "passed": check.passed,
                "severity": _severity_text(check.severity),
                "message": check.message,
                "line_number": check.line_number,
                "line_content": check.line_content,
                "suggestion": check.suggestion,
                # file_path intentionally omitted — user-facing report should
                # never expose local tex paths.
                "match_text": getattr(check, "match_text", None),
            }
            for check in self.submission_results
        ]

        retraction_rows = []
        for finding in self.retraction_findings:
            outcome = getattr(finding, "result", None)
            row = {
                "entry_key": getattr(finding, "entry_key", ""),
                "doi": getattr(finding, "doi", ""),
            }
            # A missing/falsy result yields the fallback for every notice field.
            for name, fallback in notice_fields:
                row[name] = getattr(outcome, name, fallback) if outcome else fallback
            retraction_rows.append(row)

        url_rows = [
            {name: getattr(finding, name, fallback) for name, fallback in url_fields}
            for finding in self.url_findings
        ]

        # Each duplicate group is reduced to the non-empty keys of its members;
        # a ``None`` group list produces an empty result.
        duplicate_keys = []
        for group in (self.duplicate_groups or []):
            member_keys = (
                getattr(member, "key", "")
                for member in getattr(group, "entries", [])
            )
            duplicate_keys.append([key for key in member_keys if key])

        bib_stats, latex_stats = self.get_summary_stats()

        # Counts only — never expose source filenames in any downstream
        # artifact (HTML, JSON, anywhere else).
        meta = {
            "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "bib_files_count": len(self.bib_files),
            "tex_files_count": len(self.tex_files),
            "template": getattr(self.template, "name", "") if self.template else "",
        }
        return {
            "meta": meta,
            "summary": {"bibliography": bib_stats, "latex": latex_stats},
            "entries": entry_rows,
            "submission_results": submission_rows,
            "retractions": retraction_rows,
            "url_findings": url_rows,
            "duplicates": duplicate_keys,
            "missing_citations": list(self.missing_citations),
        }

    def save_json(self, filepath: str) -> None:
        """Serialize the report payload to ``filepath`` as UTF-8 JSON.

        The output is indented by two spaces and keeps non-ASCII characters
        unescaped; values the encoder cannot handle natively are passed
        through ``_json_default``.
        """
        report_data = self.build_payload()
        dump_options = {"ensure_ascii": False, "indent": 2, "default": _json_default}
        with open(filepath, "w", encoding="utf-8") as out:
            json.dump(report_data, out, **dump_options)

    def save_html(self, filepath: str) -> None:
        """Render the report payload with ``render_standalone_html`` and save it.

        The resulting markup is written to ``filepath`` as UTF-8 text.
        """
        # Render first so the destination file is only opened once the
        # document has been produced.
        document = render_standalone_html(self.build_payload())
        with open(filepath, "w", encoding="utf-8") as out:
            out.write(document)

    def save_latex_quality_report(self, filepath: str, submission_results: List[CheckResult], template=None):
        """Write the markdown LaTeX quality report to ``filepath``.

        The report consists of a header, a disclaimer, a severity summary
        table, the detailed section returned by
        ``self._generate_submission_section()`` and a footer.

        Side effect: ``self.submission_results`` and ``self.template`` are
        overwritten with the given arguments before the detailed section is
        generated.

        Args:
            filepath: Destination of the markdown file (written as UTF-8).
            submission_results: Check results tallied per severity for the
                summary table.
            template: Optional template object; when truthy, its ``name`` is
                printed in the header.
        """
        stamp_format = '%Y-%m-%d %H:%M:%S'

        def _tally(level) -> int:
            # Number of results whose severity equals ``level``.
            return sum(1 for result in submission_results if result.severity == level)

        # Header
        report = [
            "# LaTeX Quality Report",
            "",
            f"**Generated:** {datetime.now().strftime(stamp_format)}",
            "",
            f"**Inputs:** {len(self.tex_files)} TeX file(s)",
            "",
        ]
        if template:
            report += [f"**Template:** {template.name}", ""]

        # Disclaimer
        report += [
            "> **⚠️ Note:** This report contains automated quality checks for your LaTeX document. Please review all suggestions carefully before making changes.",
            "",
        ]

        # Summary table, one row per severity level
        errors = _tally(CheckSeverity.ERROR)
        warnings = _tally(CheckSeverity.WARNING)
        suggestions = _tally(CheckSeverity.INFO)
        report += [
            "## 📊 Summary",
            "",
            "| Severity | Count |",
            "|----------|-------|",
            f"| 🔴 **Errors** | {errors} |",
            f"| 🟡 **Warnings** | {warnings} |",
            f"| 🔵 **Suggestions** | {suggestions} |",
            "",
        ]

        # Detailed issues: the instance state is updated first, then the
        # section generator is invoked without arguments.
        self.submission_results = submission_results
        self.template = template
        report.extend(self._generate_submission_section())
        report.append("")

        # Footer
        report += [
            "---",
            "",
            f"Report generated by **BibGuard** on {datetime.now().strftime(stamp_format)}",
        ]

        with open(filepath, 'w', encoding='utf-8') as out:
            out.write("\n".join(report))