BibGuard

Sleeping

thinkwee

v2.0

fcffa22 17 days ago

38.3 kB

	"""
	Report generator for bibliography check results.
	"""
	import json
	import re
	from dataclasses import asdict, dataclass, is_dataclass
	from datetime import datetime
	from typing import Any, Optional, List, Dict
	from pathlib import Path

	from ..parsers.bib_parser import BibEntry
	from ..analyzers.metadata_comparator import ComparisonResult
	from ..analyzers.usage_checker import UsageResult
	from ..analyzers.llm_evaluator import EvaluationResult
	from ..analyzers.duplicate_detector import DuplicateGroup
	from ..checkers.base import CheckResult, CheckSeverity
	from .html_report import render_standalone_html


	@dataclass
	class EntryReport:
	    """Everything the checks produced for one bibliography entry.

	    Pairs the parsed entry with its optional metadata comparison, its
	    optional usage result and the list of relevance evaluations.
	    """

	    # The bibliography entry the results below refer to.
	    entry: BibEntry
	    # Metadata comparison result; may be None.
	    comparison: ComparisonResult | None
	    # Usage check result; may be None.
	    usage: UsageResult | None
	    # Relevance evaluations collected for this entry.
	    evaluations: list[EvaluationResult]


	def _json_default(o):
	if is_dataclass(o):
	return asdict(o)
	if hasattr(o, "value"):
	return o.value
	return str(o)


	class ReportGenerator:
	"""Generates formatted markdown reports."""

	def __init__(self, minimal_verified: bool = False, check_preprint_ratio: bool = True, preprint_warning_threshold: float = 0.50):
	self.entries: list[EntryReport] = []
	self.missing_citations: list[str] = []
	self.duplicate_groups: list[DuplicateGroup] \| None = None # None means check not run
	self.bib_files: list[str] = []
	self.tex_files: list[str] = []
	self.bib_file: str = "" # Keep for backward compatibility/single file
	self.tex_file: str = "" # Keep for backward compatibility/single file
	self.minimal_verified = minimal_verified # Whether to show minimal info for verified entries
	self.submission_results: List[CheckResult] = [] # Submission quality check results
	self.template = None # Conference template if used
	self.check_preprint_ratio = check_preprint_ratio # Whether to check preprint ratio
	self.preprint_warning_threshold = preprint_warning_threshold # Threshold for preprint warning
	self.retraction_findings: list = [] # F1 results
	self.url_findings: list = [] # F2 results

	def set_retraction_findings(self, findings) -> None:
	    """Store the retraction findings as a fresh list.

	    A falsy ``findings`` value (``None``, empty) resets the stored
	    list to ``[]``.
	    """
	    self.retraction_findings = [*findings] if findings else []

	def set_url_findings(self, findings) -> None:
	    """Store the URL findings as a fresh list.

	    A falsy ``findings`` value (``None``, empty) resets the stored
	    list to ``[]``.
	    """
	    self.url_findings = [*findings] if findings else []


	def add_entry_report(self, report: EntryReport):
	    """Append one per-entry report to the collected entries."""
	    collected = self.entries
	    collected.append(report)

	def set_metadata(self, bib_files: str | list[str], tex_files: str | list[str]):
	    """Record which source files the report covers.

	    Each argument may be a single path or a list of paths. The plural
	    attributes always hold a list; the singular attributes hold the
	    single path, or the first list element (``""`` for an empty list).

	    Args:
	        bib_files: One .bib path or a list of them.
	        tex_files: One .tex path or a list of them.
	    """
	    def _split(files):
	        """Return ``(all_files, first_file)`` for a str-or-list argument."""
	        if isinstance(files, str):
	            return [files], files
	        return files, (files[0] if files else "")

	    self.bib_files, self.bib_file = _split(bib_files)
	    self.tex_files, self.tex_file = _split(tex_files)

	def set_missing_citations(self, missing: list[str]):
	    """Store the citation keys that have no matching bib entry.

	    The list is stored as given (not copied).
	    """
	    self.missing_citations = missing

	def set_duplicate_groups(self, groups: list[DuplicateGroup]):
	    """Store the detected groups of duplicate entries.

	    The list is stored as given (not copied).
	    """
	    self.duplicate_groups = groups

	def set_submission_results(self, results: List[CheckResult], template=None):
	    """Store the submission quality results and the template, if any.

	    Args:
	        results: Check results to report.
	        template: Conference template the checks used; defaults to None.
	    """
	    self.submission_results, self.template = results, template

	def generate(self) -> str:
	    """Assemble the complete Markdown report and return it as one string.

	    Sections appear in this order, each followed by a blank line:
	    header, disclaimer, summary, issues, verified entries and, only
	    when submission results exist, the submission quality section.
	    The footer closes the report.
	    """
	    always_present = (
	        self._generate_header,
	        self._generate_disclaimer,
	        self._generate_summary,
	        self._generate_issues_section,   # bibliography-related issues
	        self._generate_verified_section,
	    )

	    report: list[str] = []
	    for build_section in always_present:
	        report += build_section()
	        report.append("")

	    # LaTeX quality checks are optional.
	    if self.submission_results:
	        report += self._generate_submission_section()
	        report.append("")

	    report += self._generate_footer()
	    return "\n".join(report)

	def get_summary_stats(self) -> tuple[dict, dict]:
	    """Get summary statistics as dictionaries for console display (issues only).

	    Returns:
	        ``(bib_stats, latex_stats)``. ``bib_stats`` maps a bibliography
	        issue label to its count and only contains labels with a
	        non-zero count. ``latex_stats`` maps a rule display name to the
	        number of failed submission results attributed to that rule.
	    """
	    title_mismatches = 0
	    author_mismatches = 0
	    year_mismatches = 0
	    low_relevance = 0
	    unable_to_verify = 0

	    for e in self.entries:
	        # Metadata issues
	        if e.comparison and e.comparison.has_issues:
	            has_title = False
	            has_author = False
	            has_year = False

	            for issue in e.comparison.issues:
	                if "Title mismatch" in issue:
	                    has_title = True
	                elif "Author mismatch" in issue:
	                    has_author = True
	                elif "Year mismatch" in issue:
	                    has_year = True
	                elif "Unable to find" in issue:
	                    # Counted per issue, unlike the per-entry mismatch flags.
	                    unable_to_verify += 1

	            # Each mismatch kind counts at most once per entry.
	            if has_title:
	                title_mismatches += 1
	            if has_author:
	                author_mismatches += 1
	            if has_year:
	                year_mismatches += 1

	        # Relevance issues
	        if any(ev.relevance_score <= 2 for ev in e.evaluations):
	            low_relevance += 1

	    # Only non-zero counters are reported.
	    bib_stats = {
	        label: count
	        for label, count in (
	            ("Title Mismatches", title_mismatches),
	            ("Author Mismatches", author_mismatches),
	            ("Year Mismatches", year_mismatches),
	            ("Low Relevance", low_relevance),
	            ("Unable to Verify", unable_to_verify),
	        )
	        if count > 0
	    }

	    if self.duplicate_groups:
	        bib_stats["Duplicate Groups"] = len(self.duplicate_groups)

	    if self.missing_citations:
	        bib_stats["Missing Bib Entries"] = len(self.missing_citations)

	    unused = [e for e in self.entries if e.usage and not e.usage.is_used]
	    if unused:
	        bib_stats["Unused Entries"] = len(unused)

	    # LaTeX stats - group by precise rule names.
	    # Maps a substring of the raw checker message to its display name;
	    # the first matching pattern (in this order) wins.
	    RULE_MAPPING = {
	        "Very long sentence": "Sentence Length (Critical)",
	        "Long sentence": "Sentence Length (Warning)",
	        "Possible Markdown bullet point": "Markdown Bullet Point",
	        "Possible Markdown numbered list": "Markdown Numbered List",
	        "Possible Markdown italic": "Markdown Italic",
	        "Possible Markdown bold": "Markdown Bold",
	        "Inconsistent hyphenation": "Hyphenation Inconsistency",
	        "Inconsistent spelling": "Spelling Inconsistency",
	        "Unreferenced figure": "Unreferenced Figure",
	        "Unreferenced table": "Unreferenced Table",
	        "Unreferenced section": "Unreferenced Section",
	        "Unreferenced label": "Unreferenced Label",
	        "Citation from": "Old Citation (10+ years)",
	        "Hedging language": "Hedging/Vague Language",
	        "Redundant phrase": "Redundant Phrasing",
	        "Weak start with": "Weak Sentence Starter",
	        "Unescaped &": "Unescaped Special Character",
	        "Citation without non-breaking space": "Missing Non-breaking Space (~)",
	        "Mixed citation styles": "Mixed Citation Styles",
	        "Mixed inline math": "Mixed Math Notation",
	        "Appendix section": "Unreferenced Appendix",
	        "Missing space before unit": "Unit Spacing Issue",
	    }

	    latex_stats = {}
	    for r in self.submission_results:
	        if r.passed:
	            continue

	        raw_msg = r.message
	        rule_name = next(
	            (official for pattern, official in RULE_MAPPING.items() if pattern in raw_msg),
	            None,
	        )

	        if rule_name is None:
	            # Fallback: strip the dynamic parts (parenthesised text,
	            # quoted text, numbers) and keep what precedes the first colon.
	            clean_msg = re.sub(r"\(.*?\)", "", raw_msg)
	            clean_msg = re.sub(r"'.*?'", "", clean_msg)
	            clean_msg = re.sub(r"\d+", "", clean_msg)
	            # A message made only of dynamic parts cleans down to an
	            # empty string; report it under a readable name instead of "".
	            rule_name = clean_msg.split(":")[0].strip() or "Unknown Rule"

	        latex_stats[rule_name] = latex_stats.get(rule_name, 0) + 1

	    return bib_stats, latex_stats

	def generate_console_output(self) -> str:
	    """Return the console-friendly report: summary, then issues.

	    Each of the two sections is followed by a blank line.
	    """
	    console_lines = [
	        *self._generate_summary(),          # summary statistics
	        "",
	        *self._generate_issues_section(),   # critical issues
	        "",
	    ]
	    return "\n".join(console_lines)

	def _generate_header(self) -> list[str]:
	"""Generate report header.

	File names are intentionally not printed — keep the report
	portable, and never expose local source paths to anyone the
	report is shared with.
	"""
	timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
	return [
	"# Bibliography Validation Report",
	"",
	f"Generated: {timestamp}",
	"",
	"\| Inputs \| Count \|",
	"\|--------\|-------\|",
	f"\| Bib File(s) \| {len(self.bib_files)} \|",
	f"\| TeX File(s) \| {len(self.tex_files)} \|",
	]

	def _generate_disclaimer(self) -> list[str]:
	"""Generate disclaimer section."""
	return [
	"> ⚠️ Disclaimer: This report is generated by an automated tool. While BibGuard strives for accuracy, it may produce false positives or miss certain issues. This tool cannot replace human review. Please manually verify all reported issues before making changes to your bibliography."
	]

	def _generate_summary(self) -> list[str]:
	"""Generate summary statistics."""
	total = len(self.entries)

	# Check availability of results
	has_metadata = any(e.comparison is not None for e in self.entries)
	has_usage = any(e.usage is not None for e in self.entries)
	has_eval = any(len(e.evaluations) > 0 for e in self.entries)

	# Calculate Verified/Issues
	# Note: _is_verified depends on _has_issues.
	# If a check wasn't run, it won't contribute to issues.
	verified = sum(1 for e in self.entries if self._is_verified(e))
	issues = sum(1 for e in self.entries if self._has_issues(e))

	# Usage stats
	if has_usage:
	used = sum(1 for e in self.entries if e.usage and e.usage.is_used)
	unused = total - used
	used_str = str(used)
	unused_str = str(unused)
	missing_str = str(len(self.missing_citations))
	else:
	used_str = "N/A"
	unused_str = "N/A"
	missing_str = "N/A"

	# Duplicate stats - show N/A if check wasn't run (duplicate_groups is None means not checked)
	if self.duplicate_groups is None:
	dup_str = "N/A"
	else:
	dup_str = str(len(self.duplicate_groups))

	# Preprint detection (only if enabled)
	preprint_str = "N/A"
	preprint_warning = []
	if self.check_preprint_ratio and has_usage:
	used_entries = [e for e in self.entries if e.usage and e.usage.is_used]
	if used_entries:
	preprint_count = sum(1 for e in used_entries if self._is_preprint(e.entry))
	preprint_ratio = preprint_count / len(used_entries)
	preprint_str = f"{preprint_count} ({preprint_ratio:.1%})"

	# Warning if exceeds threshold
	if preprint_ratio > self.preprint_warning_threshold:
	preprint_warning = [
	"",
	f"> ⚠️ High Preprint Ratio Warning: {preprint_ratio:.1%} of your used references are preprints (arXiv, bioRxiv, etc.). Consider replacing some with peer-reviewed publications if available."
	]

	summary_lines = [
	"## 📊 Summary",
	"",
	"### 📚 Bibliography Statistics",
	"",
	"\| Metric \| Count \|",
	"\|--------\|-------\|",
	f"\| Total Entries \| {total} \|",
	f"\| ✅ Verified (Clean) \| {verified} \|",
	f"\| ⚠️ With Issues \| {issues} \|",
	f"\| 📝 Used in TeX \| {used_str} \|",
	f"\| 🗑️ Unused \| {unused_str} \|",
	f"\| 🔄 Duplicate Groups \| {dup_str} \|",
	f"\| ❌ Missing Bib Entries \| {missing_str} \|",
	f"\| 📄 Preprints (Used) \| {preprint_str} \|",
	]

	# Add warning if needed
	if preprint_warning:
	summary_lines.extend(preprint_warning)

	summary_lines.extend([
	"",
	"### 📋 LaTeX Quality Checks",
	"",
	self._get_submission_summary()
	])

	return summary_lines

	def _is_preprint(self, entry: BibEntry) -> bool:
	"""Check if an entry is a preprint."""
	# Preprint indicators
	preprint_keywords = [
	'arxiv', 'biorxiv', 'medrxiv', 'ssrn', 'preprint',
	'openreview', 'techreport', 'technical report', 'working paper',
	'tech report', 'tech. report'
	]

	# Check entry type
	if entry.entry_type.lower() in ['techreport', 'unpublished', 'misc']:
	# Further check if it's actually a preprint
	text_to_check = ' '.join([
	entry.journal.lower(),
	entry.booktitle.lower(),
	entry.publisher.lower(),
	entry.entry_type.lower()
	])

	if any(keyword in text_to_check for keyword in preprint_keywords):
	return True

	# Check if arXiv ID exists
	if entry.has_arxiv:
	return True

	# Check journal/booktitle/publisher fields
	venue_text = ' '.join([
	entry.journal.lower(),
	entry.booktitle.lower(),
	entry.publisher.lower()
	])

	return any(keyword in venue_text for keyword in preprint_keywords)

	def _get_submission_summary(self) -> str:
	    """Generate the submission quality summary table.

	    Only results that did not pass are counted, so the numbers agree
	    with the detailed submission section, which lists failed results
	    only.

	    Returns:
	        A Markdown table (as one newline-joined string) with the
	        number of failed results per severity, or a fixed notice when
	        no quality checks were recorded.
	    """
	    if not self.submission_results:
	        return "No quality checks were performed."

	    failed = [r for r in self.submission_results if not r.passed]

	    def _count(severity) -> int:
	        """Number of failed results with the given severity."""
	        return sum(1 for r in failed if r.severity == severity)

	    # Table delimiters are plain "|" so Markdown renders a table.
	    return "\n".join([
	        "| Severity | Count |",
	        "|----------|-------|",
	        f"| 🔴 Errors | {_count(CheckSeverity.ERROR)} |",
	        f"| 🟡 Warnings | {_count(CheckSeverity.WARNING)} |",
	        f"| 🔵 Suggestions | {_count(CheckSeverity.INFO)} |",
	    ])

	def _is_verified(self, entry: EntryReport) -> bool:
	"""Check if entry is clean (no issues)."""
	return not self._has_issues(entry)

	def _has_issues(self, entry: EntryReport) -> bool:
	"""Check if entry has any issues."""
	# Metadata issues
	if entry.comparison and entry.comparison.has_issues:
	return True
	# LLM issues (low relevance)
	if any(ev.relevance_score <= 2 for ev in entry.evaluations):
	return True
	# NOTE: We don't include usage issues (unused) here because
	# unused entries are already shown in the "Unused Entries" section
	return False

	def _has_metadata_or_relevance_issues(self, entry: EntryReport) -> bool:
	"""Check if entry has metadata or relevance issues (excluding duplicate/unused)."""
	# Metadata issues
	if entry.comparison and entry.comparison.has_issues:
	return True
	# LLM issues (low relevance)
	if any(ev.relevance_score <= 2 for ev in entry.evaluations):
	return True
	return False

	def _generate_issues_section(self) -> list[str]:
	"""Generate detailed section for entries with issues."""
	lines = ["## ⚠️ Critical Issues Detected", ""]

	has_any_issues = False

	# 1. Missing Citations
	if self.missing_citations:
	has_any_issues = True
	lines.append("### ❌ Missing Bibliography Entries")
	lines.append("The following keys are cited in the TeX file but missing from the .bib file:")
	lines.append("")
	for key in self.missing_citations:
	lines.append(f"- `{key}`")
	lines.append("")

	# 2. Duplicate Entries
	if self.duplicate_groups:
	has_any_issues = True
	lines.append("### 🔄 Duplicate Entries")
	for i, group in enumerate(self.duplicate_groups, 1):
	lines.append(f"#### Group {i} (Similarity: {group.similarity_score:.0%})")
	lines.append(f"Reason: {group.reason}")
	lines.append("")
	lines.append("\| Key \| Title \| Year \|")
	lines.append("\|-----\|-------\|------\|")
	for entry in group.entries:
	lines.append(f"\| `{entry.key}` \| {entry.title} \| {entry.year} \|")
	lines.append("")

	# 3. Unused Entries
	unused = [e for e in self.entries if e.usage and not e.usage.is_used]
	if unused:
	has_any_issues = True
	lines.append("### 🗑️ Unused Entries")
	lines.append("The following entries are in the .bib file but NOT cited in the TeX file:")
	lines.append("")
	for e in unused:
	lines.append(f"- `{e.entry.key}`: {e.entry.title}")
	lines.append("")

	# 4. Metadata Mismatches & Low Relevance
	issue_entries = [e for e in self.entries if self._has_metadata_or_relevance_issues(e)]

	if issue_entries:
	has_any_issues = True
	lines.append("### ⚠️ Metadata & Relevance Issues")

	for entry_report in issue_entries:
	lines.extend(self._format_entry_detail(entry_report, is_verified=False))

	if not has_any_issues:
	lines.append("🎉 No critical issues found!")

	return lines

	def _generate_verified_section(self) -> list[str]:
	"""Generate section for verified entries."""
	lines = ["## ✅ Verified Entries", ""]

	verified = [e for e in self.entries if self._is_verified(e)]

	if not verified:
	lines.append("_No verified entries found._")
	return lines

	lines.append(f"Found {len(verified)} entries with correct metadata.")
	lines.append("")

	# Use a collapsible details block for clean UI
	lines.append("<details>")
	lines.append("<summary>Click to view verified entries</summary>")
	lines.append("")

	for entry_report in verified:
	lines.extend(self._format_entry_detail(entry_report, minimal=self.minimal_verified, is_verified=True))

	lines.append("</details>")
	return lines

	def _format_entry_detail(self, report: EntryReport, minimal: bool = False, is_verified: bool = False) -> list[str]:
	"""Format a single entry report in Markdown."""
	entry = report.entry
	comp = report.comparison
	lines = []

	# Title header - use checkmark for verified entries, warning for issues
	icon = "✅" if is_verified else "⚠️"
	lines.append(f"#### {icon} `{entry.key}`")
	lines.append(f"Title: {entry.title}")
	lines.append("")

	# Metadata Status
	if comp:
	status_icon = "✅" if comp.is_match else "❌"
	lines.append(f"- Metadata Status: {status_icon} {comp.source.upper()} (Confidence: {comp.confidence:.1%})")

	if comp.has_issues and not minimal:
	lines.append(" - Discrepancies:")
	for issue in comp.issues:
	# Format mismatch details nicely
	if "Mismatch" in issue or "mismatch" in issue:
	lines.append(f" - 🔴 {issue}")
	if "Title" in issue:
	lines.append(f" - Bib: `{comp.bib_title}`")
	lines.append(f" - Fetched: `{comp.fetched_title}`")
	elif "Author" in issue:
	lines.append(f" - Bib: `{', '.join(comp.bib_authors)}`")
	lines.append(f" - Fetched: `{', '.join(comp.fetched_authors)}`")
	else:
	lines.append(f" - 🔸 {issue}")

	# Positive notes (corroboration, year-tolerance) — separate from issues.
	notes = list(getattr(comp, "notes", []) or [])
	if notes and not minimal:
	lines.append(" - Notes:")
	for note in notes:
	lines.append(f" - 🟢 {note}")

	# Relevance Status
	if report.evaluations and not minimal:
	lines.append("- Relevance Analysis:")
	for eval_res in report.evaluations:
	score_icon = "🟢" if eval_res.relevance_score >= 4 else ("🟡" if eval_res.relevance_score == 3 else "🔴")
	lines.append(f" - {score_icon} Score {eval_res.relevance_score}/5 ({eval_res.score_label})")
	if eval_res.line_number:
	lines.append(f" - Line {eval_res.line_number}")
	lines.append(f" - \"{eval_res.explanation}\"")

	lines.append("")
	lines.append("---")
	lines.append("")
	return lines

	def _generate_submission_section(self) -> list[str]:
	    """Generate the section for submission quality check results.

	    Layout: optional template information, a severity count table
	    (only severities with failures get a row), then the failed results
	    grouped as critical errors, warnings and (collapsible) suggestions.
	    Passed results are never listed. When nothing failed, a single
	    "no submission issues" line is emitted after the template info.

	    Returns:
	        The Markdown lines of the section.
	    """
	    lines = ["## 📋 Submission Quality Checks", ""]

	    # Template info
	    if self.template:
	        lines.append(f"Conference Template: {self.template.name}")
	        lines.append(f"Page Limit: {self.template.page_limit_review} (review) / {self.template.page_limit_camera} (camera-ready)")
	        if self.template.mandatory_sections:
	            lines.append(f"Required Sections: {', '.join(self.template.mandatory_sections)}")
	        lines.append("")

	    # Only failed results are reported, split by severity.
	    failed = [r for r in self.submission_results if not r.passed]
	    errors = [r for r in failed if r.severity == CheckSeverity.ERROR]
	    warnings = [r for r in failed if r.severity == CheckSeverity.WARNING]
	    infos = [r for r in failed if r.severity == CheckSeverity.INFO]

	    if not (errors or warnings or infos):
	        lines.append("🎉 No submission issues found!")
	        lines.append("")
	        return lines

	    # Summary table; delimiters are plain "|" so Markdown renders a table.
	    lines.append("| Severity | Count |")
	    lines.append("|----------|-------|")
	    if errors:
	        lines.append(f"| 🔴 Errors | {len(errors)} |")
	    if warnings:
	        lines.append(f"| 🟡 Warnings | {len(warnings)} |")
	    if infos:
	        lines.append(f"| 🔵 Suggestions | {len(infos)} |")
	    lines.append("")

	    def _format_one(result) -> list[str]:
	        """Render a single CheckResult — line number only, no file path,
	        no truncation. The HTML report follows the same convention."""
	        buf = [f"- {result.message}"]
	        if result.line_number:
	            buf.append(f" - Line {result.line_number}")
	        if result.line_content:
	            content = result.line_content
	            match_text = getattr(result, "match_text", None)
	            # NOTE(review): both highlight markers are empty strings, so
	            # wrapping the matched span currently leaves the content
	            # unchanged — confirm the intended delimiter.
	            mark_open = mark_close = ""
	            if match_text and match_text in content:
	                before, hit, after = content.partition(match_text)
	                content = before + mark_open + hit + mark_close + after
	            buf.append(f" - `{content}`")
	        if result.suggestion:
	            buf.append(f" - 💡 {result.suggestion}")
	        return buf

	    # Display errors first
	    if errors:
	        lines.append("### 🔴 Critical Errors")
	        lines.append("")
	        for result in errors:
	            lines.extend(_format_one(result))
	        lines.append("")

	    # Display warnings
	    if warnings:
	        lines.append("### 🟡 Warnings")
	        lines.append("")
	        for result in warnings:
	            lines.extend(_format_one(result))
	        lines.append("")

	    # Display suggestions (collapsible)
	    if infos:
	        lines.append("### 🔵 Suggestions")
	        lines.append("<details>")
	        lines.append("<summary>Click to view suggestions</summary>")
	        lines.append("")
	        for result in infos:
	            lines.extend(_format_one(result))
	        lines.append("")
	        lines.append("</details>")
	        lines.append("")

	    return lines

	def _generate_footer(self) -> list[str]:
	"""Generate report footer."""
	return [
	"",
	"---",
	f"Report generated by BibGuard on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
	]

	def save(self, filepath: str):
	    """Write the full Markdown report to ``filepath`` as UTF-8.

	    The report is generated first, then the file is created or
	    overwritten.
	    """
	    markdown = self.generate()
	    Path(filepath).write_text(markdown, encoding='utf-8')

	def save_bibliography_report(self, filepath: str):
	    """Generate and save the bibliography-only report (all bib-related checks).

	    The report contains the header, disclaimer, a bibliography summary
	    table, the issues section, the verified entries and the footer. It
	    omits the preprint row and the LaTeX quality summary.

	    Args:
	        filepath: Destination path; the file is written as UTF-8.
	    """
	    lines = []

	    # Header. Table delimiters are plain "|" so Markdown renders a table.
	    lines.append("# Bibliography Validation Report")
	    lines.append("")
	    lines.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
	    lines.append("")
	    lines.append("| Inputs | Count |")
	    lines.append("|--------|-------|")
	    lines.append(f"| Bib File(s) | {len(self.bib_files)} |")
	    lines.append(f"| TeX File(s) | {len(self.tex_files)} |")
	    lines.append("")

	    # Disclaimer
	    lines.extend(self._generate_disclaimer())
	    lines.append("")

	    # Summary - Bibliography only
	    total = len(self.entries)
	    verified = sum(1 for e in self.entries if self._is_verified(e))
	    issues = sum(1 for e in self.entries if self._has_issues(e))

	    # Usage-derived rows show N/A when no entry carries a usage result.
	    has_usage = any(e.usage is not None for e in self.entries)
	    if has_usage:
	        used = sum(1 for e in self.entries if e.usage and e.usage.is_used)
	        used_str = str(used)
	        unused_str = str(total - used)
	        missing_str = str(len(self.missing_citations))
	    else:
	        used_str = unused_str = missing_str = "N/A"

	    # duplicate_groups is None when the duplicate check was not run.
	    dup_str = "N/A" if self.duplicate_groups is None else str(len(self.duplicate_groups))

	    lines.append("## 📊 Summary")
	    lines.append("")
	    lines.append("| Metric | Count |")
	    lines.append("|--------|-------|")
	    lines.append(f"| Total Entries | {total} |")
	    lines.append(f"| ✅ Verified (Clean) | {verified} |")
	    lines.append(f"| ⚠️ With Issues | {issues} |")
	    lines.append(f"| 📝 Used in TeX | {used_str} |")
	    lines.append(f"| 🗑️ Unused | {unused_str} |")
	    lines.append(f"| 🔄 Duplicate Groups | {dup_str} |")
	    lines.append(f"| ❌ Missing Bib Entries | {missing_str} |")
	    lines.append("")

	    # Issues section
	    lines.extend(self._generate_issues_section())
	    lines.append("")

	    # Verified entries
	    lines.extend(self._generate_verified_section())
	    lines.append("")

	    # Footer
	    lines.extend(self._generate_footer())

	    content = "\n".join(lines)
	    with open(filepath, 'w', encoding='utf-8') as f:
	        f.write(content)

	# ------------------------------------------------------------------
	# JSON + standalone HTML output
	# ------------------------------------------------------------------
	def build_payload(self) -> Dict[str, Any]:
	    """Build the JSON-serializable payload used by JSON & HTML outputs.

	    Local source paths are deliberately kept out of the payload: file
	    inputs are reported as counts, submission results carry no file
	    path, and the ``file_path`` of each evaluation is blanked (the key
	    is kept so the payload shape stays stable for consumers).

	    Returns:
	        A dict with the keys ``meta``, ``summary``, ``entries``,
	        ``submission_results``, ``retractions``, ``url_findings``,
	        ``duplicates`` and ``missing_citations``.
	    """
	    def _entry_dict(e: BibEntry) -> dict:
	        return {
	            "key": e.key, "entry_type": e.entry_type, "title": e.title,
	            "author": e.author, "year": e.year, "journal": e.journal,
	            "booktitle": e.booktitle, "publisher": e.publisher,
	            "doi": e.doi, "arxiv_id": e.arxiv_id, "url": e.url,
	            "volume": e.volume, "pages": e.pages,
	        }

	    def _comparison_dict(c: Optional[ComparisonResult]) -> Optional[dict]:
	        if c is None:
	            return None
	        return {
	            "is_match": c.is_match, "confidence": c.confidence,
	            "title_match": c.title_match, "title_similarity": c.title_similarity,
	            "author_match": c.author_match, "author_similarity": c.author_similarity,
	            "year_match": c.year_match,
	            "bib_title": c.bib_title, "fetched_title": c.fetched_title,
	            "bib_authors": c.bib_authors, "fetched_authors": c.fetched_authors,
	            "bib_year": c.bib_year, "fetched_year": c.fetched_year,
	            "issues": list(c.issues), "source": c.source,
	            # Optional attributes: tolerate results that lack them.
	            "notes": list(getattr(c, "notes", []) or []),
	            "published_version_hint": getattr(c, "published_version_hint", ""),
	        }

	    def _usage_dict(u: Optional[UsageResult]) -> Optional[dict]:
	        if u is None:
	            return None
	        return {"is_used": u.is_used, "usage_count": getattr(u, "usage_count", 0)}

	    def _eval_dict(ev: EvaluationResult) -> dict:
	        return {
	            "entry_key": ev.entry_key,
	            "relevance_score": ev.relevance_score,
	            "is_relevant": ev.is_relevant,
	            "explanation": ev.explanation,
	            "citation_role": getattr(ev, "citation_role", ""),
	            "line_number": ev.line_number,
	            # Blanked on purpose: exporting ev.file_path would leak a
	            # local tex path into the JSON and the standalone HTML,
	            # which the rest of this payload takes care to avoid.
	            "file_path": "",
	            "error": ev.error,
	        }

	    entries_payload = [
	        {
	            "entry": _entry_dict(r.entry),
	            "comparison": _comparison_dict(r.comparison),
	            "usage": _usage_dict(r.usage),
	            "evaluations": [_eval_dict(ev) for ev in (r.evaluations or [])],
	        }
	        for r in self.entries
	    ]

	    sub_payload = [
	        {
	            "checker": r.checker_name, "passed": r.passed,
	            "severity": r.severity.value if hasattr(r.severity, "value") else str(r.severity),
	            "message": r.message, "line_number": r.line_number,
	            "line_content": r.line_content, "suggestion": r.suggestion,
	            # file_path intentionally omitted — user-facing report should
	            # never expose local tex paths.
	            "match_text": getattr(r, "match_text", None),
	        }
	        for r in self.submission_results
	    ]

	    retr_payload = []
	    for f in self.retraction_findings:
	        # A finding without a (truthy) result falls back to neutral defaults.
	        res = getattr(f, "result", None)
	        retr_payload.append({
	            "entry_key": getattr(f, "entry_key", ""),
	            "doi": getattr(f, "doi", ""),
	            "is_retracted": getattr(res, "is_retracted", False) if res else False,
	            "update_type": getattr(res, "update_type", "") if res else "",
	            "notice_doi": getattr(res, "notice_doi", "") if res else "",
	            "notice_label": getattr(res, "notice_label", "") if res else "",
	            "notice_url": getattr(res, "notice_url", "") if res else "",
	        })

	    url_payload = [
	        {
	            "entry_key": getattr(f, "entry_key", ""),
	            "url": getattr(f, "url", ""),
	            "status": getattr(f, "status", ""),
	            "status_code": getattr(f, "status_code", None),
	            "detail": getattr(f, "detail", ""),
	        }
	        for f in self.url_findings
	    ]

	    # Each duplicate group is reduced to the list of its non-empty keys.
	    duplicates = []
	    for grp in (self.duplicate_groups or []):
	        keys = [getattr(e, "key", "") for e in getattr(grp, "entries", [])]
	        duplicates.append([k for k in keys if k])

	    bib_stats, latex_stats = self.get_summary_stats()
	    return {
	        "meta": {
	            "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
	            # Counts only — never expose source filenames in any
	            # downstream artifact (HTML, JSON, anywhere else).
	            "bib_files_count": len(self.bib_files),
	            "tex_files_count": len(self.tex_files),
	            "template": getattr(self.template, "name", "") if self.template else "",
	        },
	        "summary": {"bibliography": bib_stats, "latex": latex_stats},
	        "entries": entries_payload,
	        "submission_results": sub_payload,
	        "retractions": retr_payload,
	        "url_findings": url_payload,
	        "duplicates": duplicates,
	        "missing_citations": list(self.missing_citations),
	    }

	def save_json(self, filepath: str) -> None:
	    """Dump the report payload to ``filepath`` as indented UTF-8 JSON.

	    Non-ASCII characters are written as-is, and objects the encoder
	    cannot handle go through ``_json_default``.
	    """
	    report = self.build_payload()
	    with open(filepath, "w", encoding="utf-8") as sink:
	        json.dump(
	            report,
	            sink,
	            ensure_ascii=False,
	            indent=2,
	            default=_json_default,
	        )

	def save_html(self, filepath: str) -> None:
	    """Render the payload with ``render_standalone_html`` and write it as UTF-8.

	    The payload is built and rendered before the destination file is
	    created or overwritten.
	    """
	    document = render_standalone_html(self.build_payload())
	    Path(filepath).write_text(document, encoding="utf-8")

	def save_latex_quality_report(self, filepath: str, submission_results: List[CheckResult], template=None):
	    """Generate and save the LaTeX quality report (all tex-related quality checks).

	    Side effect: ``self.submission_results`` and ``self.template`` are
	    overwritten with the arguments, because the detailed section is
	    rendered by ``_generate_submission_section``, which reads them.

	    Args:
	        filepath: Destination path; the file is written as UTF-8.
	        submission_results: Check results to report.
	        template: Conference template used, if any; only its ``name``
	            is printed in the report header.
	    """
	    lines = []

	    # Header
	    lines.append("# LaTeX Quality Report")
	    lines.append("")
	    lines.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
	    lines.append("")
	    lines.append(f"Inputs: {len(self.tex_files)} TeX file(s)")
	    lines.append("")

	    if template:
	        lines.append(f"Template: {template.name}")
	        lines.append("")

	    # Disclaimer
	    lines.append("> ⚠️ Note: This report contains automated quality checks for your LaTeX document. Please review all suggestions carefully before making changes.")
	    lines.append("")

	    # Summary: count failed results only, so the numbers agree with the
	    # detailed section below, which lists failed results only.
	    failed = [r for r in submission_results if not r.passed]
	    error_count = sum(1 for r in failed if r.severity == CheckSeverity.ERROR)
	    warning_count = sum(1 for r in failed if r.severity == CheckSeverity.WARNING)
	    info_count = sum(1 for r in failed if r.severity == CheckSeverity.INFO)

	    # Table delimiters are plain "|" so Markdown renders a table.
	    lines.append("## 📊 Summary")
	    lines.append("")
	    lines.append("| Severity | Count |")
	    lines.append("|----------|-------|")
	    lines.append(f"| 🔴 Errors | {error_count} |")
	    lines.append(f"| 🟡 Warnings | {warning_count} |")
	    lines.append(f"| 🔵 Suggestions | {info_count} |")
	    lines.append("")

	    # Detailed issues
	    self.submission_results = submission_results
	    self.template = template
	    lines.extend(self._generate_submission_section())
	    lines.append("")

	    # Footer
	    lines.append("---")
	    lines.append("")
	    lines.append(f"Report generated by BibGuard on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

	    content = "\n".join(lines)
	    with open(filepath, 'w', encoding='utf-8') as f:
	        f.write(content)