"""
Report generator for bibliography check results.
"""
import json
import re
from dataclasses import asdict, dataclass, is_dataclass
from datetime import datetime
from typing import Any, Optional, List, Dict
from pathlib import Path

from ..parsers.bib_parser import BibEntry
from ..analyzers.metadata_comparator import ComparisonResult
from ..analyzers.usage_checker import UsageResult
from ..analyzers.llm_evaluator import EvaluationResult
from ..analyzers.duplicate_detector import DuplicateGroup
from ..checkers.base import CheckResult, CheckSeverity
from .html_report import render_standalone_html


@dataclass
class EntryReport:
    """Complete report for a single bib entry."""
    entry: BibEntry
    comparison: Optional[ComparisonResult]
    usage: Optional[UsageResult]
    evaluations: list[EvaluationResult]


def _json_default(o):
    """Fallback serializer handed to ``json.dump`` for non-JSON-native objects.

    Dataclass instances become dicts, objects exposing ``.value`` (e.g. enum
    members) are replaced by that value, and anything else is stringified.
    """
    # is_dataclass() is also True for dataclass *classes*; asdict() only
    # accepts instances and would raise TypeError on a class.
    if is_dataclass(o) and not isinstance(o, type):
        return asdict(o)
    if hasattr(o, "value"):
        return o.value
    return str(o)


class ReportGenerator:
    """Generates formatted markdown reports."""

    # Message fragment -> display name used to group LaTeX findings.
    # Matching is by substring, first hit (in insertion order) wins.
    _RULE_MAPPING = {
        "Very long sentence": "Sentence Length (Critical)",
        "Long sentence": "Sentence Length (Warning)",
        "Possible Markdown bullet point": "Markdown Bullet Point",
        "Possible Markdown numbered list": "Markdown Numbered List",
        "Possible Markdown italic": "Markdown Italic",
        "Possible Markdown bold": "Markdown Bold",
        "Inconsistent hyphenation": "Hyphenation Inconsistency",
        "Inconsistent spelling": "Spelling Inconsistency",
        "Unreferenced figure": "Unreferenced Figure",
        "Unreferenced table": "Unreferenced Table",
        "Unreferenced section": "Unreferenced Section",
        "Unreferenced label": "Unreferenced Label",
        "Citation from": "Old Citation (10+ years)",
        "Hedging language": "Hedging/Vague Language",
        "Redundant phrase": "Redundant Phrasing",
        "Weak start with": "Weak Sentence Starter",
        "Unescaped &": "Unescaped Special Character",
        "Citation without non-breaking space": "Missing Non-breaking Space (~)",
        "Mixed citation styles": "Mixed Citation Styles",
        "Mixed inline math": "Mixed Math Notation",
        "Appendix section": "Unreferenced Appendix",
        "Missing space before unit": "Unit Spacing Issue",
    }

    # Substrings that mark a venue (or entry type) as a preprint.
    _PREPRINT_KEYWORDS = (
        'arxiv', 'biorxiv', 'medrxiv', 'ssrn', 'preprint', 'openreview',
        'techreport', 'technical report', 'working paper', 'tech report',
        'tech. report',
    )

    def __init__(self, minimal_verified: bool = False, check_preprint_ratio: bool = True,
                 preprint_warning_threshold: float = 0.50):
        """Create an empty report.

        Args:
            minimal_verified: Show only minimal info for verified entries.
            check_preprint_ratio: Whether the summary reports the preprint ratio.
            preprint_warning_threshold: Ratio of preprints among used entries
                above which a warning is added to the summary.
        """
        self.entries: list[EntryReport] = []
        self.missing_citations: list[str] = []
        self.duplicate_groups: list[DuplicateGroup] | None = None  # None means check not run
        self.bib_files: list[str] = []
        self.tex_files: list[str] = []
        self.bib_file: str = ""  # Keep for backward compatibility/single file
        self.tex_file: str = ""  # Keep for backward compatibility/single file
        self.minimal_verified = minimal_verified  # Whether to show minimal info for verified entries
        self.submission_results: List[CheckResult] = []  # Submission quality check results
        self.template = None  # Conference template if used
        self.check_preprint_ratio = check_preprint_ratio  # Whether to check preprint ratio
        self.preprint_warning_threshold = preprint_warning_threshold  # Threshold for preprint warning
        self.retraction_findings: list = []  # F1 results
        self.url_findings: list = []  # F2 results

    def set_retraction_findings(self, findings) -> None:
        """Store retraction findings (a copy; ``None`` becomes an empty list)."""
        self.retraction_findings = list(findings or [])

    def set_url_findings(self, findings) -> None:
        """Store URL findings (a copy; ``None`` becomes an empty list)."""
        self.url_findings = list(findings or [])

    def add_entry_report(self, report: EntryReport):
        """Add an entry report."""
        self.entries.append(report)

    def set_metadata(self, bib_files: str | list[str], tex_files: str | list[str]):
        """Set source file information.

        Each argument may be a single path or a list of paths. The legacy
        single-file attributes are set to the first path (or "" if none).
        """
        if isinstance(bib_files, str):
            self.bib_files = [bib_files]
            self.bib_file = bib_files
        else:
            self.bib_files = bib_files
            self.bib_file = bib_files[0] if bib_files else ""

        if isinstance(tex_files, str):
            self.tex_files = [tex_files]
            self.tex_file = tex_files
        else:
            self.tex_files = tex_files
            self.tex_file = tex_files[0] if tex_files else ""

    def set_missing_citations(self, missing: list[str]):
        """Set list of citations without bib entries."""
        self.missing_citations = missing

    def set_duplicate_groups(self, groups: list[DuplicateGroup]):
        """Set list of duplicate entry groups."""
        self.duplicate_groups = groups

    def set_submission_results(self, results: List[CheckResult], template=None):
        """Set submission quality check results."""
        self.submission_results = results
        self.template = template

    def generate(self) -> str:
        """Generate the full markdown report."""
        lines = []

        # Header, disclaimer, summary, bibliography issues, verified entries —
        # each followed by a blank separator line.
        for build_section in (
            self._generate_header,
            self._generate_disclaimer,
            self._generate_summary,
            self._generate_issues_section,
            self._generate_verified_section,
        ):
            lines.extend(build_section())
            lines.append("")

        # 📋 Submission Quality Checks (LaTeX quality checks)
        if self.submission_results:
            lines.extend(self._generate_submission_section())
            lines.append("")

        # Footer
        lines.extend(self._generate_footer())

        return "\n".join(lines)

    def get_summary_stats(self) -> tuple[dict, dict]:
        """Get summary statistics as dictionaries for console display (Issues only).

        Returns:
            ``(bib_stats, latex_stats)``. ``bib_stats`` maps a category label to
            a count and only contains categories with a non-zero count.
            ``latex_stats`` maps a rule display name to the number of failing
            submission results attributed to it.
        """
        title_mismatches = 0
        author_mismatches = 0
        year_mismatches = 0
        low_relevance = 0
        unable_to_verify = 0

        for e in self.entries:
            # Metadata issues: each mismatch category is counted once per entry.
            if e.comparison and e.comparison.has_issues:
                has_title = False
                has_author = False
                has_year = False
                for issue in e.comparison.issues:
                    if "Title mismatch" in issue:
                        has_title = True
                    elif "Author mismatch" in issue:
                        has_author = True
                    elif "Year mismatch" in issue:
                        has_year = True
                    elif "Unable to find" in issue:
                        unable_to_verify += 1
                if has_title:
                    title_mismatches += 1
                if has_author:
                    author_mismatches += 1
                if has_year:
                    year_mismatches += 1

            # Relevance issues
            if any(ev.relevance_score <= 2 for ev in e.evaluations):
                low_relevance += 1

        counters = (
            ("Title Mismatches", title_mismatches),
            ("Author Mismatches", author_mismatches),
            ("Year Mismatches", year_mismatches),
            ("Low Relevance", low_relevance),
            ("Unable to Verify", unable_to_verify),
        )
        bib_stats = {label: count for label, count in counters if count > 0}

        if self.duplicate_groups:
            bib_stats["Duplicate Groups"] = len(self.duplicate_groups)
        if self.missing_citations:
            bib_stats["Missing Bib Entries"] = len(self.missing_citations)

        unused = [e for e in self.entries if e.usage and not e.usage.is_used]
        if unused:
            bib_stats["Unused Entries"] = len(unused)

        # LaTeX stats - Group by precise Rule Names
        latex_stats = {}
        for r in self.submission_results:
            if r.passed:
                continue

            raw_msg = r.message
            rule_name = next(
                (official for pattern, official in self._RULE_MAPPING.items()
                 if pattern in raw_msg),
                None,
            )
            if rule_name is None:
                # Fallback: Clean the message (remove dynamic parts)
                clean_msg = re.sub(r"\(.*?\)", "", raw_msg)
                clean_msg = re.sub(r"'.*?'", "", clean_msg)
                clean_msg = re.sub(r"\d+", "", clean_msg)
                # If nothing is left after cleaning, do not group under "".
                rule_name = clean_msg.split(":")[0].strip() or "Unknown Rule"

            latex_stats[rule_name] = latex_stats.get(rule_name, 0) + 1

        return bib_stats, latex_stats

    def generate_console_output(self) -> str:
        """Generate console-friendly output (Summary + Issues only)."""
        lines = []

        # Summary statistics
        lines.extend(self._generate_summary())
        lines.append("")

        # Critical Issues
        lines.extend(self._generate_issues_section())
        lines.append("")

        return "\n".join(lines)

    def _generate_header(self) -> list[str]:
        """Generate report header.

        File names are intentionally not printed — keep the report portable,
        and never expose local source paths to anyone the report is shared with.
        """
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        return [
            "# Bibliography Validation Report",
            "",
            f"**Generated:** {timestamp}",
            "",
            "| Inputs | Count |",
            "|--------|-------|",
            f"| **Bib File(s)** | {len(self.bib_files)} |",
            f"| **TeX File(s)** | {len(self.tex_files)} |",
        ]

    def _generate_disclaimer(self) -> list[str]:
        """Generate disclaimer section."""
        return [
            "> **⚠️ Disclaimer:** This report is generated by an automated tool. While BibGuard strives for accuracy, it may produce false positives or miss certain issues. **This tool cannot replace human review.** Please manually verify all reported issues before making changes to your bibliography."
        ]

    def _coverage_cells(self) -> tuple[bool, str, str, str, str]:
        """Return ``(has_usage, used, unused, missing, duplicates)`` table cells.

        The string cells are "N/A" when the corresponding check was not run:
        usage/missing when no entry carries a usage result, duplicates when
        ``duplicate_groups`` is ``None``.
        """
        has_usage = any(e.usage is not None for e in self.entries)
        if has_usage:
            used = sum(1 for e in self.entries if e.usage and e.usage.is_used)
            used_str = str(used)
            unused_str = str(len(self.entries) - used)
            missing_str = str(len(self.missing_citations))
        else:
            used_str = unused_str = missing_str = "N/A"

        # duplicate_groups is None means not checked
        dup_str = "N/A" if self.duplicate_groups is None else str(len(self.duplicate_groups))
        return has_usage, used_str, unused_str, missing_str, dup_str

    def _generate_summary(self) -> list[str]:
        """Generate summary statistics."""
        total = len(self.entries)

        # Note: _is_verified depends on _has_issues.
        # If a check wasn't run, it won't contribute to issues.
        verified = sum(1 for e in self.entries if self._is_verified(e))
        issues = sum(1 for e in self.entries if self._has_issues(e))

        has_usage, used_str, unused_str, missing_str, dup_str = self._coverage_cells()

        # Preprint detection (only if enabled)
        preprint_str = "N/A"
        preprint_warning = []
        if self.check_preprint_ratio and has_usage:
            used_entries = [e for e in self.entries if e.usage and e.usage.is_used]
            if used_entries:
                preprint_count = sum(1 for e in used_entries if self._is_preprint(e.entry))
                preprint_ratio = preprint_count / len(used_entries)
                preprint_str = f"{preprint_count} ({preprint_ratio:.1%})"

                # Warning if exceeds threshold
                if preprint_ratio > self.preprint_warning_threshold:
                    preprint_warning = [
                        "",
                        f"> ⚠️ **High Preprint Ratio Warning:** {preprint_ratio:.1%} of your used references are preprints (arXiv, bioRxiv, etc.). Consider replacing some with peer-reviewed publications if available."
                    ]

        summary_lines = [
            "## 📊 Summary",
            "",
            "### 📚 Bibliography Statistics",
            "",
            "| Metric | Count |",
            "|--------|-------|",
            f"| **Total Entries** | {total} |",
            f"| ✅ **Verified (Clean)** | {verified} |",
            f"| ⚠️ **With Issues** | {issues} |",
            f"| 📝 **Used in TeX** | {used_str} |",
            f"| 🗑️ **Unused** | {unused_str} |",
            f"| 🔄 **Duplicate Groups** | {dup_str} |",
            f"| ❌ **Missing Bib Entries** | {missing_str} |",
            f"| 📄 **Preprints (Used)** | {preprint_str} |",
        ]

        # Add warning if needed
        if preprint_warning:
            summary_lines.extend(preprint_warning)

        summary_lines.extend([
            "",
            "### 📋 LaTeX Quality Checks",
            "",
            self._get_submission_summary()
        ])

        return summary_lines

    def _is_preprint(self, entry: BibEntry) -> bool:
        """Check if an entry is a preprint.

        An entry counts as a preprint when it has an arXiv ID, when its
        journal/booktitle/publisher text contains a preprint keyword, or when
        its type is techreport/unpublished/misc and the venue text plus the
        type itself contains one. Missing (``None``) fields are treated as "".
        """
        entry_type = (entry.entry_type or "").lower()
        venue_text = ' '.join([
            (entry.journal or "").lower(),
            (entry.booktitle or "").lower(),
            (entry.publisher or "").lower(),
        ])

        # Check entry type; the type name takes part in the keyword match.
        if entry_type in ('techreport', 'unpublished', 'misc'):
            text_to_check = f"{venue_text} {entry_type}"
            if any(keyword in text_to_check for keyword in self._PREPRINT_KEYWORDS):
                return True

        # Check if arXiv ID exists
        if entry.has_arxiv:
            return True

        # Check journal/booktitle/publisher fields
        return any(keyword in venue_text for keyword in self._PREPRINT_KEYWORDS)

    @staticmethod
    def _failed_by_severity(results) -> tuple[list, list, list]:
        """Split the failing results into ``(errors, warnings, infos)`` lists."""
        failed = [r for r in results if not r.passed]
        return (
            [r for r in failed if r.severity == CheckSeverity.ERROR],
            [r for r in failed if r.severity == CheckSeverity.WARNING],
            [r for r in failed if r.severity == CheckSeverity.INFO],
        )

    def _get_submission_summary(self) -> str:
        """Generate submission quality summary table.

        Only failing results are counted, so the numbers agree with the
        detailed submission section.
        """
        if not self.submission_results:
            return "*No quality checks were performed.*"

        errors, warnings, infos = self._failed_by_severity(self.submission_results)

        lines = [
            "| Severity | Count |",
            "|----------|-------|",
            f"| 🔴 **Errors** | {len(errors)} |",
            f"| 🟡 **Warnings** | {len(warnings)} |",
            f"| 🔵 **Suggestions** | {len(infos)} |"
        ]
        return "\n".join(lines)

    def _is_verified(self, entry: EntryReport) -> bool:
        """Check if entry is clean (no issues)."""
        return not self._has_issues(entry)

    def _has_issues(self, entry: EntryReport) -> bool:
        """Check if entry has any issues."""
        # Metadata issues
        if entry.comparison and entry.comparison.has_issues:
            return True

        # LLM issues (low relevance)
        if any(ev.relevance_score <= 2 for ev in entry.evaluations):
            return True

        # NOTE: We don't include usage issues (unused) here because
        # unused entries are already shown in the "Unused Entries" section
        return False

    def _has_metadata_or_relevance_issues(self, entry: EntryReport) -> bool:
        """Check if entry has metadata or relevance issues (excluding duplicate/unused)."""
        # _has_issues checks exactly metadata + relevance, nothing else.
        return self._has_issues(entry)

    def _generate_issues_section(self) -> list[str]:
        """Generate detailed section for entries with issues."""
        lines = ["## ⚠️ Critical Issues Detected", ""]
        has_any_issues = False

        # 1. Missing Citations
        if self.missing_citations:
            has_any_issues = True
            lines.append("### ❌ Missing Bibliography Entries")
            lines.append("The following keys are cited in the TeX file but missing from the .bib file:")
            lines.append("")
            lines.extend(f"- `{key}`" for key in self.missing_citations)
            lines.append("")

        # 2. Duplicate Entries
        if self.duplicate_groups:
            has_any_issues = True
            lines.append("### 🔄 Duplicate Entries")
            for i, group in enumerate(self.duplicate_groups, 1):
                lines.append(f"#### Group {i} (Similarity: {group.similarity_score:.0%})")
                lines.append(f"**Reason:** {group.reason}")
                lines.append("")
                lines.append("| Key | Title | Year |")
                lines.append("|-----|-------|------|")
                for entry in group.entries:
                    lines.append(f"| `{entry.key}` | {entry.title} | {entry.year} |")
                lines.append("")

        # 3. Unused Entries
        unused = [e for e in self.entries if e.usage and not e.usage.is_used]
        if unused:
            has_any_issues = True
            lines.append("### 🗑️ Unused Entries")
            lines.append("The following entries are in the .bib file but NOT cited in the TeX file:")
            lines.append("")
            lines.extend(f"- `{e.entry.key}`: *{e.entry.title}*" for e in unused)
            lines.append("")

        # 4. Metadata Mismatches & Low Relevance
        issue_entries = [e for e in self.entries if self._has_metadata_or_relevance_issues(e)]
        if issue_entries:
            has_any_issues = True
            lines.append("### ⚠️ Metadata & Relevance Issues")
            for entry_report in issue_entries:
                lines.extend(self._format_entry_detail(entry_report, is_verified=False))

        if not has_any_issues:
            lines.append("🎉 **No critical issues found!**")

        return lines

    def _generate_verified_section(self) -> list[str]:
        """Generate section for verified entries."""
        lines = ["## ✅ Verified Entries", ""]
        verified = [e for e in self.entries if self._is_verified(e)]

        if not verified:
            lines.append("_No verified entries found._")
            return lines

        lines.append(f"Found **{len(verified)}** entries with correct metadata.")
        lines.append("")

        # Use a collapsible details block for clean UI
        lines.append("<details>")
        lines.append("<summary>Click to view verified entries</summary>")
        lines.append("")

        for entry_report in verified:
            lines.extend(self._format_entry_detail(
                entry_report, minimal=self.minimal_verified, is_verified=True))

        lines.append("</details>")
        return lines

    def _format_entry_detail(self, report: EntryReport, minimal: bool = False,
                             is_verified: bool = False) -> list[str]:
        """Format a single entry report in Markdown.

        Args:
            report: The entry report to render.
            minimal: When true, discrepancies, notes and relevance details
                are omitted.
            is_verified: Selects the heading icon (checkmark vs. warning).

        Sub-items are indented two spaces per level so that Markdown renders
        them as nested lists rather than as siblings of their parent item.
        """
        entry = report.entry
        comp = report.comparison
        lines = []

        # Title header - use checkmark for verified entries, warning for issues
        icon = "✅" if is_verified else "⚠️"
        lines.append(f"#### {icon} `{entry.key}`")
        lines.append(f"**Title:** {entry.title}")
        lines.append("")

        # Metadata Status
        if comp:
            status_icon = "✅" if comp.is_match else "❌"
            lines.append(f"- **Metadata Status:** {status_icon} {comp.source.upper()} (Confidence: {comp.confidence:.1%})")

            if comp.has_issues and not minimal:
                lines.append("  - **Discrepancies:**")
                for issue in comp.issues:
                    # Format mismatch details nicely
                    if "Mismatch" in issue or "mismatch" in issue:
                        lines.append(f"    - 🔴 {issue}")
                        if "Title" in issue:
                            lines.append(f"      - **Bib:** `{comp.bib_title}`")
                            lines.append(f"      - **Fetched:** `{comp.fetched_title}`")
                        elif "Author" in issue:
                            lines.append(f"      - **Bib:** `{', '.join(comp.bib_authors)}`")
                            lines.append(f"      - **Fetched:** `{', '.join(comp.fetched_authors)}`")
                    else:
                        lines.append(f"    - 🔸 {issue}")

            # Positive notes (corroboration, year-tolerance) — separate from issues.
            notes = list(getattr(comp, "notes", []) or [])
            if notes and not minimal:
                lines.append("  - **Notes:**")
                lines.extend(f"    - 🟢 {note}" for note in notes)

        # Relevance Status
        if report.evaluations and not minimal:
            lines.append("- **Relevance Analysis:**")
            for eval_res in report.evaluations:
                if eval_res.relevance_score >= 4:
                    score_icon = "🟢"
                elif eval_res.relevance_score == 3:
                    score_icon = "🟡"
                else:
                    score_icon = "🔴"
                lines.append(f"  - {score_icon} **Score {eval_res.relevance_score}/5** ({eval_res.score_label})")
                if eval_res.line_number:
                    lines.append(f"    - Line {eval_res.line_number}")
                lines.append(f"    - *\"{eval_res.explanation}\"*")

        lines.append("")
        lines.append("---")
        lines.append("")
        return lines

    @staticmethod
    def _format_check_result(result) -> list[str]:
        """Render a single CheckResult — line number only, no file path,
        no truncation. The HTML report follows the same convention."""
        buf = [f"- {result.message}"]
        if result.line_number:
            buf.append(f"  - Line {result.line_number}")
        if result.line_content:
            # Highlight the offending span if the checker provided one
            # (first occurrence only).
            content = result.line_content
            match_text = getattr(result, "match_text", None)
            if match_text and match_text in content:
                content = content.replace(match_text, f"**{match_text}**", 1)
            buf.append(f"  - `{content}`")
        if result.suggestion:
            buf.append(f"  - 💡 *{result.suggestion}*")
        return buf

    def _generate_submission_section(self) -> list[str]:
        """Generate section for submission quality check results.

        Only failing results are listed: errors first, then warnings, then
        suggestions inside a collapsible block.
        """
        lines = ["## 📋 Submission Quality Checks", ""]

        # Template info
        if self.template:
            lines.append(f"**Conference Template:** {self.template.name}")
            lines.append(f"**Page Limit:** {self.template.page_limit_review} (review) / {self.template.page_limit_camera} (camera-ready)")
            if self.template.mandatory_sections:
                lines.append(f"**Required Sections:** {', '.join(self.template.mandatory_sections)}")
            lines.append("")

        errors, warnings, infos = self._failed_by_severity(self.submission_results)

        # Summary
        if not (errors or warnings or infos):
            lines.append("🎉 **No submission issues found!**")
            lines.append("")
            return lines

        lines.append("| Severity | Count |")
        lines.append("|----------|-------|")
        if errors:
            lines.append(f"| 🔴 **Errors** | {len(errors)} |")
        if warnings:
            lines.append(f"| 🟡 **Warnings** | {len(warnings)} |")
        if infos:
            lines.append(f"| 🔵 **Suggestions** | {len(infos)} |")
        lines.append("")

        # Display errors first
        if errors:
            lines.append("### 🔴 Critical Errors")
            lines.append("")
            for result in errors:
                lines.extend(self._format_check_result(result))
            lines.append("")

        # Display warnings
        if warnings:
            lines.append("### 🟡 Warnings")
            lines.append("")
            for result in warnings:
                lines.extend(self._format_check_result(result))
            lines.append("")

        # Display suggestions (collapsible)
        if infos:
            lines.append("### 🔵 Suggestions")
            lines.append("<details>")
            lines.append("<summary>Click to view suggestions</summary>")
            lines.append("")
            for result in infos:
                lines.extend(self._format_check_result(result))
            lines.append("")
            lines.append("</details>")
            lines.append("")

        return lines

    def _generate_footer(self) -> list[str]:
        """Generate report footer."""
        return [
            "",
            "---",
            f"Report generated by **BibGuard** on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        ]

    def save(self, filepath: str):
        """Save report to file."""
        content = self.generate()
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(content)

    def save_bibliography_report(self, filepath: str):
        """Generate and save bibliography-only report (all bib-related checks)."""
        lines = []

        # Header
        lines.extend(self._generate_header())
        lines.append("")

        # Disclaimer
        lines.extend(self._generate_disclaimer())
        lines.append("")

        # Summary - Bibliography only
        total = len(self.entries)
        verified = sum(1 for e in self.entries if self._is_verified(e))
        issues = sum(1 for e in self.entries if self._has_issues(e))
        _, used_str, unused_str, missing_str, dup_str = self._coverage_cells()

        lines.extend([
            "## 📊 Summary",
            "",
            "| Metric | Count |",
            "|--------|-------|",
            f"| **Total Entries** | {total} |",
            f"| ✅ **Verified (Clean)** | {verified} |",
            f"| ⚠️ **With Issues** | {issues} |",
            f"| 📝 **Used in TeX** | {used_str} |",
            f"| 🗑️ **Unused** | {unused_str} |",
            f"| 🔄 **Duplicate Groups** | {dup_str} |",
            f"| ❌ **Missing Bib Entries** | {missing_str} |",
            "",
        ])

        # Issues section
        lines.extend(self._generate_issues_section())
        lines.append("")

        # Verified entries
        lines.extend(self._generate_verified_section())
        lines.append("")

        # Footer
        lines.extend(self._generate_footer())

        content = "\n".join(lines)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(content)

    # ------------------------------------------------------------------
    # JSON + standalone HTML output
    # ------------------------------------------------------------------
    def build_payload(self) -> Dict[str, Any]:
        """Build the JSON-serializable payload used by JSON & HTML outputs.

        Source file paths are never included; only file counts are exported.
        """
        def _entry_dict(e: BibEntry) -> dict:
            return {
                "key": e.key,
                "entry_type": e.entry_type,
                "title": e.title,
                "author": e.author,
                "year": e.year,
                "journal": e.journal,
                "booktitle": e.booktitle,
                "publisher": e.publisher,
                "doi": e.doi,
                "arxiv_id": e.arxiv_id,
                "url": e.url,
                "volume": e.volume,
                "pages": e.pages,
            }

        def _comparison_dict(c: Optional[ComparisonResult]) -> Optional[dict]:
            if c is None:
                return None
            return {
                "is_match": c.is_match,
                "confidence": c.confidence,
                "title_match": c.title_match,
                "title_similarity": c.title_similarity,
                "author_match": c.author_match,
                "author_similarity": c.author_similarity,
                "year_match": c.year_match,
                "bib_title": c.bib_title,
                "fetched_title": c.fetched_title,
                "bib_authors": c.bib_authors,
                "fetched_authors": c.fetched_authors,
                "bib_year": c.bib_year,
                "fetched_year": c.fetched_year,
                "issues": list(c.issues),
                "source": c.source,
                "notes": list(getattr(c, "notes", []) or []),
                "published_version_hint": getattr(c, "published_version_hint", ""),
            }

        def _usage_dict(u: Optional[UsageResult]) -> Optional[dict]:
            if u is None:
                return None
            return {"is_used": u.is_used, "usage_count": getattr(u, "usage_count", 0)}

        def _eval_dict(ev: EvaluationResult) -> dict:
            return {
                "entry_key": ev.entry_key,
                "relevance_score": ev.relevance_score,
                "is_relevant": ev.is_relevant,
                "explanation": ev.explanation,
                "citation_role": getattr(ev, "citation_role", ""),
                "line_number": ev.line_number,
                # Key kept for schema compatibility, value blanked: the
                # payload must never expose local tex paths.
                "file_path": "",
                "error": ev.error,
            }

        entries_payload = [
            {
                "entry": _entry_dict(r.entry),
                "comparison": _comparison_dict(r.comparison),
                "usage": _usage_dict(r.usage),
                "evaluations": [_eval_dict(ev) for ev in (r.evaluations or [])],
            }
            for r in self.entries
        ]

        sub_payload = [
            {
                "checker": r.checker_name,
                "passed": r.passed,
                "severity": r.severity.value if hasattr(r.severity, "value") else str(r.severity),
                "message": r.message,
                "line_number": r.line_number,
                "line_content": r.line_content,
                "suggestion": r.suggestion,
                # file_path intentionally omitted — user-facing report should
                # never expose local tex paths.
                "match_text": getattr(r, "match_text", None),
            }
            for r in self.submission_results
        ]

        retr_payload = []
        for f in self.retraction_findings:
            res = getattr(f, "result", None)
            retr_payload.append({
                "entry_key": getattr(f, "entry_key", ""),
                "doi": getattr(f, "doi", ""),
                "is_retracted": getattr(res, "is_retracted", False) if res else False,
                "update_type": getattr(res, "update_type", "") if res else "",
                "notice_doi": getattr(res, "notice_doi", "") if res else "",
                "notice_label": getattr(res, "notice_label", "") if res else "",
                "notice_url": getattr(res, "notice_url", "") if res else "",
            })

        url_payload = [
            {
                "entry_key": getattr(f, "entry_key", ""),
                "url": getattr(f, "url", ""),
                "status": getattr(f, "status", ""),
                "status_code": getattr(f, "status_code", None),
                "detail": getattr(f, "detail", ""),
            }
            for f in self.url_findings
        ]

        # Each duplicate group is exported as the list of its non-empty keys.
        duplicates = []
        for grp in (self.duplicate_groups or []):
            keys = [getattr(e, "key", "") for e in getattr(grp, "entries", [])]
            duplicates.append([k for k in keys if k])

        bib_stats, latex_stats = self.get_summary_stats()

        return {
            "meta": {
                "generated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                # Counts only — never expose source filenames in any
                # downstream artifact (HTML, JSON, anywhere else).
                "bib_files_count": len(self.bib_files),
                "tex_files_count": len(self.tex_files),
                "template": getattr(self.template, "name", "") if self.template else "",
            },
            "summary": {"bibliography": bib_stats, "latex": latex_stats},
            "entries": entries_payload,
            "submission_results": sub_payload,
            "retractions": retr_payload,
            "url_findings": url_payload,
            "duplicates": duplicates,
            "missing_citations": list(self.missing_citations),
        }

    def save_json(self, filepath: str) -> None:
        """Write a machine-readable JSON dump of the full report."""
        payload = self.build_payload()
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(payload, f, ensure_ascii=False, indent=2, default=_json_default)

    def save_html(self, filepath: str) -> None:
        """Write a single self-contained HTML report (CSS+JS inlined)."""
        payload = self.build_payload()
        html = render_standalone_html(payload)
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(html)

    def save_latex_quality_report(self, filepath: str, submission_results: List[CheckResult],
                                  template=None):
        """Generate and save LaTeX quality report (all tex-related quality checks).

        Note: this also stores ``submission_results`` and ``template`` on the
        instance, replacing any previously set values.
        """
        lines = []

        # Header
        lines.append("# LaTeX Quality Report")
        lines.append("")
        lines.append(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        lines.append("")
        lines.append(f"**Inputs:** {len(self.tex_files)} TeX file(s)")
        lines.append("")

        if template:
            lines.append(f"**Template:** {template.name}")
            lines.append("")

        # Disclaimer
        lines.append("> **⚠️ Note:** This report contains automated quality checks for your LaTeX document. Please review all suggestions carefully before making changes.")
        lines.append("")

        # Summary — failing results only, matching the detailed section below.
        errors, warnings, infos = self._failed_by_severity(submission_results)

        lines.append("## 📊 Summary")
        lines.append("")
        lines.append("| Severity | Count |")
        lines.append("|----------|-------|")
        lines.append(f"| 🔴 **Errors** | {len(errors)} |")
        lines.append(f"| 🟡 **Warnings** | {len(warnings)} |")
        lines.append(f"| 🔵 **Suggestions** | {len(infos)} |")
        lines.append("")

        # Detailed issues
        self.submission_results = submission_results
        self.template = template
        lines.extend(self._generate_submission_section())
        lines.append("")

        # Footer
        lines.append("---")
        lines.append("")
        lines.append(f"Report generated by **BibGuard** on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

        content = "\n".join(lines)
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(content)