Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| BibGuard Gradio web app — minimalist iframe layout. | |
| The right pane embeds the self-contained ``report.html`` produced by | |
| ``src/report/html_report.py`` via ``<iframe srcdoc=...>``. This makes the | |
| generated report the single source of truth (per-section filters, full-text | |
| search, dark mode, inline span highlighting all live inside it) and avoids | |
| re-rendering the same content inside Gradio with stale styles. | |
| """ | |
| from __future__ import annotations | |
| import base64 | |
| import logging | |
| import os | |
| import tempfile | |
| import time | |
| from pathlib import Path | |
| import gradio as gr | |
| from src.parsers import BibParser, TexParser | |
| from src.fetchers import ( | |
| ArxivFetcher, CrossRefFetcher, SemanticScholarFetcher, | |
| OpenAlexFetcher, DBLPFetcher, | |
| ) | |
| from src.analyzers import MetadataComparator, UsageChecker, DuplicateDetector | |
| from src.report.generator import ReportGenerator, EntryReport | |
| from src.config.yaml_config import ( | |
| BibGuardConfig, BibliographyConfig, SubmissionConfig, OutputConfig, | |
| ) | |
| from src.config.workflow import get_default_workflow | |
| from src.checkers import CHECKER_REGISTRY | |
| from src.checkers.retraction_checker import RetractionChecker | |
| from src.checkers.url_checker import URLChecker | |
| from src.utils import http as http_layer | |
| from src.utils.logging_setup import setup as setup_logging, capture_run | |
| from src.utils.validation import validate_bib, validate_tex, format_report | |
| from app_helper import fetch_and_compare_with_workflow | |
# Logging is initialised first so the import-time message below is routed
# through it. The value of the BIBGUARD_LOG environment variable (default
# "WARNING") is handed to setup_logging; its return value is kept as LOG_PATH
# and reported as the log file location.
LOG_PATH = setup_logging(os.environ.get("BIBGUARD_LOG", "WARNING"))
logger = logging.getLogger("bibguard.app")
# NOTE(review): if BIBGUARD_LOG is a level threshold, this INFO record is
# presumably filtered out under the default "WARNING" — confirm in setup_logging.
logger.info("BibGuard app starting (log file: %s)", LOG_PATH)
# Configure HTTP layer once at import time.
# The contact e-mail comes from BIBGUARD_CONTACT_EMAIL (empty string when
# unset); caching and retry settings are fixed here.
http_layer.configure(
    contact_email=os.environ.get("BIBGUARD_CONTACT_EMAIL", ""),
    cache_enabled=True,
    cache_ttl_hours=24,
    retry_total=5,
    retry_backoff_factor=1.5,
)
# --------------------------------------------------------------------- presets
# The three presets share the same bibliography toggles and enable every
# submission checker; they differ only in metadata lookup, URL liveness and
# retraction checking. Each preset gets its own ``submission`` dict so that
# mutating one preset never leaks into another.
PRESETS = {
    preset_name: {
        "check_metadata": with_metadata,
        "check_duplicates": True,
        "check_usage": True,
        "check_preprint_ratio": True,
        "url_liveness": with_url_liveness,
        "retraction": with_retraction,
        "submission": dict.fromkeys(
            (
                "caption", "reference", "formatting", "equation",
                "ai_artifacts", "sentence", "consistency", "acronym",
                "number", "citation_quality", "anonymization",
            ),
            True,
        ),
    }
    for preset_name, with_metadata, with_url_liveness, with_retraction in (
        ("Quick", False, False, False),
        ("Standard", False, False, True),
        ("Strict", True, True, True),
    )
}
| # ----------------------------------------------------------------------- CSS | |
| CUSTOM_CSS = """ | |
| @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap'); | |
| * { font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; } | |
| /* Reserve space for the vertical scrollbar so expanding the Advanced | |
| accordion (or anything else that adds content) doesn't shift the | |
| layout horizontally. `overflow-y: scroll` on html is the universal | |
| fallback for browsers without scrollbar-gutter. | |
| `overflow-x: hidden` on body kills any page-width jitter coming from | |
| inner elements that briefly overflow during streaming updates. */ | |
| html { scrollbar-gutter: stable; overflow-y: scroll; overflow-x: hidden; } | |
| body { overflow-x: hidden; } | |
| .gradio-container { | |
| max-width: 1400px !important; | |
| margin: 0 auto !important; | |
| padding: 0 20px !important; | |
| box-sizing: border-box !important; | |
| width: 100% !important; | |
| overflow-x: hidden !important; | |
| } | |
| /* Header strip */ | |
| .bg-header { | |
| padding: 14px 4px 12px !important; | |
| border-bottom: 1px solid #e5e7eb; | |
| margin-bottom: 14px; | |
| } | |
| /* ================================================================== | |
| Top toolbar β single horizontal row with all primary controls. | |
| Every primary control has the SAME explicit 56px height. The little | |
| filename/info chip beneath sits in a fixed 18px slot. The columns | |
| wrap that into a 78px tall toolbar that's identical across cells. | |
| ================================================================== */ | |
| .bg-toolbar { | |
| margin-bottom: 14px; | |
| gap: 10px !important; | |
| align-items: flex-start !important; | |
| } | |
| .bg-toolbar .gr-form { gap: 0 !important; } | |
| .bg-toolbar .gr-block { border: none !important; box-shadow: none !important; padding: 0 !important; } | |
| /* Common: any direct primary control fills column width */ | |
| .bg-toolbar > * { width: 100% !important; } | |
| /* ---- Upload buttons ---- */ | |
| .bg-upload-btn, | |
| .bg-upload-btn > .wrap, | |
| .bg-upload-btn > div { | |
| height: 56px !important; | |
| min-height: 56px !important; | |
| max-height: 56px !important; | |
| width: 100% !important; | |
| } | |
| .bg-upload-btn button { | |
| height: 56px !important; | |
| min-height: 56px !important; | |
| max-height: 56px !important; | |
| width: 100% !important; | |
| padding: 0 14px !important; | |
| font-size: 13px !important; | |
| font-weight: 500 !important; | |
| border-radius: 8px !important; | |
| border: 1px dashed #cbd5e1 !important; | |
| background: #f8fafc !important; | |
| color: #334155 !important; | |
| transition: border 0.15s, background 0.15s !important; | |
| line-height: 1 !important; | |
| } | |
| .bg-upload-btn button:hover { | |
| border-color: #2563eb !important; | |
| background: #eff6ff !important; | |
| color: #1e3a8a !important; | |
| } | |
| /* ---- Run / Stop button (same column, visibility-swapped) ---- */ | |
| .bg-run-btn, | |
| .bg-run-btn > .wrap, | |
| .bg-run-btn > div { | |
| height: 56px !important; | |
| min-height: 56px !important; | |
| max-height: 56px !important; | |
| width: 100% !important; | |
| } | |
| .bg-run-btn button { | |
| height: 56px !important; | |
| min-height: 56px !important; | |
| max-height: 56px !important; | |
| width: 100% !important; | |
| font-weight: 600 !important; | |
| border-radius: 8px !important; | |
| font-size: 14px !important; | |
| line-height: 1 !important; | |
| padding: 0 16px !important; | |
| } | |
| .bg-stop-btn button { | |
| background: #dc2626 !important; | |
| color: white !important; | |
| border: none !important; | |
| } | |
| .bg-stop-btn button:hover { background: #b91c1c !important; } | |
| /* ---- Preset radio as horizontal pill chips ---- */ | |
| .bg-preset, | |
| .bg-preset > div, | |
| .bg-preset > .wrap { | |
| height: 56px !important; | |
| min-height: 56px !important; | |
| max-height: 56px !important; | |
| padding: 0 !important; | |
| } | |
| .bg-preset > label, | |
| .bg-preset .label-wrap { display: none !important; } | |
| .bg-preset .wrap, | |
| .bg-preset > div > div, | |
| .bg-preset fieldset { | |
| display: flex !important; | |
| flex-direction: row !important; | |
| gap: 4px !important; | |
| flex-wrap: nowrap !important; | |
| width: 100% !important; | |
| height: 56px !important; | |
| align-items: stretch !important; | |
| border: none !important; | |
| padding: 0 !important; | |
| margin: 0 !important; | |
| } | |
| .bg-preset label { | |
| flex: 1 1 0 !important; | |
| margin: 0 !important; | |
| padding: 0 8px !important; | |
| height: 56px !important; | |
| min-height: 56px !important; | |
| max-height: 56px !important; | |
| border-radius: 8px !important; | |
| font-size: 13px !important; | |
| font-weight: 500 !important; | |
| border: 1px solid #e5e7eb !important; | |
| background: #ffffff !important; | |
| cursor: pointer !important; | |
| text-align: center !important; | |
| display: inline-flex !important; | |
| align-items: center !important; | |
| justify-content: center !important; | |
| line-height: 1 !important; | |
| color: #475569 !important; | |
| transition: background 0.15s, border 0.15s !important; | |
| white-space: nowrap !important; | |
| } | |
| .bg-preset label:hover { background: #f8fafc !important; border-color: #cbd5e1 !important; } | |
| .bg-preset input[type="radio"] { display: none !important; } | |
| .bg-preset label.selected, | |
| .bg-preset label:has(input:checked) { | |
| background: #1e3a8a !important; | |
| color: #ffffff !important; | |
| border-color: #1e3a8a !important; | |
| } | |
| /* ---- Caption chip beneath each toolbar control ---- */ | |
| .bg-fname { | |
| font-size: 11.5px; | |
| color: #94a3b8; | |
| padding: 4px 8px 0 8px; | |
| line-height: 1.3; | |
| overflow: hidden; | |
| text-overflow: ellipsis; | |
| white-space: nowrap; | |
| height: 18px; | |
| box-sizing: content-box; | |
| } | |
| .bg-fname.ok { color: #166534; font-weight: 500; } | |
| /* ================================================================== | |
| Advanced settings β gr.Row with each Checkbox as its own card. | |
| Trick: `display: contents` on Gradio's intermediate wrapper makes | |
| it vanish from the layout tree, so the actual checkbox blocks | |
| become direct flex children of .bg-row. Card style is applied to | |
| each block, not the wrapper, so we get N cards per row instead of | |
| one big box. | |
| ================================================================== */ | |
| .bg-row { | |
| display: flex !important; | |
| flex-direction: row !important; | |
| gap: 10px !important; | |
| align-items: stretch !important; | |
| padding: 4px 0 !important; | |
| } | |
| /* Flatten Gradio's intermediate `.form` / `.gr-form` wrapper so its | |
| children become direct flex items of .bg-row. */ | |
| .bg-row > .form, | |
| .bg-row > .gr-form { | |
| display: contents !important; | |
| } | |
| /* Some Gradio versions emit a plain `<div>` wrapper instead of `.form`. | |
| We can't safely `display: contents` every direct div (the spacer is | |
| one), but if the wrapper has only blocks inside, contents flatten it. */ | |
| .bg-row > div:not(.bg-row-spacer):not(.gr-block):not(.block) { | |
| display: contents !important; | |
| } | |
| /* Each individual checkbox block = a card */ | |
| .bg-row .gr-block, | |
| .bg-row .block { | |
| flex: 1 1 0 !important; | |
| min-width: 0 !important; | |
| background: #f8fafc !important; | |
| border: 1px solid #e5e7eb !important; | |
| border-radius: 8px !important; | |
| padding: 8px 12px !important; | |
| box-shadow: none !important; | |
| transition: background 0.15s, border 0.15s !important; | |
| } | |
| .bg-row .gr-block:hover, | |
| .bg-row .block:hover { | |
| background: #eff6ff !important; | |
| border-color: #cbd5e1 !important; | |
| } | |
| .bg-row label, | |
| .bg-row .gr-checkbox label { | |
| font-size: 13px !important; | |
| font-weight: 500 !important; | |
| line-height: 1.3 !important; | |
| color: #334155 !important; | |
| margin: 0 !important; | |
| padding: 0 !important; | |
| } | |
| .bg-row .gr-info, .bg-row [class*="info"] { display: none !important; } | |
| /* Spacer β invisible flex item that just preserves alignment */ | |
| .bg-row .bg-row-spacer { | |
| flex: 1 1 0 !important; | |
| background: transparent !important; | |
| border: none !important; | |
| box-shadow: none !important; | |
| padding: 0 !important; | |
| visibility: hidden !important; | |
| } | |
| /* ================================================================== | |
| Status strip β thin one-liner above the report. | |
| The Gradio HTML wrapper itself is pinned to its parent column's width | |
| so no inner content can change the page geometry during streaming. | |
| ================================================================== */ | |
| #bg-status-wrap, | |
| #bg-status-wrap > * { | |
| width: 100% !important; | |
| max-width: 100% !important; | |
| min-width: 0 !important; | |
| box-sizing: border-box !important; | |
| overflow-x: hidden !important; | |
| } | |
| .bg-status { | |
| padding: 10px 14px; | |
| border-radius: 10px; | |
| background: #f8fafc; | |
| border: 1px solid #e2e8f0; | |
| font-size: 12.5px; | |
| line-height: 1.45; | |
| color: #334155; | |
| margin: 8px 0 12px 0; | |
| max-width: 100%; | |
| overflow: hidden; /* never let inline content widen the page */ | |
| box-sizing: border-box; | |
| } | |
| .bg-status-row { | |
| display: flex; | |
| align-items: center; | |
| gap: 14px; | |
| flex-wrap: nowrap; /* one row, ellipsize the middle */ | |
| min-width: 0; | |
| width: 100%; | |
| } | |
| .bg-status .bg-status-stage { | |
| font-weight: 600; | |
| color: #1e3a8a; | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 8px; | |
| flex-shrink: 0; | |
| white-space: nowrap; | |
| } | |
| .bg-status .bg-status-detail { | |
| color: #475569; | |
| flex: 1 1 0; | |
| min-width: 0; | |
| overflow: hidden; | |
| text-overflow: ellipsis; | |
| white-space: nowrap; | |
| } | |
| .bg-status .bg-status-detail code { | |
| background: #eef2ff; | |
| padding: 1px 6px; | |
| border-radius: 4px; | |
| font-size: 11.5px; | |
| color: #1e3a8a; | |
| } | |
| .bg-status .bg-status-meta { | |
| color: #64748b; | |
| font-size: 11.5px; | |
| display: inline-flex; | |
| flex-wrap: nowrap; | |
| gap: 12px; | |
| flex-shrink: 0; | |
| white-space: nowrap; | |
| } | |
| .bg-status.done { background: #f0fdf4; border-color: #bbf7d0; } | |
| .bg-status.done .bg-status-stage { color: #166534; } | |
| .bg-status.error { background: #fef2f2; border-color: #fecaca; } | |
| .bg-status.error .bg-status-stage { color: #b91c1c; } | |
| .bg-status .spin { | |
| display: inline-block; | |
| width: 10px; height: 10px; | |
| border: 2px solid #cbd5e1; | |
| border-top-color: #2563eb; | |
| border-radius: 50%; | |
| animation: bg-spin 0.9s linear infinite; | |
| } | |
| @keyframes bg-spin { to { transform: rotate(360deg); } } | |
| /* ================================================================== | |
| Report area β full-width iframe. | |
| ================================================================== */ | |
| .bg-main { padding: 0 !important; } | |
| .bg-report-iframe { | |
| width: 100%; | |
| height: 80vh; | |
| min-height: 620px; | |
| border: 1px solid #e5e7eb; | |
| border-radius: 12px; | |
| background: white; | |
| box-shadow: 0 1px 2px rgba(0,0,0,0.04); | |
| } | |
| /* Empty / error placeholder (full-width, centered card) */ | |
| .bg-empty { | |
| display: flex; align-items: center; justify-content: center; | |
| flex-direction: column; gap: 14px; | |
| min-height: 60vh; | |
| color: #6b7280; text-align: center; | |
| border: 2px dashed #e5e7eb; border-radius: 12px; | |
| padding: 56px 24px; | |
| background: #fafafa; | |
| } | |
| .bg-empty .bg-empty-icon { font-size: 56px; line-height: 1; } | |
| .bg-empty .bg-empty-title { font-size: 17px; font-weight: 600; color: #374151; } | |
| .bg-empty .bg-empty-hint { font-size: 14px; max-width: 580px; line-height: 1.6; } | |
| .bg-empty .bg-empty-hint code { background: #f3f4f6; padding: 1px 6px; border-radius: 4px; font-size: 13px; } | |
| /* Compact downloads section */ | |
| .bg-downloads { gap: 6px !important; } | |
| .bg-downloads .gr-file { min-height: auto !important; } | |
| .bg-downloads .bg-file-input > label > div { | |
| height: 52px !important; | |
| min-height: 52px !important; | |
| max-height: 52px !important; | |
| } | |
| /* Footer */ | |
| .bg-footer { | |
| text-align: center; | |
| margin-top: 18px; | |
| padding-top: 12px; | |
| border-top: 1px solid #f1f5f9; | |
| font-size: 11.5px; | |
| color: #9ca3af; | |
| } | |
| .bg-footer code { background: #f3f4f6; padding: 1px 5px; border-radius: 3px; font-size: 11px; } | |
| .bg-footer a { color: #6b7280; text-decoration: none; } | |
| .bg-footer a:hover { text-decoration: underline; } | |
| /* Trim accordion chrome a bit */ | |
| .gr-accordion { border-radius: 10px !important; border: 1px solid #e5e7eb !important; } | |
| .gr-accordion > .label-wrap { padding: 8px 12px !important; font-size: 13px !important; } | |
| @media (prefers-color-scheme: dark) { | |
| .bg-empty { background: #161b22; border-color: #2a313c; color: #9ca3af; } | |
| .bg-empty .bg-empty-title { color: #e6edf3; } | |
| .bg-empty .bg-empty-hint code { background: #21262d; } | |
| .bg-report-iframe { background: #0d1117; border-color: #2a313c; box-shadow: none; } | |
| .bg-status { background: #0f172a; border-color: #1e293b; color: #cbd5e1; } | |
| .bg-status .bg-status-stage { color: #93c5fd; } | |
| .bg-status .bg-status-detail { color: #94a3b8; } | |
| .bg-status .bg-status-detail code { background: #1e293b; color: #93c5fd; } | |
| .bg-status .bg-status-meta { color: #64748b; } | |
| .bg-status.done { background: #052e1a; border-color: #14532d; } | |
| .bg-status.done .bg-status-stage { color: #86efac; } | |
| .bg-status.error { background: #2a0e0e; border-color: #7f1d1d; } | |
| .bg-preset label { background: #161b22 !important; border-color: #2a313c !important; color: #cbd5e1 !important; } | |
| .bg-preset label:hover { background: #1e293b !important; } | |
| .bg-preset .selected { background: #2563eb !important; border-color: #2563eb !important; } | |
| .bg-footer { border-color: #1e293b; } | |
| } | |
| """ | |
# Static placeholder card for the report area; uses the ``bg-empty`` classes
# styled in CUSTOM_CSS. Presumably the initial content before any run — the
# call site is not in this part of the file.
# NOTE(review): the icon glyph below looks mis-encoded in this copy of the
# file (likely an emoji originally) — confirm against the original source.
EMPTY_PANEL_HTML = """
<div class="bg-empty">
<div class="bg-empty-icon">π</div>
<div class="bg-empty-title">Your interactive report appears here</div>
<div class="bg-empty-hint">
Upload a <code>.bib</code> file and a <code>.tex</code> file in the toolbar above,
pick a preset, then press <strong>Run check</strong>. The report renders as a
self-contained HTML page with per-section filters, full-text search,
inline span highlighting, and dark-mode support.
</div>
</div>
"""
# Static "idle" status strip built from the same ``bg-status`` markup that
# ``_status_html`` produces, but without a state modifier class.
# NOTE(review): the glyph in front of "Idle" also looks mis-encoded — confirm.
EMPTY_STATUS_HTML = (
    '<div class="bg-status">'
    '<div class="bg-status-row">'
    '<span class="bg-status-stage">β Idle</span>'
    '<span class="bg-status-detail">Upload <code>.bib</code> + <code>.tex</code> '
    'and press <strong>Run check</strong> to begin.</span>'
    '</div></div>'
)
| def _placeholder(message: str, color: str = "#b91c1c") -> str: | |
| """Inline error/info card shown in place of the iframe.""" | |
| return ( | |
| f'<div class="bg-empty" style="color:{color};border-color:{color}33">' | |
| f'<div class="bg-empty-icon">β οΈ</div>' | |
| f'<div class="bg-empty-title">{message}</div>' | |
| f'</div>' | |
| ) | |
| def _html_to_iframe(html: str) -> str: | |
| """ | |
| Embed an HTML document inside ``<iframe srcdoc>``. | |
| We escape only ``&`` and ``"`` β these are the two characters that can | |
| break the attribute value or get re-decoded as entities. ``<`` and ``>`` | |
| must stay raw, otherwise the inner document would be HTML-encoded. | |
| """ | |
| escaped = html.replace("&", "&").replace('"', """) | |
| return ( | |
| f'<iframe class="bg-report-iframe" srcdoc="{escaped}" ' | |
| f'sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" ' | |
| f'loading="lazy"></iframe>' | |
| ) | |
| def _status_html(stage: str, detail: str = "", meta: list[str] | None = None, | |
| state: str = "running") -> str: | |
| """Render the live-status strip shown above the report. | |
| Layout is a single horizontal row: [stage] [detail] [meta chips]. | |
| Wraps cleanly on narrow screens. | |
| """ | |
| if state == "running": | |
| stage_icon = '<span class="spin"></span>' | |
| elif state == "done": | |
| stage_icon = '<span>β</span>' | |
| elif state == "error": | |
| stage_icon = '<span>β </span>' | |
| else: | |
| stage_icon = '<span>β</span>' | |
| detail_html = f'<span class="bg-status-detail">{detail}</span>' if detail else '<span class="bg-status-detail"></span>' | |
| meta_html = "" | |
| if meta: | |
| meta_html = ( | |
| '<span class="bg-status-meta">' | |
| + " ".join(f"<span>{m}</span>" for m in meta) | |
| + "</span>" | |
| ) | |
| return ( | |
| f'<div class="bg-status {state}">' | |
| f'<div class="bg-status-row">' | |
| f'<span class="bg-status-stage">{stage_icon}<span>{stage}</span></span>' | |
| f'{detail_html}{meta_html}' | |
| f'</div></div>' | |
| ) | |
# --------------------------------------------------------------- config glue
def create_config_from_ui(
    check_metadata, check_usage, check_duplicates, check_preprint_ratio,
    caption, reference, formatting, equation, ai_artifacts,
    sentence, consistency, acronym, number, citation_quality, anonymization,
) -> BibGuardConfig:
    """Translate the UI toggle values into a ``BibGuardConfig``.

    The first four arguments populate the bibliography section, the remaining
    eleven the submission section. Relevance checking is always switched off
    and the output section is fixed to ``quiet=True, minimal_verified=False``.

    Returns:
        A fresh ``BibGuardConfig`` with its bibliography, submission and
        output sections replaced.
    """
    bibliography_options = {
        "check_metadata": check_metadata,
        "check_usage": check_usage,
        "check_duplicates": check_duplicates,
        "check_preprint_ratio": check_preprint_ratio,
        "check_relevance": False,  # LLM disabled in web mode
    }
    submission_options = {
        "caption": caption,
        "reference": reference,
        "formatting": formatting,
        "equation": equation,
        "ai_artifacts": ai_artifacts,
        "sentence": sentence,
        "consistency": consistency,
        "acronym": acronym,
        "number": number,
        "citation_quality": citation_quality,
        "anonymization": anonymization,
    }

    cfg = BibGuardConfig()
    cfg.bibliography = BibliographyConfig(**bibliography_options)
    cfg.submission = SubmissionConfig(**submission_options)
    cfg.output = OutputConfig(quiet=True, minimal_verified=False)
    return cfg
def apply_preset(name: str):
    """Return the 17 toggle values for preset ``name``, in a fixed order.

    Order of the returned tuple: the four bibliography toggles (metadata,
    usage, duplicates, preprint ratio), the eleven submission checker toggles,
    then URL liveness and retraction. An unknown ``name`` falls back to the
    ``"Standard"`` preset.
    """
    preset = PRESETS.get(name, PRESETS["Standard"])
    submission = preset["submission"]

    bibliography_keys = (
        "check_metadata", "check_usage", "check_duplicates", "check_preprint_ratio",
    )
    submission_keys = (
        "caption", "reference", "formatting", "equation",
        "ai_artifacts", "sentence", "consistency", "acronym",
        "number", "citation_quality", "anonymization",
    )
    network_keys = ("url_liveness", "retraction")

    return (
        *[preset[key] for key in bibliography_keys],
        *[submission[key] for key in submission_keys],
        *[preset[key] for key in network_keys],
    )
| _PRESET_CAPTIONS = { | |
| "Quick": "local checks only Β· no network Β· instant", | |
| "Standard": "local checks + retraction lookup (CrossRef)", | |
| "Strict": "+ URL liveness + multi-source metadata (slow)", | |
| } | |
| def _preset_caption_html(name: str) -> str: | |
| text = _PRESET_CAPTIONS.get(name, "") | |
| return f'<div class="bg-fname" style="text-align:center">{text}</div>' | |
# ------------------------------------------------------------------ run_check
# Streaming generator. Each yield is a 7-tuple:
#   (iframe_html, status_html, html_path, md_path, json_path,
#    cleaned_bib_path, log_path)
# `capture_run` wraps the whole pipeline and is given
# `<out_dir>/bibguard.log` as its target, so the per-run log sits next to the
# report artifacts and its path can be handed back for download. The
# warning/error counters it exposes are logged when the run ends.
def run_check(
    bib_file, tex_file,
    check_metadata, check_usage, check_duplicates, check_preprint_ratio,
    caption, reference, formatting, equation, ai_artifacts,
    sentence, consistency, acronym, number, citation_quality, anonymization,
    url_liveness=False, retraction=True,
):
    """Run the full check pipeline as a streaming generator with per-run logging.

    `bib_file` / `tex_file` are filesystem path strings (carried by gr.State),
    not gr.File objects. The status panel is the single source of progress
    feedback — no separate gr.Progress bar.

    Yields:
        7-tuples ``(iframe_html, status_html, html_path, md_path, json_path,
        cleaned_bib_path, log_path)``. When either input is missing a single
        error tuple is yielded and the generator stops. If the inner pipeline
        raises, the exception is logged and reported as one final error tuple
        instead of propagating.
    """
    from html import escape as _escape_html

    started = time.time()

    def _elapsed() -> str:
        return f"⏱ {int(time.time() - started)}s"

    # Guard: without both inputs there is nothing to run. No artifact
    # directory or log is created in this case.
    if not bib_file or not tex_file:
        yield (
            _placeholder("Please choose both a .bib and a .tex file in the toolbar."),
            _status_html("Waiting for files",
                         "Pick a .bib and a .tex file from the toolbar to start.",
                         state="error"),
            None, None, None, None, None,
        )
        return

    # Allocate the artifact dir up-front so the per-run log lives next to
    # the report files. The directory is intentionally not removed here: the
    # yielded paths must stay valid after the generator finishes.
    out_dir = Path(tempfile.mkdtemp(prefix="bibguard_"))
    log_path_target = out_dir / "bibguard.log"

    # Reset per-source circuit breakers so a previous run's flaky source
    # doesn't carry over and skip valid lookups in this run.
    http_layer.reset_breakers()

    with capture_run(target_path=log_path_target) as (log_path, log_stats):
        logger.info("=== run_check start: bib=%s tex=%s ===", bib_file, tex_file)
        try:
            yield from _run_check_impl(
                bib_file, tex_file, out_dir, log_path, log_stats,
                check_metadata, check_usage, check_duplicates, check_preprint_ratio,
                caption, reference, formatting, equation, ai_artifacts,
                sentence, consistency, acronym, number, citation_quality, anonymization,
                url_liveness, retraction, started, _elapsed,
            )
        except Exception as e:
            logger.exception("run_check crashed (entry-level guard)")
            # Some exceptions stringify to "", which would leave the user
            # with an empty message; fall back to the exception type name.
            error_text = str(e) or type(e).__name__
            # The status detail is assembled as HTML right here, so the
            # exception text must be escaped before interpolation. The
            # placeholder card is handed the message as plain text.
            safe_error = _escape_html(error_text, quote=False)
            yield (
                _placeholder(f"Unhandled error: {error_text}"),
                _status_html(
                    "Failed",
                    f"{safe_error} — see <code>bibguard.log</code> for the full traceback.",
                    state="error",
                ),
                None, None, None, None, str(log_path),
            )
        finally:
            logger.info("=== run_check end: warnings=%d errors=%d ===",
                        log_stats.warnings, log_stats.errors)
| def _run_check_impl( | |
| bib_file, tex_file, out_dir, log_path, log_stats, | |
| check_metadata, check_usage, check_duplicates, check_preprint_ratio, | |
| caption, reference, formatting, equation, ai_artifacts, | |
| sentence, consistency, acronym, number, citation_quality, anonymization, | |
| url_liveness, retraction, started, _elapsed, | |
| ): | |
| """Inner pipeline. Wrapped in `capture_run` by `run_check`. | |
| Every yield is a 7-tuple ending with the log path so the user can | |
| download `bibguard.log` even from intermediate updates. | |
| """ | |
| log_path_str = str(log_path) | |
| bib_path = Path(bib_file) | |
| tex_path = Path(tex_file) | |
| logger.info("Inputs: bib=%s tex=%s out_dir=%s", bib_path, tex_path, out_dir) | |
| def _meta_with_logs(extra: list[str]) -> list[str]: | |
| out = list(extra) | |
| if log_stats.warnings or log_stats.errors: | |
| out.append(f"β {log_stats.warnings}w / {log_stats.errors}e logged") | |
| return out | |
| yield ( | |
| gr.update(), | |
| _status_html("Validating files", | |
| f"Reading <code>{bib_path.name}</code> and <code>{tex_path.name}</code>", | |
| meta=_meta_with_logs([_elapsed()])), | |
| None, None, None, None, log_path_str, | |
| ) | |
| # Pre-flight content validation | |
| bib_rep = validate_bib(bib_path) | |
| tex_rep = validate_tex(tex_path) | |
| msg = "\n".join(filter(None, [ | |
| format_report(bib_rep, bib_path.name), | |
| format_report(tex_rep, tex_path.name), | |
| ])) | |
| if not bib_rep.ok or not tex_rep.ok: | |
| logger.error("File validation failed:\n%s", msg) | |
| block = ( | |
| f'<div class="bg-empty" style="color:#b91c1c;border-color:#b91c1c33">' | |
| f'<div class="bg-empty-icon">β οΈ</div>' | |
| f'<div class="bg-empty-title">File validation failed</div>' | |
| f'<pre style="white-space:pre-wrap;font-size:13px;color:#7f1d1d;' | |
| f'background:#fef2f2;padding:12px;border-radius:6px;max-width:540px">{msg}</pre>' | |
| f'</div>' | |
| ) | |
| yield ( | |
| block, | |
| _status_html("File validation failed", msg.replace("\n", "<br>"), | |
| state="error"), | |
| None, None, None, None, log_path_str, | |
| ) | |
| return | |
| elif msg: | |
| logger.info("Validation warnings:\n%s", msg) | |
| config = create_config_from_ui( | |
| check_metadata, check_usage, check_duplicates, check_preprint_ratio, | |
| caption, reference, formatting, equation, ai_artifacts, | |
| sentence, consistency, acronym, number, citation_quality, anonymization, | |
| ) | |
| yield ( | |
| gr.update(), | |
| _status_html("Parsing", "Loading bibliography and LaTeX source", | |
| meta=_meta_with_logs([_elapsed()])), | |
| None, None, None, None, log_path_str, | |
| ) | |
| tex_content = tex_path.read_text(encoding='utf-8', errors='replace') | |
| bib_parser = BibParser() | |
| entries = bib_parser.parse_file(str(bib_path)) | |
| tex_parser = TexParser() | |
| tex_parser.parse_file(str(tex_path)) | |
| logger.info("Parsed %d bib entries from %s", len(entries), bib_path.name) | |
| bib_config = config.bibliography | |
| # Init components | |
| arxiv_fetcher = crossref_fetcher = ss_fetcher = oa_fetcher = dblp_fetcher = None | |
| comparator = usage_checker = duplicate_detector = None | |
| if bib_config.check_metadata: | |
| arxiv_fetcher = ArxivFetcher() | |
| ss_fetcher = SemanticScholarFetcher() | |
| oa_fetcher = OpenAlexFetcher() | |
| dblp_fetcher = DBLPFetcher() | |
| crossref_fetcher = CrossRefFetcher() | |
| comparator = MetadataComparator() | |
| if bib_config.check_usage: | |
| usage_checker = UsageChecker(tex_parser) | |
| if bib_config.check_duplicates: | |
| duplicate_detector = DuplicateDetector() | |
| report_gen = ReportGenerator( | |
| minimal_verified=False, | |
| check_preprint_ratio=bib_config.check_preprint_ratio, | |
| preprint_warning_threshold=bib_config.preprint_warning_threshold, | |
| ) | |
| report_gen.set_metadata([str(bib_path)], [str(tex_path)]) | |
| # Submission quality checks | |
| yield ( | |
| gr.update(), | |
| _status_html("LaTeX quality checks", | |
| f"Running {len(config.submission.get_enabled_checkers())} checkers on the LaTeX source", | |
| meta=_meta_with_logs([f"π {len(entries)} bib entries", _elapsed()])), | |
| None, None, None, None, log_path_str, | |
| ) | |
| submission_results = [] | |
| for name in config.submission.get_enabled_checkers(): | |
| if name in CHECKER_REGISTRY: | |
| try: | |
| checker = CHECKER_REGISTRY[name]() | |
| results = checker.check(tex_content, {}) | |
| for r in results: | |
| r.file_path = str(tex_path) | |
| submission_results.extend(results) | |
| except Exception: | |
| logger.exception("Checker %s crashed", name) | |
| report_gen.set_submission_results(submission_results, None) | |
| if bib_config.check_duplicates and duplicate_detector: | |
| try: | |
| report_gen.set_duplicate_groups(duplicate_detector.find_duplicates(entries)) | |
| except Exception: | |
| logger.exception("Duplicate detection crashed") | |
| if bib_config.check_usage and usage_checker: | |
| try: | |
| report_gen.set_missing_citations(usage_checker.get_missing_entries(entries)) | |
| except Exception: | |
| logger.exception("Missing-citation lookup crashed") | |
| # Per-entry workflow | |
| total = max(1, len(entries)) | |
| workflow_config = get_default_workflow() | |
| verified_count = 0 | |
| flagged_count = 0 | |
| not_found_count = 0 | |
| last_yield = time.time() | |
| def _identifier_chip(entry) -> str: | |
| """Tiny inline hint about which IDs we have for this entry.""" | |
| bits = [] | |
| if entry.doi: bits.append("DOI") | |
| if entry.has_arxiv: bits.append("arXiv") | |
| if entry.title and not bits: bits.append("title") | |
| elif entry.title: bits.append("title") | |
| return " + ".join(bits) if bits else "no identifiers" | |
| def _outcome_label(cmp) -> str: | |
| if cmp is None: | |
| return "" | |
| if cmp.source == "unable": | |
| return "<span style='color:#b45309'>? no metadata</span>" | |
| if cmp.is_match: | |
| return f"<span style='color:#166534'>β verified by {cmp.source}</span>" | |
| return f"<span style='color:#b45309'>β flagged ({cmp.source})</span>" | |
| for i, entry in enumerate(entries): | |
| # ββ Pre-fetch status: announce identifier set BEFORE the network roundtrip | |
| # so the user sees what's being attempted, not just the entry name. | |
| if bib_config.check_metadata and comparator: | |
| now = time.time() | |
| if now - last_yield > 0.4 or i == 0: | |
| ids = _identifier_chip(entry) | |
| detail = f"<code>{entry.key}</code> Β· querying via <strong>{ids}</strong>" | |
| if entry.title: | |
| short = entry.title[:70] + ("β¦" if len(entry.title) > 70 else "") | |
| detail += f" β <span style='color:#64748b'>{short}</span>" | |
| yield ( | |
| gr.update(), | |
| _status_html( | |
| f"Verifying entry {i + 1}/{total}", | |
| detail, | |
| meta=_meta_with_logs([ | |
| f"π {total} total", | |
| f"β {verified_count}", | |
| f"β {flagged_count}", | |
| f"? {not_found_count}", | |
| _elapsed(), | |
| ]), | |
| ), | |
| None, None, None, None, log_path_str, | |
| ) | |
| last_yield = now | |
| usage_result = None | |
| comparison_result = None | |
| try: | |
| if usage_checker: | |
| usage_result = usage_checker.check_usage(entry) | |
| except Exception: | |
| logger.exception("Usage check crashed for entry=%s", entry.key) | |
| try: | |
| if bib_config.check_metadata and comparator: | |
| comparison_result = fetch_and_compare_with_workflow( | |
| entry, workflow_config, arxiv_fetcher, crossref_fetcher, | |
| ss_fetcher, oa_fetcher, dblp_fetcher, comparator, | |
| ) | |
| if comparison_result is None or comparison_result.source == "unable": | |
| not_found_count += 1 | |
| elif comparison_result.is_match: | |
| verified_count += 1 | |
| else: | |
| flagged_count += 1 | |
| except Exception: | |
| logger.exception("Metadata fetch crashed for entry=%s", entry.key) | |
| report_gen.add_entry_report(EntryReport( | |
| entry=entry, comparison=comparison_result, | |
| usage=usage_result, evaluations=[], | |
| )) | |
| # ββ Post-fetch status: show outcome inline so the user can watch | |
| # results stream in (verified / flagged / not found). | |
| now = time.time() | |
| if now - last_yield > 0.4 or i == total - 1: | |
| outcome = _outcome_label(comparison_result) | |
| detail_parts = [f"<code>{entry.key}</code>"] | |
| if outcome: | |
| detail_parts.append(outcome) | |
| if entry.title: | |
| short = entry.title[:70] + ("β¦" if len(entry.title) > 70 else "") | |
| detail_parts.append(f"<span style='color:#64748b'>{short}</span>") | |
| detail = " Β· ".join(detail_parts) | |
| meta = _meta_with_logs([ | |
| f"π {i + 1}/{total}", | |
| f"β {verified_count}", | |
| f"β {flagged_count}", | |
| f"? {not_found_count}", | |
| _elapsed(), | |
| ]) | |
| yield ( | |
| gr.update(), | |
| _status_html(f"Bibliography {i + 1}/{total}", detail, meta=meta), | |
| None, None, None, None, log_path_str, | |
| ) | |
| last_yield = now | |
| if retraction: | |
| try: | |
| doi_count = sum(1 for e in entries if getattr(e, "doi", "")) | |
| yield ( | |
| gr.update(), | |
| _status_html("Retraction lookups", | |
| f"Querying CrossRef for {doi_count} DOI(s)", | |
| meta=_meta_with_logs([_elapsed()])), | |
| None, None, None, None, log_path_str, | |
| ) | |
| report_gen.set_retraction_findings(RetractionChecker().check_entries(entries)) | |
| except Exception: | |
| logger.exception("Retraction lookup crashed") | |
| if url_liveness: | |
| try: | |
| url_count = sum(1 for e in entries if getattr(e, "url", "")) | |
| yield ( | |
| gr.update(), | |
| _status_html("URL liveness", | |
| f"HEAD-checking {url_count} URL(s) in parallel", | |
| meta=_meta_with_logs([_elapsed()])), | |
| None, None, None, None, log_path_str, | |
| ) | |
| report_gen.set_url_findings(URLChecker().check_entries(entries)) | |
| except Exception: | |
| logger.exception("URL liveness crashed") | |
| # Save artifacts | |
| yield ( | |
| gr.update(), | |
| _status_html("Building report", | |
| "Rendering self-contained HTML, JSON, and Markdown", | |
| meta=_meta_with_logs([_elapsed()])), | |
| None, None, None, None, log_path_str, | |
| ) | |
| html_path = out_dir / "report.html" | |
| md_path = out_dir / "bibliography_report.md" | |
| json_path = out_dir / "report.json" | |
| cleaned_bib_path: Path | None = None | |
| try: | |
| report_gen.save_html(str(html_path)) | |
| report_gen.save_bibliography_report(str(md_path)) | |
| report_gen.save_json(str(json_path)) | |
| if usage_checker: | |
| used_keys = {er.entry.key for er in report_gen.entries if er.usage and er.usage.is_used} | |
| if used_keys: | |
| cleaned_bib_path = out_dir / f"{bib_path.stem}_only_used.bib" | |
| bib_parser.filter_file(str(bib_path), str(cleaned_bib_path), used_keys) | |
| except Exception: | |
| logger.exception("Artifact generation failed") | |
| # Embed report.html as iframe srcdoc | |
| if html_path.exists(): | |
| iframe_html = _html_to_iframe(html_path.read_text(encoding='utf-8')) | |
| else: | |
| iframe_html = _placeholder("Report generation failed β see bibguard.log.") | |
| meta = _meta_with_logs([ | |
| f"π {len(entries)} entries", | |
| f"β {verified_count} verified", | |
| f"β {flagged_count} flagged", | |
| _elapsed(), | |
| ]) | |
| state = "done" | |
| summary = "Report ready. Use the right pane to filter, search, and copy fixes." | |
| if log_stats.errors > 0: | |
| state = "error" | |
| summary = (f"Done with {log_stats.errors} error(s) and {log_stats.warnings} warning(s) " | |
| "logged β see <code>bibguard.log</code> for full tracebacks.") | |
| elif log_stats.warnings > 0: | |
| summary = (f"Report ready ({log_stats.warnings} warnings logged β see " | |
| "<code>bibguard.log</code>).") | |
| yield ( | |
| iframe_html, | |
| _status_html("Done", summary, meta=meta, state=state), | |
| str(html_path) if html_path.exists() else None, | |
| str(md_path) if md_path.exists() else None, | |
| str(json_path) if json_path.exists() else None, | |
| str(cleaned_bib_path) if (cleaned_bib_path and cleaned_bib_path.exists()) else None, | |
| log_path_str, | |
| ) | |
| # --------------------------------------------------------------------- layout | |
def create_app() -> gr.Blocks:
    """Build the BibGuard Gradio interface and wire up its events.

    Layout, top to bottom: header, a toolbar (two upload buttons, the preset
    radio, Run/Stop buttons), an "Advanced settings" accordion of per-check
    checkboxes, the status strip, the report panel and a "Downloads"
    accordion.

    Event wiring:
      * changing the preset updates every checkbox and the preset caption;
      * each upload button stores the chosen path in a ``gr.State`` and
        shows the file name next to the button;
      * Run hides itself / shows Stop, streams ``run_check`` into the report,
        status and download components, then restores the buttons;
      * Stop cancels the streaming ``run_check`` step and restores the
        buttons.

    Returns:
        The assembled ``gr.Blocks`` app (not launched).
    """
    # Local import: only needed here, to escape user-supplied file names.
    from html import escape as _escape_html

    no_file_html = '<div class="bg-fname">no file selected</div>'

    # Inline app icon as a base64 data URL so it works regardless of cwd.
    icon_html = '<span style="font-size:28px">π‘οΈ</span>'
    try:
        icon_path = Path(__file__).parent / "assets" / "icon-192.png"
        if icon_path.exists():
            b64 = base64.b64encode(icon_path.read_bytes()).decode()
            icon_html = (
                f'<img src="data:image/png;base64,{b64}" '
                f'style="width:32px;height:32px;border-radius:6px" alt="BibGuard">'
            )
    except Exception as e:
        # Best effort: the emoji fallback above stays in place.
        logger.debug("Icon load failed; using emoji fallback: %s", e, exc_info=True)

    with gr.Blocks(
        title="BibGuard β Bibliography & LaTeX Quality Auditor",
    ) as app:
        gr.HTML(f"""
        <div class="bg-header" style="display:flex;align-items:center;gap:10px">
        {icon_html}
        <strong style="font-size:18px">BibGuard</strong>
        <span style="color:#6b7280;font-size:13px">β Bibliography & LaTeX quality auditor</span>
        <span style="flex:1"></span>
        <a href="https://github.com/thinkwee/BibGuard" target="_blank"
        style="color:#6b7280;text-decoration:none;font-size:13px">GitHub β</a>
        </div>
        """)

        # ------------------------- Top toolbar -------------------------
        # All primary controls on a single horizontal row, every primary
        # widget pinned to 56px height. gr.UploadButton replaces gr.File
        # because the latter's drop-zone doesn't shrink to a toolbar.
        with gr.Row(elem_classes=["bg-toolbar"]):
            with gr.Column(scale=2, min_width=200):
                bib_btn = gr.UploadButton(
                    "π Choose .bib file",
                    file_types=[".bib"], file_count="single",
                    elem_classes=["bg-upload-btn"],
                )
                bib_status = gr.HTML(no_file_html)
            with gr.Column(scale=2, min_width=200):
                tex_btn = gr.UploadButton(
                    "π Choose .tex file",
                    file_types=[".tex"], file_count="single",
                    elem_classes=["bg-upload-btn"],
                )
                tex_status = gr.HTML(no_file_html)
            with gr.Column(scale=3, min_width=280):
                preset = gr.Radio(
                    choices=list(PRESETS.keys()),
                    value="Standard",
                    show_label=False,
                    elem_classes=["bg-preset"],
                )
                preset_caption = gr.HTML(
                    _preset_caption_html("Standard"),
                )
            with gr.Column(scale=1, min_width=140):
                run_btn = gr.Button("βΆ Run check", variant="primary",
                                    elem_classes=["bg-run-btn"])
                stop_btn = gr.Button("βΌ Stop", variant="stop",
                                     elem_classes=["bg-run-btn", "bg-stop-btn"],
                                     visible=False)
                # Placeholder line so this column matches the height of the
                # columns that show a file-name chip. Written as an explicit
                # entity so the non-breaking space cannot be lost.
                gr.HTML('<div class="bg-fname" style="text-align:center">&nbsp;</div>')

        # Holds the selected file paths (strings). Updated by the UploadButton
        # callbacks below so run_check sees plain paths regardless of how the
        # user picked the files.
        bib_path_state = gr.State(value=None)
        tex_path_state = gr.State(value=None)

        # Advanced fine-grained toggles. Default closed: most users just
        # pick a preset and go. Each tab is composed of gr.Row blocks of
        # exactly 4 cells so columns line up vertically. Short rows are
        # padded with invisible spacer HTML.
        def _spacer():
            """Add one invisible cell to pad a short checkbox row."""
            return gr.HTML('<div class="bg-row-spacer">&nbsp;</div>',
                           elem_classes=["bg-row-spacer"])

        with gr.Accordion("βοΈ Advanced settings", open=False):
            with gr.Tabs():
                with gr.TabItem("Bibliography"):
                    with gr.Row(elem_classes=["bg-row"]):
                        check_metadata = gr.Checkbox(label="Metadata verify", value=False)
                        check_usage = gr.Checkbox(label="Usage", value=True)
                        check_duplicates = gr.Checkbox(label="Duplicates", value=True)
                        check_preprint_ratio = gr.Checkbox(label="Preprints", value=True)
                    with gr.Row(elem_classes=["bg-row"]):
                        retraction = gr.Checkbox(label="Retractions", value=True)
                        url_liveness = gr.Checkbox(label="URL liveness", value=False)
                        _spacer()
                        _spacer()
                with gr.TabItem("LaTeX format"):
                    with gr.Row(elem_classes=["bg-row"]):
                        caption = gr.Checkbox(label="Captions", value=True)
                        reference = gr.Checkbox(label="References", value=True)
                        formatting = gr.Checkbox(label="Formatting", value=True)
                        equation = gr.Checkbox(label="Equations", value=True)
                with gr.TabItem("Writing"):
                    with gr.Row(elem_classes=["bg-row"]):
                        ai_artifacts = gr.Checkbox(label="AI artifacts", value=True)
                        sentence = gr.Checkbox(label="Sentences", value=True)
                        consistency = gr.Checkbox(label="Consistency", value=True)
                        acronym = gr.Checkbox(label="Acronyms", value=True)
                    with gr.Row(elem_classes=["bg-row"]):
                        number = gr.Checkbox(label="Numbers", value=True)
                        citation_quality = gr.Checkbox(label="Citations", value=True)
                        anonymization = gr.Checkbox(label="Anonymization", value=True)
                        _spacer()

        # ------------------------- Status strip -------------------------
        status_panel = gr.HTML(value=EMPTY_STATUS_HTML, elem_id="bg-status-wrap")

        # ------------------------- Report (full width) ------------------
        with gr.Row(elem_classes=["bg-main"]):
            report_panel = gr.HTML(value=EMPTY_PANEL_HTML)

        # ------------------------- Downloads ----------------------------
        with gr.Accordion("π₯ Downloads", open=False):
            with gr.Row(elem_classes=["bg-downloads"]):
                download_html = gr.File(label="report.html (offline)",
                                        interactive=False, elem_classes=["bg-file-input"])
                download_md = gr.File(label="bibliography_report.md",
                                      interactive=False, elem_classes=["bg-file-input"])
                download_json = gr.File(label="report.json",
                                        interactive=False, elem_classes=["bg-file-input"])
                download_bib = gr.File(label="cleaned .bib",
                                       interactive=False, elem_classes=["bg-file-input"])
                download_log = gr.File(label="bibguard.log",
                                       interactive=False, elem_classes=["bg-file-input"])

        gr.HTML(
            '<div class="bg-footer">'
            'Set <code>$BIBGUARD_CONTACT_EMAIL</code> for the polite-pool User-Agent Β· '
            f'persistent log at <code>{LOG_PATH}</code> Β· '
            'set <code>BIBGUARD_DEBUG=1</code> for verbose console output.'
            '</div>'
        )

        # Every per-check toggle, in the positional order shared by
        # apply_preset's outputs and run_check's inputs. Defined once so the
        # two call sites cannot drift apart.
        check_toggles = [
            check_metadata, check_usage, check_duplicates, check_preprint_ratio,
            caption, reference, formatting, equation,
            ai_artifacts, sentence, consistency, acronym,
            number, citation_quality, anonymization,
            url_liveness, retraction,
        ]

        preset.change(
            fn=apply_preset,
            inputs=[preset],
            outputs=check_toggles,
        )
        preset.change(
            fn=_preset_caption_html,
            inputs=[preset],
            outputs=[preset_caption],
        )

        # ---- Upload-button callback: store path in state + update chip ----
        def _on_upload(f):
            """Return ``(path, chip_html)`` for a freshly uploaded file.

            ``f`` is whatever the UploadButton delivers; its ``name``
            attribute is used as the path when present, otherwise ``str(f)``.
            """
            if f is None:
                return None, no_file_html
            path = getattr(f, "name", str(f))
            # The file name is user-controlled and ends up in raw HTML.
            shown_name = _escape_html(Path(path).name)
            return path, f'<div class="bg-fname ok">π {shown_name}</div>'

        bib_btn.upload(_on_upload, inputs=[bib_btn], outputs=[bib_path_state, bib_status])
        tex_btn.upload(_on_upload, inputs=[tex_btn], outputs=[tex_path_state, tex_status])

        # Run pipeline:
        #   1. Toggle visibility: hide Run, show Stop.
        #   2. Stream run_check yields into report + status + downloads.
        #   3. After completion, swap buttons back.
        # Stop button cancels the streaming task via Gradio's `cancels=`.
        def _show_stop():
            """Hide Run, show Stop."""
            return gr.update(visible=False), gr.update(visible=True)

        def _show_run():
            """Show Run, hide Stop."""
            return gr.update(visible=True), gr.update(visible=False)

        def _on_stop():
            """Restore the buttons and report the cancellation in the status strip."""
            return (
                gr.update(visible=True),
                gr.update(visible=False),
                _status_html("Cancelled",
                             "Run interrupted by user. Partial results discarded.",
                             state="error"),
            )

        # Keep a handle on the run_check step itself: `cancels=` targets the
        # specific event it is given, and a chained `.then(...)` returns the
        # event for the step it adds, not for the whole chain.
        run_event = run_btn.click(
            fn=_show_stop, inputs=None, outputs=[run_btn, stop_btn],
        ).then(
            fn=run_check,
            inputs=[bib_path_state, tex_path_state, *check_toggles],
            outputs=[report_panel, status_panel,
                     download_html, download_md, download_json, download_bib, download_log],
        )
        run_event.then(
            fn=_show_run, inputs=None, outputs=[run_btn, stop_btn],
        )
        stop_btn.click(
            fn=_on_stop,
            inputs=None,
            outputs=[run_btn, stop_btn, status_panel],
            cancels=[run_event],
        )

    return app
# Module-level Blocks instance, built when this module is imported.
app = create_app()


if __name__ == "__main__":
    _favicon = Path(__file__).parent.joinpath("assets", "icon-192.png")
    # Only hand Gradio a favicon path when the bundled icon is present.
    _favicon_arg = str(_favicon) if _favicon.exists() else None
    app.launch(
        favicon_path=_favicon_arg,
        show_error=True,
        css=CUSTOM_CSS,
        theme=gr.themes.Soft(),
    )