""" Logging bootstrap and per-run capture utilities. Design goals ------------ 1. **One env var to rule them all.** ``BIBGUARD_LOG=DEBUG`` (or ``BIBGUARD_DEBUG=1``) turns on full tracebacks across the codebase. Default is WARNING so stdout stays quiet during normal runs. 2. **Always-on file log.** Even at WARNING console level we still write a rotating DEBUG log to ``~/.cache/bibguard/logs/bibguard.log`` (override with ``BIBGUARD_LOG_FILE``). That way, when something blows up mid-run you can ``tail`` or grep the file after the fact — no need to rerun with --verbose. 3. **Pinpoint location.** Formatter includes ``filename:lineno`` so any log line tells you exactly which source line emitted it. 4. **Per-run capture for the UI.** ``capture_run()`` is a context manager that returns a buffer + path. The Gradio app attaches it at the start of each check, then ships the resulting log as a downloadable artifact alongside the HTML report. """ from __future__ import annotations import logging import logging.handlers import os import sys import tempfile from contextlib import contextmanager from io import StringIO from pathlib import Path from typing import Iterator, Optional # Format used for both console and file. ``%(filename)s:%(lineno)d`` is the # important addition — it makes any traceback-free warning still navigable. _FMT = "%(asctime)s %(levelname)-7s %(name)s %(filename)s:%(lineno)d — %(message)s" _DATEFMT = "%H:%M:%S" def _resolve(level: str | int) -> int: if isinstance(level, int): return level return getattr(logging, str(level).upper(), logging.WARNING) def _default_log_path() -> Path: override = os.environ.get("BIBGUARD_LOG_FILE", "").strip() if override: return Path(override).expanduser() return Path.home() / ".cache" / "bibguard" / "logs" / "bibguard.log" def setup(level: str | int | None = None, *, quiet: bool = False, log_file: Optional[Path | str] = None) -> Path: """ Configure root logger. Console level is controlled by ``level`` / ``BIBGUARD_LOG`` / ``quiet``. Regardless of console level, a DEBUG-level rotating file is *always* written so failures are reproducible after the fact. Returns the path to the active log file (useful for surfacing in the UI). """ # Resolve console level if quiet: console_level = logging.ERROR elif os.environ.get("BIBGUARD_DEBUG", "").strip() in ("1", "true", "yes"): console_level = logging.DEBUG elif level is not None: console_level = _resolve(level) else: console_level = _resolve(os.environ.get("BIBGUARD_LOG", "WARNING")) root = logging.getLogger() root.setLevel(logging.DEBUG) # let handlers filter; root keeps everything # ------------------------------------------------------------- console # If we already attached a console handler, reuse it (avoids duplicates # when modules import this multiple times). console_handler = None for h in root.handlers: if getattr(h, "_bibguard_console", False): console_handler = h break if console_handler is None: console_handler = logging.StreamHandler(sys.stderr) console_handler._bibguard_console = True # type: ignore[attr-defined] console_handler.setFormatter(logging.Formatter(fmt=_FMT, datefmt=_DATEFMT)) root.addHandler(console_handler) console_handler.setLevel(console_level) # ------------------------------------------------------------- file log_path = Path(log_file).expanduser() if log_file else _default_log_path() file_handler: Optional[logging.handlers.RotatingFileHandler] = None for h in root.handlers: if getattr(h, "_bibguard_file", False): file_handler = h # type: ignore[assignment] break try: if file_handler is None: log_path.parent.mkdir(parents=True, exist_ok=True) file_handler = logging.handlers.RotatingFileHandler( str(log_path), maxBytes=2_000_000, backupCount=3, encoding="utf-8", ) file_handler._bibguard_file = True # type: ignore[attr-defined] file_handler.setFormatter(logging.Formatter(fmt=_FMT, datefmt=_DATEFMT)) file_handler.setLevel(logging.DEBUG) root.addHandler(file_handler) except OSError as e: # Non-fatal: filesystem unavailable, fall back to stderr-only. root.warning("File logging disabled (%s); stderr only.", e) # Quiet down noisy third-party loggers unless we're in DEBUG console mode. if console_level > logging.DEBUG: for noisy in ("urllib3", "requests", "requests_cache", "bibtexparser"): logging.getLogger(noisy).setLevel(logging.WARNING) else: for noisy in ("urllib3", "requests", "requests_cache", "bibtexparser"): logging.getLogger(noisy).setLevel(logging.INFO) return log_path @contextmanager def capture_run(target_path: Optional[Path] = None) -> Iterator[tuple[Path, "_RunStats"]]: """ Attach a temporary DEBUG-level file handler for the duration of a single run. Yields ``(path, stats)`` where: * ``path`` is the per-run log file written into the report's output dir (or a temp file if ``target_path`` is None). * ``stats`` exposes ``warnings`` / ``errors`` counters so the UI can surface "N warnings logged" without reading the file. Used by ``app.py`` so each Gradio run produces a self-contained ``bibguard.log`` next to ``report.html`` that the user can download. """ path = target_path or Path(tempfile.NamedTemporaryFile( suffix=".log", prefix="bibguard_run_", delete=False ).name) path.parent.mkdir(parents=True, exist_ok=True) handler = logging.FileHandler(str(path), mode="w", encoding="utf-8") handler.setFormatter(logging.Formatter(fmt=_FMT, datefmt=_DATEFMT)) handler.setLevel(logging.DEBUG) stats = _RunStats() handler.addFilter(stats) # filters can also count root = logging.getLogger() root.addHandler(handler) try: yield path, stats finally: try: handler.flush() handler.close() except Exception: pass try: root.removeHandler(handler) except ValueError: pass class _RunStats(logging.Filter): """Logging filter that just counts warning+ records (always returns True).""" def __init__(self) -> None: super().__init__() self.warnings = 0 self.errors = 0 self.exceptions = 0 def filter(self, record: logging.LogRecord) -> bool: # type: ignore[override] if record.levelno >= logging.ERROR: self.errors += 1 if record.exc_info: self.exceptions += 1 elif record.levelno >= logging.WARNING: self.warnings += 1 return True