BibGuard / src /utils /logging_setup.py
thinkwee
v2.0
fcffa22
"""
Logging bootstrap and per-run capture utilities.
Design goals
------------
1. **One env var to rule them all.** ``BIBGUARD_LOG=DEBUG`` (or
``BIBGUARD_DEBUG=1``) turns on full tracebacks across the codebase. Default
is WARNING so stdout stays quiet during normal runs.
2. **Always-on file log.** Even at WARNING console level we still write a
rotating DEBUG log to ``~/.cache/bibguard/logs/bibguard.log`` (override with
``BIBGUARD_LOG_FILE``). That way, when something blows up mid-run you can
``tail`` or grep the file after the fact — no need to rerun with --verbose.
3. **Pinpoint location.** Formatter includes ``filename:lineno`` so any log
line tells you exactly which source line emitted it.
4. **Per-run capture for the UI.** ``capture_run()`` is a context manager that
returns a buffer + path. The Gradio app attaches it at the start of each
check, then ships the resulting log as a downloadable artifact alongside
the HTML report.
"""
from __future__ import annotations
import logging
import logging.handlers
import os
import sys
import tempfile
from contextlib import contextmanager
from io import StringIO
from pathlib import Path
from typing import Iterator, Optional
# Format used for both console and file. ``%(filename)s:%(lineno)d`` is the
# important addition — it makes any traceback-free warning still navigable.
_FMT = "%(asctime)s %(levelname)-7s %(name)s %(filename)s:%(lineno)d — %(message)s"
_DATEFMT = "%H:%M:%S"
def _resolve(level: str | int) -> int:
if isinstance(level, int):
return level
return getattr(logging, str(level).upper(), logging.WARNING)
def _default_log_path() -> Path:
override = os.environ.get("BIBGUARD_LOG_FILE", "").strip()
if override:
return Path(override).expanduser()
return Path.home() / ".cache" / "bibguard" / "logs" / "bibguard.log"
def setup(level: str | int | None = None, *, quiet: bool = False,
log_file: Optional[Path | str] = None) -> Path:
"""
Configure root logger.
Console level is controlled by ``level`` / ``BIBGUARD_LOG`` / ``quiet``.
Regardless of console level, a DEBUG-level rotating file is *always*
written so failures are reproducible after the fact.
Returns the path to the active log file (useful for surfacing in the UI).
"""
# Resolve console level
if quiet:
console_level = logging.ERROR
elif os.environ.get("BIBGUARD_DEBUG", "").strip() in ("1", "true", "yes"):
console_level = logging.DEBUG
elif level is not None:
console_level = _resolve(level)
else:
console_level = _resolve(os.environ.get("BIBGUARD_LOG", "WARNING"))
root = logging.getLogger()
root.setLevel(logging.DEBUG) # let handlers filter; root keeps everything
# ------------------------------------------------------------- console
# If we already attached a console handler, reuse it (avoids duplicates
# when modules import this multiple times).
console_handler = None
for h in root.handlers:
if getattr(h, "_bibguard_console", False):
console_handler = h
break
if console_handler is None:
console_handler = logging.StreamHandler(sys.stderr)
console_handler._bibguard_console = True # type: ignore[attr-defined]
console_handler.setFormatter(logging.Formatter(fmt=_FMT, datefmt=_DATEFMT))
root.addHandler(console_handler)
console_handler.setLevel(console_level)
# ------------------------------------------------------------- file
log_path = Path(log_file).expanduser() if log_file else _default_log_path()
file_handler: Optional[logging.handlers.RotatingFileHandler] = None
for h in root.handlers:
if getattr(h, "_bibguard_file", False):
file_handler = h # type: ignore[assignment]
break
try:
if file_handler is None:
log_path.parent.mkdir(parents=True, exist_ok=True)
file_handler = logging.handlers.RotatingFileHandler(
str(log_path), maxBytes=2_000_000, backupCount=3, encoding="utf-8",
)
file_handler._bibguard_file = True # type: ignore[attr-defined]
file_handler.setFormatter(logging.Formatter(fmt=_FMT, datefmt=_DATEFMT))
file_handler.setLevel(logging.DEBUG)
root.addHandler(file_handler)
except OSError as e:
# Non-fatal: filesystem unavailable, fall back to stderr-only.
root.warning("File logging disabled (%s); stderr only.", e)
# Quiet down noisy third-party loggers unless we're in DEBUG console mode.
if console_level > logging.DEBUG:
for noisy in ("urllib3", "requests", "requests_cache", "bibtexparser"):
logging.getLogger(noisy).setLevel(logging.WARNING)
else:
for noisy in ("urllib3", "requests", "requests_cache", "bibtexparser"):
logging.getLogger(noisy).setLevel(logging.INFO)
return log_path
@contextmanager
def capture_run(target_path: Optional[Path] = None) -> Iterator[tuple[Path, "_RunStats"]]:
"""
Attach a temporary DEBUG-level file handler for the duration of a single run.
Yields ``(path, stats)`` where:
* ``path`` is the per-run log file written into the report's output dir
(or a temp file if ``target_path`` is None).
* ``stats`` exposes ``warnings`` / ``errors`` counters so the UI can
surface "N warnings logged" without reading the file.
Used by ``app.py`` so each Gradio run produces a self-contained
``bibguard.log`` next to ``report.html`` that the user can download.
"""
path = target_path or Path(tempfile.NamedTemporaryFile(
suffix=".log", prefix="bibguard_run_", delete=False
).name)
path.parent.mkdir(parents=True, exist_ok=True)
handler = logging.FileHandler(str(path), mode="w", encoding="utf-8")
handler.setFormatter(logging.Formatter(fmt=_FMT, datefmt=_DATEFMT))
handler.setLevel(logging.DEBUG)
stats = _RunStats()
handler.addFilter(stats) # filters can also count
root = logging.getLogger()
root.addHandler(handler)
try:
yield path, stats
finally:
try:
handler.flush()
handler.close()
except Exception:
pass
try:
root.removeHandler(handler)
except ValueError:
pass
class _RunStats(logging.Filter):
"""Logging filter that just counts warning+ records (always returns True)."""
def __init__(self) -> None:
super().__init__()
self.warnings = 0
self.errors = 0
self.exceptions = 0
def filter(self, record: logging.LogRecord) -> bool: # type: ignore[override]
if record.levelno >= logging.ERROR:
self.errors += 1
if record.exc_info:
self.exceptions += 1
elif record.levelno >= logging.WARNING:
self.warnings += 1
return True