"""
Logging bootstrap and per-run capture utilities.
Design goals
------------
1. **One env var to rule them all.** ``BIBGUARD_LOG=DEBUG`` (or
``BIBGUARD_DEBUG=1``) turns on full tracebacks across the codebase. Default
is WARNING so stdout stays quiet during normal runs.
2. **Always-on file log.** Even at WARNING console level we still write a
rotating DEBUG log to ``~/.cache/bibguard/logs/bibguard.log`` (override with
``BIBGUARD_LOG_FILE``). That way, when something blows up mid-run you can
``tail`` or grep the file after the fact — no need to rerun with --verbose.
3. **Pinpoint location.** Formatter includes ``filename:lineno`` so any log
line tells you exactly which source line emitted it.
4. **Per-run capture for the UI.** ``capture_run()`` is a context manager that
returns a buffer + path. The Gradio app attaches it at the start of each
check, then ships the resulting log as a downloadable artifact alongside
the HTML report.
"""
from __future__ import annotations
import logging
import logging.handlers
import os
import sys
import tempfile
from contextlib import contextmanager
from io import StringIO
from pathlib import Path
from typing import Iterator, Optional
# Format used for both console and file. ``%(filename)s:%(lineno)d`` is the
# important addition — it makes any traceback-free warning still navigable.
_FMT = "%(asctime)s %(levelname)-7s %(name)s %(filename)s:%(lineno)d — %(message)s"
# Time-of-day only; full dates would just add noise to a per-run log.
_DATEFMT = "%H:%M:%S"
def _resolve(level: str | int) -> int:
if isinstance(level, int):
return level
return getattr(logging, str(level).upper(), logging.WARNING)
def _default_log_path() -> Path:
override = os.environ.get("BIBGUARD_LOG_FILE", "").strip()
if override:
return Path(override).expanduser()
return Path.home() / ".cache" / "bibguard" / "logs" / "bibguard.log"
def setup(level: str | int | None = None, *, quiet: bool = False,
          log_file: Optional[Path | str] = None) -> Path:
    """
    Configure the root logger for console + always-on file logging.

    Console verbosity is decided, in priority order, by ``quiet`` (ERROR),
    the ``BIBGUARD_DEBUG`` env var (DEBUG), the ``level`` argument, then the
    ``BIBGUARD_LOG`` env var (default WARNING). Regardless of console level,
    a DEBUG-level rotating file is *always* written so failures are
    reproducible after the fact.

    Parameters
    ----------
    level : str | int | None
        Console level name or number; overridden by ``quiet``/env debug.
    quiet : bool
        Force console output down to ERROR-only.
    log_file : Path | str | None
        Explicit log file path (default: ``BIBGUARD_LOG_FILE`` env var, or
        ``~/.cache/bibguard/logs/bibguard.log``).

    Returns the path to the active log file (useful for surfacing in the UI).
    Safe to call repeatedly: handlers are tagged and reused, never duplicated.
    """
    # ---- resolve console level (priority: quiet > env debug > arg > env).
    # The env flag is parsed case-insensitively so "TRUE" / "Yes" also work.
    debug_flag = os.environ.get("BIBGUARD_DEBUG", "").strip().lower()
    if quiet:
        console_level = logging.ERROR
    elif debug_flag in ("1", "true", "yes"):
        console_level = logging.DEBUG
    elif level is not None:
        console_level = _resolve(level)
    else:
        console_level = _resolve(os.environ.get("BIBGUARD_LOG", "WARNING"))

    root = logging.getLogger()
    root.setLevel(logging.DEBUG)  # let handlers filter; root keeps everything

    # ------------------------------------------------------------- console
    # Reuse an already-attached console handler (avoids duplicates when
    # modules import this multiple times).
    console_handler = next(
        (h for h in root.handlers if getattr(h, "_bibguard_console", False)),
        None,
    )
    if console_handler is None:
        console_handler = logging.StreamHandler(sys.stderr)
        console_handler._bibguard_console = True  # type: ignore[attr-defined]
        console_handler.setFormatter(logging.Formatter(fmt=_FMT, datefmt=_DATEFMT))
        root.addHandler(console_handler)
    # Always refresh the level so repeated setup() calls can change verbosity.
    console_handler.setLevel(console_level)

    # ------------------------------------------------------------- file
    log_path = Path(log_file).expanduser() if log_file else _default_log_path()
    file_handler = next(
        (h for h in root.handlers if getattr(h, "_bibguard_file", False)),
        None,
    )
    try:
        if file_handler is None:
            log_path.parent.mkdir(parents=True, exist_ok=True)
            file_handler = logging.handlers.RotatingFileHandler(
                str(log_path), maxBytes=2_000_000, backupCount=3,
                encoding="utf-8",
            )
            file_handler._bibguard_file = True  # type: ignore[attr-defined]
            file_handler.setFormatter(logging.Formatter(fmt=_FMT, datefmt=_DATEFMT))
            file_handler.setLevel(logging.DEBUG)
            root.addHandler(file_handler)
    except OSError as e:
        # Non-fatal: filesystem unavailable, fall back to stderr-only.
        root.warning("File logging disabled (%s); stderr only.", e)

    # Quiet down noisy third-party loggers unless we're in DEBUG console mode.
    third_party_level = (
        logging.WARNING if console_level > logging.DEBUG else logging.INFO
    )
    for noisy in ("urllib3", "requests", "requests_cache", "bibtexparser"):
        logging.getLogger(noisy).setLevel(third_party_level)
    return log_path
@contextmanager
def capture_run(target_path: Optional[Path] = None) -> Iterator[tuple[Path, "_RunStats"]]:
    """
    Attach a temporary DEBUG-level file handler for the duration of a single run.

    Yields ``(path, stats)`` where:

    * ``path`` is the per-run log file written into the report's output dir
      (or a temp file if ``target_path`` is None).
    * ``stats`` exposes ``warnings`` / ``errors`` counters so the UI can
      surface "N warnings logged" without reading the file.

    Used by ``app.py`` so each Gradio run produces a self-contained
    ``bibguard.log`` next to ``report.html`` that the user can download.
    """
    if target_path is None:
        # Use mkstemp (not NamedTemporaryFile) so we don't leak an open file
        # descriptor: we only need a reserved path — the FileHandler below
        # opens the file itself. The leaked fd also breaks reopening the
        # file on Windows.
        fd, tmp_name = tempfile.mkstemp(suffix=".log", prefix="bibguard_run_")
        os.close(fd)
        path = Path(tmp_name)
    else:
        path = target_path
    path.parent.mkdir(parents=True, exist_ok=True)
    handler = logging.FileHandler(str(path), mode="w", encoding="utf-8")
    handler.setFormatter(logging.Formatter(fmt=_FMT, datefmt=_DATEFMT))
    handler.setLevel(logging.DEBUG)
    stats = _RunStats()
    handler.addFilter(stats)  # a Filter can double as a counter
    root = logging.getLogger()
    root.addHandler(handler)
    try:
        yield path, stats
    finally:
        # Detach the handler first so no late record can reach a closed
        # stream, then flush/close best-effort (never mask the run's own
        # exception). removeHandler() is a no-op for absent handlers and
        # never raises.
        root.removeHandler(handler)
        try:
            handler.flush()
            handler.close()
        except Exception:
            pass
class _RunStats(logging.Filter):
"""Logging filter that just counts warning+ records (always returns True)."""
def __init__(self) -> None:
super().__init__()
self.warnings = 0
self.errors = 0
self.exceptions = 0
def filter(self, record: logging.LogRecord) -> bool: # type: ignore[override]
if record.levelno >= logging.ERROR:
self.errors += 1
if record.exc_info:
self.exceptions += 1
elif record.levelno >= logging.WARNING:
self.warnings += 1
return True
|