apoorvrajdev's picture
feat: finalize Phase 1 modular ML architecture
3a2e5f0
"""Structured logging setup.
Why structlog instead of stdlib `logging`?
* Logs are *data*, not strings. structlog emits dicts that grafana/Datadog/
Better Stack can index without regex parsing.
* The same code path produces colourised pretty logs in dev and JSON logs
in prod, controlled by ``APP_ENV``. Grep the same fields in either mode.
* Bound context (request IDs, model versions) propagates automatically.
Usage:
>>> from captioning.utils.logging import configure_logging, get_logger
>>> configure_logging()
>>> log = get_logger(__name__)
>>> log.info("training started", epoch=1, batch_size=64)
"""
from __future__ import annotations
import logging
import os
import sys
from typing import Any
import structlog
_CONFIGURED = False
def _resolve_level(level: str | int | None) -> int:
"""Coerce a log-level argument (or env default) to a numeric level.
Why this helper exists:
``logging.getLevelName`` is *bidirectional* — it returns ``int`` for
known names and ``str`` for unknown ones (e.g. ``"Level FOO"``). That
union return type defeats type narrowing and would be passed straight
through to ``structlog.make_filtering_bound_logger``, which requires
``int``. We resolve once here, fall back to ``INFO`` on unknown
names, and return a guaranteed ``int``.
"""
if level is None:
level = os.environ.get("LOG_LEVEL", "INFO")
if isinstance(level, int):
return level
resolved = logging.getLevelName(level.upper())
return resolved if isinstance(resolved, int) else logging.INFO
def configure_logging(level: str | int | None = None, json_logs: bool | None = None) -> None:
"""Initialise structlog. Idempotent — calling twice has no effect.
Args:
level: Log level name (``"INFO"``) or numeric value. Defaults to env
``LOG_LEVEL`` or ``INFO``.
json_logs: If True, render JSON; if False, render pretty colourised.
Defaults to True when ``APP_ENV=production``, else False.
"""
global _CONFIGURED
if _CONFIGURED:
return
level_int = _resolve_level(level)
if json_logs is None:
json_logs = os.environ.get("APP_ENV", "development").lower() == "production"
logging.basicConfig(
format="%(message)s",
stream=sys.stdout,
level=level_int,
)
timestamper = structlog.processors.TimeStamper(fmt="iso", utc=True)
shared_processors: list[Any] = [
structlog.contextvars.merge_contextvars,
structlog.stdlib.add_log_level,
structlog.stdlib.add_logger_name,
timestamper,
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
]
renderer: Any = (
structlog.processors.JSONRenderer()
if json_logs
else structlog.dev.ConsoleRenderer(colors=True)
)
structlog.configure(
processors=[*shared_processors, renderer],
wrapper_class=structlog.make_filtering_bound_logger(level_int),
context_class=dict,
logger_factory=structlog.stdlib.LoggerFactory(),
cache_logger_on_first_use=True,
)
_CONFIGURED = True
def get_logger(name: str | None = None) -> structlog.stdlib.BoundLogger:
"""Return a logger bound to ``name`` (typically ``__name__``)."""
if not _CONFIGURED:
configure_logging()
return structlog.get_logger(name)