File size: 3,447 Bytes
3a2e5f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
"""Structured logging setup.

Why structlog instead of stdlib `logging`?
    * Logs are *data*, not strings. structlog emits dicts that Grafana, Datadog,
      or Better Stack can index without regex parsing.
    * The same code path produces colourised pretty logs in dev and JSON logs
      in prod, controlled by ``APP_ENV``. Grep the same fields in either mode.
    * Bound context (request IDs, model versions) propagates automatically.

Usage:
    >>> from captioning.utils.logging import configure_logging, get_logger
    >>> configure_logging()
    >>> log = get_logger(__name__)
    >>> log.info("training started", epoch=1, batch_size=64)
"""

from __future__ import annotations

import logging
import os
import sys
from typing import Any

import structlog

# Module-level guard: flipped to True at the end of configure_logging() so
# that repeated calls (and the lazy call made by get_logger()) return early
# instead of configuring structlog a second time.
_CONFIGURED = False


def _resolve_level(level: str | int | None) -> int:
    """Coerce a log-level argument (or env default) to a numeric level.

    Why this helper exists:
        ``logging.getLevelName`` is *bidirectional* — it returns ``int`` for
        known names and ``str`` for unknown ones (e.g. ``"Level FOO"``). That
        union return type defeats type narrowing and would be passed straight
        through to ``structlog.make_filtering_bound_logger``, which requires
        ``int``. We resolve once here, fall back to ``INFO`` on anything we
        cannot resolve, and return a guaranteed ``int``.

    Args:
        level: A level name in any case (``"info"``, ``"DEBUG"``), a numeric
            string (``"10"``), an ``int``, or ``None`` to read the
            ``LOG_LEVEL`` environment variable (default ``"INFO"``).
            Surrounding whitespace in string values is ignored.

    Returns:
        The numeric level. ``int`` arguments are returned unchanged; strings
        that name no registered level resolve to ``logging.INFO``.
    """
    if level is None:
        level = os.environ.get("LOG_LEVEL", "INFO")
    if isinstance(level, int):
        return level

    # Values read from env files / shell exports frequently carry stray
    # whitespace or a trailing newline; without stripping, " debug" would
    # silently degrade to INFO.
    text = level.strip().upper()

    if text.isdecimal():
        # Numeric string such as LOG_LEVEL=10. Honour it only when the number
        # is a registered level: for unregistered numbers getLevelName()
        # answers with the placeholder "Level <n>", and in that case we keep
        # the INFO fallback instead of handing an unknown level downstream.
        number = int(text)
        is_registered = logging.getLevelName(number) != f"Level {number}"
        return number if is_registered else logging.INFO

    resolved = logging.getLevelName(text)
    return resolved if isinstance(resolved, int) else logging.INFO


def configure_logging(level: str | int | None = None, json_logs: bool | None = None) -> None:
    """Initialise structlog. Idempotent — calling twice has no effect.

    Only the first call configures anything: once the module-level
    ``_CONFIGURED`` flag is set, later calls return immediately and their
    arguments are ignored. Because ``get_logger`` triggers a default
    configuration when none exists yet, call this function explicitly before
    the first ``get_logger`` call if you need non-default settings.

    Args:
        level: Log level name (``"INFO"``) or numeric value. Defaults to env
            ``LOG_LEVEL`` or ``INFO``.
        json_logs: If True, render JSON; if False, render pretty colourised.
            Defaults to True when ``APP_ENV=production``, else False.
    """
    global _CONFIGURED
    if _CONFIGURED:
        return

    level_int = _resolve_level(level)
    if json_logs is None:
        json_logs = os.environ.get("APP_ENV", "development").lower() == "production"

    # structlog hands its fully rendered line to the stdlib logger as the
    # message, so the stdlib format is just "%(message)s" on stdout.
    logging.basicConfig(
        format="%(message)s",
        stream=sys.stdout,
        level=level_int,
    )

    # Processors common to both output modes; the final renderer (and, for
    # JSON, the exception formatter) is appended below.
    processors: list[Any] = [
        structlog.contextvars.merge_contextvars,
        structlog.stdlib.add_log_level,
        structlog.stdlib.add_logger_name,
        structlog.processors.TimeStamper(fmt="iso", utc=True),
        structlog.processors.StackInfoRenderer(),
    ]
    if json_logs:
        # JSON output needs the traceback flattened into a string field
        # before serialisation.
        processors.append(structlog.processors.format_exc_info)
        processors.append(structlog.processors.JSONRenderer())
    else:
        # ConsoleRenderer formats ``exc_info`` itself. Running
        # ``format_exc_info`` ahead of it pre-renders the traceback to a
        # plain string, which disables the console renderer's own exception
        # formatting and makes structlog warn about the processor chain when
        # a pretty traceback formatter is available.
        processors.append(structlog.dev.ConsoleRenderer(colors=True))

    structlog.configure(
        processors=processors,
        wrapper_class=structlog.make_filtering_bound_logger(level_int),
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )
    _CONFIGURED = True


def get_logger(name: str | None = None) -> structlog.stdlib.BoundLogger:
    """Return a structlog logger for ``name``, configuring logging on demand.

    Args:
        name: Logger name, typically the caller's ``__name__``. ``None`` is
            forwarded to ``structlog.get_logger`` unchanged.

    Returns:
        The logger produced by ``structlog.get_logger(name)``.
        NOTE(review): the annotation says ``structlog.stdlib.BoundLogger``,
        while ``configure_logging`` installs the class returned by
        ``structlog.make_filtering_bound_logger`` as the wrapper — confirm
        the annotation matches what callers actually receive.
    """
    if _CONFIGURED:
        return structlog.get_logger(name)

    # First use before any explicit setup: fall back to the default
    # configuration (env-driven level and renderer) so logging still works.
    configure_logging()
    return structlog.get_logger(name)