"""
infra.api.request_context — request-scoped correlation IDs (Stage 84).
Production V1 polish. Before this stage, a customer reporting
"500 error around 14:23" left the operator grep'ing logs by
timestamp and hoping no one else was sharing that window. With
correlation IDs every log line emitted during a request carries
the same ``request_id``; the operator pastes it from the
X-Request-ID response header and gets everything that happened
on that request — middleware, auth, handler, DB queries, the
exception trace.
Design:
* ``request_id`` and ``tenant_id`` live in ``contextvars.ContextVar``
so they're naturally scoped per request (asyncio task) without
threading any explicit context object through every call.
* The X-Request-ID middleware accepts an upstream header value
if it matches a strict allowlist regex (length + charset), so
a hostile peer can't inject 10KB into our log lines. Otherwise
we generate a 16-char UUID hex.
* The auth dependency calls ``set_tenant_context(tid)`` after a
successful lookup, so handler logs include the tenant_id too.
* ``JsonLogFormatter`` (in ``infra.deployment.observability``)
reads these contextvars at format time and adds them to the
payload — no code change needed at the call site of every
``logger.info(...)``.
Stdlib only.
"""
from __future__ import annotations

import re
import uuid
from contextvars import ContextVar
from types import MappingProxyType
from typing import Mapping, Optional
# Correlation state for the request currently being served. Both
# variables fall back to None when nothing has been bound (background
# tasks, scheduler ticks) — the log formatter is then expected to leave
# the field out rather than emit "null".
request_id_var: ContextVar[Optional[str]] = ContextVar(
    "orgstate_request_id",
    default=None,
)
tenant_id_var: ContextVar[Optional[str]] = ContextVar(
    "orgstate_tenant_id",
    default=None,
)
# Stage 154 — read-only view of the request's cookies, stamped by the
# request-context middleware so auth helpers that don't take a FastAPI
# Request param (the legacy require_tenant_or_admin pattern) can still
# see the SSO session cookie.
#
# The default is an *immutable* empty mapping. A ContextVar default is
# a single object shared by every context, so a plain ``{}`` could be
# mutated by one caller and leak entries to every off-request caller
# (CLI, scheduler). With the proxy, lookups on the default simply miss
# and accidental writes raise TypeError.
request_cookies_var: ContextVar[Mapping[str, str]] = ContextVar(
    "orgstate_request_cookies",
    default=MappingProxyType({}),
)
# Allowlist for an upstream-supplied X-Request-ID: ASCII letters,
# digits, underscore, dash and dot, 1 to 128 characters. That covers
# UUIDs (with or without dashes), Cloudflare-style CF-RAY values, and
# arbitrary trace IDs. Anything else is discarded; we generate fresh.
#
# re.ASCII is required: in a str pattern ``\w`` is otherwise
# Unicode-aware and would let accented letters and non-Latin digits
# through. ``\Z`` is used instead of ``$`` because ``$`` also matches
# just before a trailing newline.
_VALID_REQUEST_ID = re.compile(r"^[\w.\-]{1,128}\Z", re.ASCII)
def generate_request_id() -> str:
    """Return a fresh 16-character lowercase hex request ID.

    Short enough to copy/paste into a support ticket, large enough to
    be unique within a process lifetime: 64 random bits put the
    birthday collision point at roughly 2^32 requests, and a collision
    would only confuse log grep, not corrupt anything.

    The ID is assembled from the first and last eight hex digits of a
    UUID4. Those are the only stretches used because a plain
    ``hex[:16]`` prefix contains the fixed version digit (always "4"
    at index 12), which would leave just 60 random bits.
    """
    digits = uuid.uuid4().hex
    # digits[:8] is time_low and digits[-8:] is the tail of node; in a
    # version-4 UUID neither holds the version or variant bits.
    return digits[:8] + digits[-8:]
def sanitize_upstream_id(raw: Optional[str]) -> Optional[str]:
    """Return the upstream X-Request-ID if it passes the allowlist.

    The value is accepted only when, after surrounding whitespace is
    stripped, it is non-empty, pure ASCII, and matches
    ``_VALID_REQUEST_ID`` from its first character to its last. The
    stripped value is what gets returned. In every other case
    (including ``raw`` being None) the result is None and the caller
    generates a fresh ID.
    """
    if raw is None:
        return None
    candidate = raw.strip()
    # ``\w`` in a str pattern is Unicode-aware unless the pattern is
    # compiled with re.ASCII, so non-ASCII input is rejected here
    # explicitly: accented letters and non-Latin digits must not end
    # up in log lines.
    if not candidate or not candidate.isascii():
        return None
    # fullmatch() anchors both ends itself rather than relying on
    # ``$``, which also matches just before a trailing newline.
    if _VALID_REQUEST_ID.fullmatch(candidate) is None:
        return None
    return candidate
def set_request_context(request_id: str,
                        tenant_id: Optional[str] = None) -> tuple:
    """Bind ``request_id`` and ``tenant_id`` to their context variables.

    The request ID is bound first, then the tenant ID (None unless one
    is supplied). The result is the pair ``(request token, tenant
    token)``; hand it back to ``clear_request_context`` to restore the
    values that were in place before this call, which is what tests
    and non-async callers need.
    """
    return (
        request_id_var.set(request_id),
        tenant_id_var.set(tenant_id),
    )
def set_tenant_context(tenant_id: Optional[str]) -> None:
    """Record the tenant for the current context.

    Meant for the auth dependency once the tenant is known. Only
    ``tenant_id_var`` is written, so a request ID already in context
    is left untouched. The reset token is discarded.
    """
    _ = tenant_id_var.set(tenant_id)
def clear_request_context(tokens: Optional[tuple] = None) -> None:
    """Undo the request/tenant binding.

    With the ``(request token, tenant token)`` pair returned by
    ``set_request_context``, both variables are reset to whatever they
    held before that call. With no argument, both are simply set to
    None, which is good enough for sync teardown.
    """
    # NOTE(review): request_cookies_var is not touched here — confirm
    # the cookies are cleared elsewhere if that matters to callers.
    if tokens is not None:
        request_token, tenant_token = tokens
        request_id_var.reset(request_token)
        tenant_id_var.reset(tenant_token)
        return
    for context_var in (request_id_var, tenant_id_var):
        context_var.set(None)
def current_request_id() -> Optional[str]:
    """Return the current context's request ID, or None when unset."""
    request_id = request_id_var.get()
    return request_id
def current_tenant_id() -> Optional[str]:
    """Return the current context's tenant ID, or None when unset."""
    tenant_id = tenant_id_var.get()
    return tenant_id
def set_request_cookies(cookies: Mapping[str, str]) -> None:
    """Publish the incoming request's cookies on ``request_cookies_var``.

    This lets the legacy auth helpers (require_key /
    require_tenant_or_admin) read the SSO session cookie without
    taking a Request param. The mapping is stored as given; no copy
    is made and the reset token is discarded.
    """
    _ = request_cookies_var.set(cookies)
def current_request_cookies() -> Mapping[str, str]:
    """Return the cookie mapping bound to the current context.

    When nothing has been bound, this is the default value of
    ``request_cookies_var``.
    """
    cookies = request_cookies_var.get()
    return cookies
|