File size: 6,711 Bytes
"""OpenTelemetry metrics bootstrap for Open WebUI.

This module initialises a MeterProvider that sends metrics to an OTLP
collector. The collector is responsible for exposing a Prometheus
`/metrics` endpoint – WebUI does **not** expose it directly.

Metrics collected:

* http.server.requests (counter)
* http.server.duration (histogram, milliseconds)
* webui.users.total (observable gauge)
* webui.users.active (observable gauge)
* webui.users.active.today (observable gauge)

Attributes used on the HTTP metrics: http.method, http.route, http.status_code

If you wish to add more attributes (e.g. user-agent) you can, but beware of
high-cardinality label sets.
"""
from __future__ import annotations
import time
from typing import Dict, List, Sequence, Any
from base64 import b64encode
from fastapi import FastAPI, Request
from opentelemetry import metrics
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
OTLPMetricExporter,
)
from opentelemetry.exporter.otlp.proto.http.metric_exporter import (
OTLPMetricExporter as OTLPHttpMetricExporter,
)
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.view import View
from opentelemetry.sdk.metrics.export import (
PeriodicExportingMetricReader,
)
from opentelemetry.sdk.resources import Resource
from open_webui.env import (
OTEL_SERVICE_NAME,
OTEL_METRICS_EXPORTER_OTLP_ENDPOINT,
OTEL_METRICS_BASIC_AUTH_USERNAME,
OTEL_METRICS_BASIC_AUTH_PASSWORD,
OTEL_METRICS_OTLP_SPAN_EXPORTER,
OTEL_METRICS_EXPORTER_OTLP_INSECURE,
)
from open_webui.models.users import Users
# Interval, in milliseconds, passed as ``export_interval_millis`` to the
# periodic metric reader that pushes metrics to the OTLP endpoint.
_EXPORT_INTERVAL_MILLIS = 10_000  # 10 seconds
def _build_meter_provider(resource: Resource) -> MeterProvider:
    """Create the MeterProvider that exports Open WebUI metrics over OTLP.

    The exporter speaks OTLP/HTTP when ``OTEL_METRICS_OTLP_SPAN_EXPORTER``
    equals ``"http"`` and OTLP/gRPC in every other case. When both basic-auth
    settings are non-empty an ``authorization: Basic ...`` header is passed
    to the exporter; otherwise an empty header list is passed.

    Args:
        resource: Resource attached to the returned provider.

    Returns:
        A MeterProvider with a single periodic OTLP reader and the views for
        the HTTP and user instruments registered.
    """
    # Optional HTTP basic-auth header for the collector.
    headers = []
    if OTEL_METRICS_BASIC_AUTH_USERNAME and OTEL_METRICS_BASIC_AUTH_PASSWORD:
        credentials = (
            f"{OTEL_METRICS_BASIC_AUTH_USERNAME}:{OTEL_METRICS_BASIC_AUTH_PASSWORD}"
        )
        encoded_credentials = b64encode(credentials.encode()).decode()
        headers = [("authorization", f"Basic {encoded_credentials}")]

    # Pick the transport first so the reader is only constructed once.
    if OTEL_METRICS_OTLP_SPAN_EXPORTER == "http":
        exporter = OTLPHttpMetricExporter(
            endpoint=OTEL_METRICS_EXPORTER_OTLP_ENDPOINT, headers=headers
        )
    else:
        exporter = OTLPMetricExporter(
            endpoint=OTEL_METRICS_EXPORTER_OTLP_ENDPOINT,
            insecure=OTEL_METRICS_EXPORTER_OTLP_INSECURE,
            headers=headers,
        )

    # The periodic reader pushes collected metrics through the chosen exporter.
    readers: List[PeriodicExportingMetricReader] = [
        PeriodicExportingMetricReader(
            exporter,
            export_interval_millis=_EXPORT_INTERVAL_MILLIS,
        )
    ]

    # HTTP instruments keep only these attributes to limit cardinality
    # (drops user-agent etc.); the user gauges get plain pass-through views.
    http_attribute_keys = ("http.method", "http.route", "http.status_code")
    http_instruments = ("http.server.duration", "http.server.requests")
    user_instruments = (
        "webui.users.total",
        "webui.users.active",
        "webui.users.active.today",
    )

    views: List[View] = [
        View(instrument_name=name, attribute_keys=list(http_attribute_keys))
        for name in http_instruments
    ]
    views.extend(View(instrument_name=name) for name in user_instruments)

    return MeterProvider(
        resource=resource,
        metric_readers=readers,
        views=views,
    )
def setup_metrics(app: FastAPI, resource: Resource) -> None:
    """Attach OTel metrics middleware to *app* and initialise the provider.

    Installs the provider built by ``_build_meter_provider`` as the global
    meter provider, creates the HTTP counter/histogram and the three user
    gauges, and registers an ``http`` middleware on *app* that records one
    counter increment and one duration sample per request.

    Args:
        app: FastAPI application to instrument.
        resource: Resource forwarded to ``_build_meter_provider``.
    """
    metrics.set_meter_provider(_build_meter_provider(resource))
    meter = metrics.get_meter(__name__)

    # Instruments
    request_counter = meter.create_counter(
        name="http.server.requests",
        description="Total HTTP requests",
        unit="1",
    )
    duration_histogram = meter.create_histogram(
        name="http.server.duration",
        description="HTTP request duration",
        unit="ms",
    )

    def observe_active_users(
        options: metrics.CallbackOptions,
    ) -> Sequence[metrics.Observation]:
        """Report the value of ``Users.get_active_user_count()``."""
        return [
            metrics.Observation(
                value=Users.get_active_user_count(),
            )
        ]

    def observe_total_registered_users(
        options: metrics.CallbackOptions,
    ) -> Sequence[metrics.Observation]:
        """Report the number of registered users (0 when the count is falsy)."""
        # IMPORTANT: Use get_num_users() for efficient COUNT(*) query.
        # Do NOT use len(get_users()["users"]) - it loads ALL user records into memory,
        # causing connection pool exhaustion on high-latency databases (e.g., Aurora).
        return [
            metrics.Observation(
                value=Users.get_num_users() or 0,
            )
        ]

    def observe_users_active_today(
        options: metrics.CallbackOptions,
    ) -> Sequence[metrics.Observation]:
        """Report the value of ``Users.get_num_users_active_today()``."""
        return [metrics.Observation(value=Users.get_num_users_active_today())]

    meter.create_observable_gauge(
        name="webui.users.total",
        description="Total number of registered users",
        unit="users",
        callbacks=[observe_total_registered_users],
    )
    meter.create_observable_gauge(
        name="webui.users.active",
        description="Number of currently active users",
        unit="users",
        callbacks=[observe_active_users],
    )
    meter.create_observable_gauge(
        name="webui.users.active.today",
        description="Number of users active since midnight today",
        unit="users",
        callbacks=[observe_users_active_today],
    )

    # FastAPI middleware
    @app.middleware("http")
    async def _metrics_middleware(request: Request, call_next):
        """Record request count and duration for every HTTP request."""
        start_time = time.perf_counter()
        # Default to 500 so the attribute is always an int: it is only
        # overwritten once a response exists. This also covers exceptions
        # that are not ``Exception`` subclasses (e.g. task cancellation),
        # which previously left the value as None - not a valid attribute
        # value.
        status_code = 500
        try:
            response = await call_next(request)
            status_code = getattr(response, "status_code", 500)
            return response
        finally:
            elapsed_ms = (time.perf_counter() - start_time) * 1000.0

            # Route template e.g. "/items/{item_id}" instead of real path.
            # NOTE(review): when no route object is present in the scope this
            # falls back to the raw URL path, which may produce many distinct
            # label values - confirm this is acceptable.
            route = request.scope.get("route")
            route_path = getattr(route, "path", request.url.path)

            attrs: Dict[str, str | int] = {
                "http.method": request.method,
                "http.route": route_path,
                "http.status_code": status_code,
            }

            request_counter.add(1, attrs)
            duration_histogram.record(elapsed_ms, attrs)
|