File size: 6,711 Bytes
cfb0fa4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
"""OpenTelemetry metrics bootstrap for Open WebUI.

This module initialises a MeterProvider that sends metrics to an OTLP
collector. The collector is responsible for exposing a Prometheus
`/metrics` endpoint – WebUI does **not** expose it directly.

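Metrics collected:

* http.server.requests (counter)
* http.server.duration (histogram, milliseconds)
* webui.users.total (observable gauge)
* webui.users.active (observable gauge)
* webui.users.active.today (observable gauge)

Attributes used on the HTTP metrics: http.method, http.route, http.status_code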

If you wish to add more attributes (e.g. user-agent) you can, but beware of
high-cardinality label sets.
"""

from __future__ import annotations

import time
from typing import Dict, List, Sequence, Any
from base64 import b64encode

from fastapi import FastAPI, Request
from opentelemetry import metrics
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
    OTLPMetricExporter,
)

from opentelemetry.exporter.otlp.proto.http.metric_exporter import (
    OTLPMetricExporter as OTLPHttpMetricExporter,
)
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.view import View
from opentelemetry.sdk.metrics.export import (
    PeriodicExportingMetricReader,
)
from opentelemetry.sdk.resources import Resource

from open_webui.env import (
    OTEL_SERVICE_NAME,
    OTEL_METRICS_EXPORTER_OTLP_ENDPOINT,
    OTEL_METRICS_BASIC_AUTH_USERNAME,
    OTEL_METRICS_BASIC_AUTH_PASSWORD,
    OTEL_METRICS_OTLP_SPAN_EXPORTER,
    OTEL_METRICS_EXPORTER_OTLP_INSECURE,
)
from open_webui.models.users import Users

# Interval handed to PeriodicExportingMetricReader as `export_interval_millis`.
_EXPORT_INTERVAL_MILLIS = 10_000  # 10 seconds


def _build_meter_provider(resource: Resource) -> MeterProvider:
    """Return a MeterProvider that periodically exports metrics over OTLP.

    The transport is chosen by ``OTEL_METRICS_OTLP_SPAN_EXPORTER``: the value
    ``"http"`` selects the OTLP/HTTP exporter, any other value selects the
    OTLP/gRPC exporter. When both ``OTEL_METRICS_BASIC_AUTH_USERNAME`` and
    ``OTEL_METRICS_BASIC_AUTH_PASSWORD`` are set, an HTTP Basic
    ``authorization`` header is attached to every export.

    Args:
        resource: Resource passed through to the provider.

    Returns:
        A provider with a single periodic reader (exporting every
        ``_EXPORT_INTERVAL_MILLIS``) and the views declared below.
    """
    # Optional Basic credentials, kept as (name, value) pairs.
    headers: List[tuple[str, str]] = []
    if OTEL_METRICS_BASIC_AUTH_USERNAME and OTEL_METRICS_BASIC_AUTH_PASSWORD:
        credentials = (
            f"{OTEL_METRICS_BASIC_AUTH_USERNAME}:{OTEL_METRICS_BASIC_AUTH_PASSWORD}"
        )
        token = b64encode(credentials.encode()).decode()
        headers.append(("authorization", f"Basic {token}"))

    if OTEL_METRICS_OTLP_SPAN_EXPORTER == "http":
        # The OTLP/HTTP exporter documents `headers` as a mapping, so hand it
        # a dict rather than a list of pairs. An empty dict is still falsy,
        # exactly like the empty list it replaces.
        exporter = OTLPHttpMetricExporter(
            endpoint=OTEL_METRICS_EXPORTER_OTLP_ENDPOINT,
            headers=dict(headers),
        )
    else:
        # `insecure` is only passed to the gRPC exporter.
        exporter = OTLPMetricExporter(
            endpoint=OTEL_METRICS_EXPORTER_OTLP_ENDPOINT,
            insecure=OTEL_METRICS_EXPORTER_OTLP_INSECURE,
            headers=headers,
        )

    # One periodic reader pushes metrics through whichever exporter was chosen.
    reader = PeriodicExportingMetricReader(
        exporter,
        export_interval_millis=_EXPORT_INTERVAL_MILLIS,
    )

    # Views limit cardinality on the HTTP instruments by keeping only these
    # attribute keys (dropping e.g. user-agent). The user gauges get plain
    # views with no attribute filter.
    views: List[View] = [
        View(
            instrument_name="http.server.duration",
            attribute_keys=["http.method", "http.route", "http.status_code"],
        ),
        View(
            instrument_name="http.server.requests",
            attribute_keys=["http.method", "http.route", "http.status_code"],
        ),
        View(
            instrument_name="webui.users.total",
        ),
        View(
            instrument_name="webui.users.active",
        ),
        View(
            instrument_name="webui.users.active.today",
        ),
    ]

    return MeterProvider(
        resource=resource,
        metric_readers=[reader],
        views=views,
    )


def setup_metrics(app: FastAPI, resource: Resource) -> None:
    """Initialise the global meter provider and attach metrics to *app*.

    Installs the provider built by ``_build_meter_provider`` as the global
    meter provider, creates the ``http.server.requests`` counter and the
    ``http.server.duration`` histogram, registers three observable gauges
    backed by ``Users`` queries, and adds an HTTP middleware that records one
    counter increment and one duration sample (in milliseconds) per request.

    Args:
        app: FastAPI application that receives the middleware.
        resource: Resource forwarded to ``_build_meter_provider``.
    """

    metrics.set_meter_provider(_build_meter_provider(resource))
    meter = metrics.get_meter(__name__)

    # Instruments
    request_counter = meter.create_counter(
        name="http.server.requests",
        description="Total HTTP requests",
        unit="1",
    )
    duration_histogram = meter.create_histogram(
        name="http.server.duration",
        description="HTTP request duration",
        unit="ms",
    )

    def _single_observation(count: Any) -> Sequence[metrics.Observation]:
        # Report a falsy/None count as 0 so every gauge always yields a number.
        return [metrics.Observation(value=count or 0)]

    def observe_active_users(
        options: metrics.CallbackOptions,
    ) -> Sequence[metrics.Observation]:
        return _single_observation(Users.get_active_user_count())

    def observe_total_registered_users(
        options: metrics.CallbackOptions,
    ) -> Sequence[metrics.Observation]:
        # IMPORTANT: Use get_num_users() for efficient COUNT(*) query.
        # Do NOT use len(get_users()["users"]) - it loads ALL user records into memory,
        # causing connection pool exhaustion on high-latency databases (e.g., Aurora).
        return _single_observation(Users.get_num_users())

    def observe_users_active_today(
        options: metrics.CallbackOptions,
    ) -> Sequence[metrics.Observation]:
        return _single_observation(Users.get_num_users_active_today())

    meter.create_observable_gauge(
        name="webui.users.total",
        description="Total number of registered users",
        unit="users",
        callbacks=[observe_total_registered_users],
    )

    meter.create_observable_gauge(
        name="webui.users.active",
        description="Number of currently active users",
        unit="users",
        callbacks=[observe_active_users],
    )

    meter.create_observable_gauge(
        name="webui.users.active.today",
        description="Number of users active since midnight today",
        unit="users",
        callbacks=[observe_users_active_today],
    )

    # FastAPI middleware
    @app.middleware("http")
    async def _metrics_middleware(request: Request, call_next):
        start_time = time.perf_counter()

        # Assume failure until a response exists. This covers ordinary
        # exceptions and also BaseException subclasses such as
        # asyncio.CancelledError, which `except Exception` would not catch and
        # which would otherwise leave the status attribute as None.
        status_code = 500
        try:
            response = await call_next(request)
            status_code = getattr(response, "status_code", 500)
            return response
        finally:
            # Runs on success and on error alike; any exception keeps
            # propagating once the sample has been recorded.
            elapsed_ms = (time.perf_counter() - start_time) * 1000.0

            # Route template e.g. "/items/{item_id}" instead of real path.
            # Falls back to the raw URL path when no route is in the scope.
            route = request.scope.get("route")
            route_path = getattr(route, "path", request.url.path)

            attrs: Dict[str, str | int] = {
                "http.method": request.method,
                "http.route": route_path,
                "http.status_code": status_code,
            }

            request_counter.add(1, attrs)
            duration_histogram.record(elapsed_ms, attrs)