victor HF Staff committed on
Commit
be2778b
·
unverified ·
1 Parent(s): 4f98b48

Add Prometheus metrics support with prom-client (#1934)

Browse files
.env CHANGED
@@ -137,6 +137,8 @@ WEBHOOK_URL_REPORT_ASSISTANT=#provide slack webhook url to get notified for repo
137
 
138
 
139
  ### Metrics ###
 
 
140
  LOG_LEVEL=info
141
 
142
 
 
137
 
138
 
139
  ### Metrics ###
140
+ METRICS_ENABLED=false
141
+ METRICS_PORT=5565
142
  LOG_LEVEL=info
143
 
144
 
chart/env/prod.yaml CHANGED
@@ -58,7 +58,7 @@ envVars:
58
  COOKIE_SAMESITE: "lax"
59
  COOKIE_SECURE: "true"
60
  EXPOSE_API: "true"
61
- METRICS_PORT: "5565"
62
  LOG_LEVEL: "debug"
63
 
64
  OPENAI_BASE_URL: "https://router.huggingface.co/v1"
 
58
  COOKIE_SAMESITE: "lax"
59
  COOKIE_SECURE: "true"
60
  EXPOSE_API: "true"
61
+ METRICS_ENABLED: "true"
62
  LOG_LEVEL: "debug"
63
 
64
  OPENAI_BASE_URL: "https://router.huggingface.co/v1"
chart/templates/deployment.yaml CHANGED
@@ -53,6 +53,11 @@ spec:
53
  - containerPort: {{ $.Values.envVars.APP_PORT | default 3000 | int }}
54
  name: http
55
  protocol: TCP
 
 
 
 
 
56
  resources: {{ toYaml .Values.resources | nindent 12 }}
57
  {{- with $.Values.extraEnv }}
58
  env:
 
53
  - containerPort: {{ $.Values.envVars.APP_PORT | default 3000 | int }}
54
  name: http
55
  protocol: TCP
56
+ {{- if eq "true" $.Values.envVars.METRICS_ENABLED }}
57
+ - containerPort: {{ $.Values.envVars.METRICS_PORT | default 5565 | int }}
58
+ name: metrics
59
+ protocol: TCP
60
+ {{- end }}
61
  resources: {{ toYaml .Values.resources | nindent 12 }}
62
  {{- with $.Values.extraEnv }}
63
  env:
chart/templates/service-monitor.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- if eq "true" $.Values.envVars.METRICS_ENABLED }}
2
+ apiVersion: monitoring.coreos.com/v1
3
+ kind: ServiceMonitor
4
+ metadata:
5
+ labels: {{ include "labels.standard" . | nindent 4 }}
6
+ name: {{ include "name" . }}
7
+ namespace: {{ .Release.Namespace }}
8
+ spec:
9
+ selector:
10
+ matchLabels: {{ include "labels.standard" . | nindent 6 }}
11
+ endpoints:
12
+ - port: metrics
13
+ path: /metrics
14
+ interval: 15s
15
+ {{- end }}
chart/templates/service.yaml CHANGED
@@ -11,5 +11,11 @@ spec:
11
  port: 80
12
  protocol: TCP
13
  targetPort: http
 
 
 
 
 
 
14
  selector: {{ include "labels.standard" . | nindent 4 }}
15
  type: {{.Values.service.type}}
 
11
  port: 80
12
  protocol: TCP
13
  targetPort: http
14
{{- if eq "true" $.Values.envVars.METRICS_ENABLED }}
- name: metrics
  port: {{ $.Values.envVars.METRICS_PORT | default 5565 | int }}
  protocol: TCP
  # A quoted number is treated as a *named* port by Kubernetes' IntOrString
  # handling and would never resolve; reference the container port by the
  # name declared in the deployment ("metrics") instead.
  targetPort: metrics
{{- end }}
20
  selector: {{ include "labels.standard" . | nindent 4 }}
21
  type: {{.Values.service.type}}
package-lock.json CHANGED
@@ -35,6 +35,7 @@
35
  "pino": "^9.0.0",
36
  "pino-pretty": "^11.0.0",
37
  "postcss": "^8.4.31",
 
38
  "satori": "^0.10.11",
39
  "satori-html": "^0.3.2",
40
  "sharp": "^0.33.4",
@@ -1748,6 +1749,15 @@
1748
  "node": ">= 8"
1749
  }
1750
  },
 
 
 
 
 
 
 
 
 
1751
  "node_modules/@pkgjs/parseargs": {
1752
  "version": "0.11.0",
1753
  "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
@@ -3515,6 +3525,12 @@
3515
  "license": "MIT",
3516
  "optional": true
3517
  },
 
 
 
 
 
 
3518
  "node_modules/brace-expansion": {
3519
  "version": "2.0.2",
3520
  "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
@@ -8066,6 +8082,19 @@
8066
  ],
8067
  "license": "MIT"
8068
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
8069
  "node_modules/psl": {
8070
  "version": "1.15.0",
8071
  "resolved": "https://registry.npmjs.org/psl/-/psl-1.15.0.tgz",
@@ -9303,6 +9332,15 @@
9303
  "streamx": "^2.15.0"
9304
  }
9305
  },
 
 
 
 
 
 
 
 
 
9306
  "node_modules/text-decoder": {
9307
  "version": "1.2.3",
9308
  "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.3.tgz",
 
35
  "pino": "^9.0.0",
36
  "pino-pretty": "^11.0.0",
37
  "postcss": "^8.4.31",
38
+ "prom-client": "^15.1.3",
39
  "satori": "^0.10.11",
40
  "satori-html": "^0.3.2",
41
  "sharp": "^0.33.4",
 
1749
  "node": ">= 8"
1750
  }
1751
  },
1752
+ "node_modules/@opentelemetry/api": {
1753
+ "version": "1.9.0",
1754
+ "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
1755
+ "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
1756
+ "license": "Apache-2.0",
1757
+ "engines": {
1758
+ "node": ">=8.0.0"
1759
+ }
1760
+ },
1761
  "node_modules/@pkgjs/parseargs": {
1762
  "version": "0.11.0",
1763
  "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
 
3525
  "license": "MIT",
3526
  "optional": true
3527
  },
3528
+ "node_modules/bintrees": {
3529
+ "version": "1.0.2",
3530
+ "resolved": "https://registry.npmjs.org/bintrees/-/bintrees-1.0.2.tgz",
3531
+ "integrity": "sha512-VOMgTMwjAaUG580SXn3LacVgjurrbMme7ZZNYGSSV7mmtY6QQRh0Eg3pwIcntQ77DErK1L0NxkbetjcoXzVwKw==",
3532
+ "license": "MIT"
3533
+ },
3534
  "node_modules/brace-expansion": {
3535
  "version": "2.0.2",
3536
  "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
 
8082
  ],
8083
  "license": "MIT"
8084
  },
8085
+ "node_modules/prom-client": {
8086
+ "version": "15.1.3",
8087
+ "resolved": "https://registry.npmjs.org/prom-client/-/prom-client-15.1.3.tgz",
8088
+ "integrity": "sha512-6ZiOBfCywsD4k1BN9IX0uZhF+tJkV8q8llP64G5Hajs4JOeVLPCwpPVcpXy3BwYiUGgyJzsJJQeOIv7+hDSq8g==",
8089
+ "license": "Apache-2.0",
8090
+ "dependencies": {
8091
+ "@opentelemetry/api": "^1.4.0",
8092
+ "tdigest": "^0.1.1"
8093
+ },
8094
+ "engines": {
8095
+ "node": "^16 || ^18 || >=20"
8096
+ }
8097
+ },
8098
  "node_modules/psl": {
8099
  "version": "1.15.0",
8100
  "resolved": "https://registry.npmjs.org/psl/-/psl-1.15.0.tgz",
 
9332
  "streamx": "^2.15.0"
9333
  }
9334
  },
9335
+ "node_modules/tdigest": {
9336
+ "version": "0.1.2",
9337
+ "resolved": "https://registry.npmjs.org/tdigest/-/tdigest-0.1.2.tgz",
9338
+ "integrity": "sha512-+G0LLgjjo9BZX2MfdvPfH+MKLCrxlXSYec5DaPYP1fe6Iyhf0/fSmJ0bFiZ1F8BT6cGXl2LpltQptzjXKWEkKA==",
9339
+ "license": "MIT",
9340
+ "dependencies": {
9341
+ "bintrees": "1.0.2"
9342
+ }
9343
+ },
9344
  "node_modules/text-decoder": {
9345
  "version": "1.2.3",
9346
  "resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.3.tgz",
package.json CHANGED
@@ -93,6 +93,7 @@
93
  "pino": "^9.0.0",
94
  "pino-pretty": "^11.0.0",
95
  "postcss": "^8.4.31",
 
96
  "satori": "^0.10.11",
97
  "satori-html": "^0.3.2",
98
  "sharp": "^0.33.4",
 
93
  "pino": "^9.0.0",
94
  "pino-pretty": "^11.0.0",
95
  "postcss": "^8.4.31",
96
+ "prom-client": "^15.1.3",
97
  "satori": "^0.10.11",
98
  "satori-html": "^0.3.2",
99
  "sharp": "^0.33.4",
src/hooks.server.ts CHANGED
@@ -18,6 +18,7 @@ import { initExitHandler } from "$lib/server/exitHandler";
18
  import { refreshConversationStats } from "$lib/jobs/refresh-conversation-stats";
19
  import { adminTokenManager } from "$lib/server/adminToken";
20
  import { isHostLocalhost } from "$lib/server/isURLLocal";
 
21
 
22
  export const init: ServerInit = async () => {
23
  // Wait for config to be fully loaded
@@ -41,6 +42,10 @@ export const init: ServerInit = async () => {
41
  logger.info("Starting server...");
42
  initExitHandler();
43
 
 
 
 
 
44
  checkAndRunMigrations();
45
  refreshConversationStats();
46
 
 
18
  import { refreshConversationStats } from "$lib/jobs/refresh-conversation-stats";
19
  import { adminTokenManager } from "$lib/server/adminToken";
20
  import { isHostLocalhost } from "$lib/server/isURLLocal";
21
+ import { MetricsServer } from "$lib/server/metrics";
22
 
23
  export const init: ServerInit = async () => {
24
  // Wait for config to be fully loaded
 
42
  logger.info("Starting server...");
43
  initExitHandler();
44
 
45
+ if (config.METRICS_ENABLED === "true") {
46
+ MetricsServer.getInstance();
47
+ }
48
+
49
  checkAndRunMigrations();
50
  refreshConversationStats();
51
 
src/lib/server/config.ts CHANGED
@@ -151,7 +151,12 @@ export const ready = (async () => {
151
  }
152
  })();
153
 
154
- type ExtraConfigKeys = "HF_TOKEN" | "ENABLE_ASSISTANTS";
 
 
 
 
 
155
 
156
  type ConfigProxy = ConfigManager & { [K in ConfigKey | ExtraConfigKeys]: string };
157
 
 
151
  }
152
  })();
153
 
154
+ type ExtraConfigKeys =
155
+ | "HF_TOKEN"
156
+ | "OLD_MODELS"
157
+ | "ENABLE_ASSISTANTS"
158
+ | "METRICS_ENABLED"
159
+ | "METRICS_PORT";
160
 
161
  type ConfigProxy = ConfigManager & { [K in ConfigKey | ExtraConfigKeys]: string };
162
 
src/lib/server/metrics.ts ADDED
@@ -0,0 +1,255 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { collectDefaultMetrics, Counter, Registry, Summary } from "prom-client";
2
+ import { logger } from "$lib/server/logger";
3
+ import { config } from "$lib/server/config";
4
+ import { createServer, type Server as HttpServer } from "http";
5
+ import { onExit } from "./exitHandler";
6
+
7
+ type ModelLabel = "model";
8
+ type ToolLabel = "tool";
9
+
10
+ interface Metrics {
11
+ model: {
12
+ conversationsTotal: Counter<ModelLabel>;
13
+ messagesTotal: Counter<ModelLabel>;
14
+ tokenCountTotal: Counter<ModelLabel>;
15
+ timePerOutputToken: Summary<ModelLabel>;
16
+ timeToFirstToken: Summary<ModelLabel>;
17
+ latency: Summary<ModelLabel>;
18
+ votesPositive: Counter<ModelLabel>;
19
+ votesNegative: Counter<ModelLabel>;
20
+ };
21
+ webSearch: {
22
+ requestCount: Counter;
23
+ pageFetchCount: Counter;
24
+ pageFetchCountError: Counter;
25
+ pageFetchDuration: Summary;
26
+ embeddingDuration: Summary;
27
+ };
28
+ tool: {
29
+ toolUseCount: Counter<ToolLabel>;
30
+ toolUseCountError: Counter<ToolLabel>;
31
+ toolUseDuration: Summary<ToolLabel>;
32
+ timeToChooseTools: Summary<ModelLabel>;
33
+ };
34
+ }
35
+
36
+ export class MetricsServer {
37
+ private static instance: MetricsServer | undefined;
38
+ private readonly enabled: boolean;
39
+ private readonly register: Registry;
40
+ private readonly metrics: Metrics;
41
+ private httpServer: HttpServer | undefined;
42
+
43
+ private constructor() {
44
+ this.enabled = config.METRICS_ENABLED === "true";
45
+ this.register = new Registry();
46
+
47
+ if (this.enabled) {
48
+ collectDefaultMetrics({ register: this.register });
49
+ }
50
+
51
+ this.metrics = this.createMetrics();
52
+
53
+ if (this.enabled) {
54
+ this.startStandaloneServer();
55
+ }
56
+ }
57
+
58
+ public static getInstance(): MetricsServer {
59
+ if (!MetricsServer.instance) {
60
+ MetricsServer.instance = new MetricsServer();
61
+ }
62
+ return MetricsServer.instance;
63
+ }
64
+
65
+ public static getMetrics(): Metrics {
66
+ return MetricsServer.getInstance().metrics;
67
+ }
68
+
69
+ public static isEnabled(): boolean {
70
+ return config.METRICS_ENABLED === "true";
71
+ }
72
+
73
+ public async render(): Promise<string> {
74
+ if (!this.enabled) {
75
+ return "";
76
+ }
77
+
78
+ return this.register.metrics();
79
+ }
80
+
81
+ private createMetrics(): Metrics {
82
+ const labelNames: ModelLabel[] = ["model"];
83
+ const toolLabelNames: ToolLabel[] = ["tool"];
84
+
85
+ const noopRegistry = new Registry();
86
+
87
+ const registry = this.enabled ? this.register : noopRegistry;
88
+
89
+ return {
90
+ model: {
91
+ conversationsTotal: new Counter<ModelLabel>({
92
+ name: "model_conversations_total",
93
+ help: "Total number of conversations",
94
+ labelNames,
95
+ registers: [registry],
96
+ }),
97
+ messagesTotal: new Counter<ModelLabel>({
98
+ name: "model_messages_total",
99
+ help: "Total number of messages",
100
+ labelNames,
101
+ registers: [registry],
102
+ }),
103
+ tokenCountTotal: new Counter<ModelLabel>({
104
+ name: "model_token_count_total",
105
+ help: "Total number of tokens emitted by the model",
106
+ labelNames,
107
+ registers: [registry],
108
+ }),
109
+ timePerOutputToken: new Summary<ModelLabel>({
110
+ name: "model_time_per_output_token_ms",
111
+ help: "Per-token latency in milliseconds",
112
+ labelNames,
113
+ registers: [registry],
114
+ maxAgeSeconds: 5 * 60,
115
+ ageBuckets: 5,
116
+ }),
117
+ timeToFirstToken: new Summary<ModelLabel>({
118
+ name: "model_time_to_first_token_ms",
119
+ help: "Time to first token in milliseconds",
120
+ labelNames,
121
+ registers: [registry],
122
+ maxAgeSeconds: 5 * 60,
123
+ ageBuckets: 5,
124
+ }),
125
+ latency: new Summary<ModelLabel>({
126
+ name: "model_latency_ms",
127
+ help: "Total time to complete a response in milliseconds",
128
+ labelNames,
129
+ registers: [registry],
130
+ maxAgeSeconds: 5 * 60,
131
+ ageBuckets: 5,
132
+ }),
133
+ votesPositive: new Counter<ModelLabel>({
134
+ name: "model_votes_positive_total",
135
+ help: "Total number of positive votes on model messages",
136
+ labelNames,
137
+ registers: [registry],
138
+ }),
139
+ votesNegative: new Counter<ModelLabel>({
140
+ name: "model_votes_negative_total",
141
+ help: "Total number of negative votes on model messages",
142
+ labelNames,
143
+ registers: [registry],
144
+ }),
145
+ },
146
+ webSearch: {
147
+ requestCount: new Counter({
148
+ name: "web_search_request_count",
149
+ help: "Total number of web search requests",
150
+ registers: [registry],
151
+ }),
152
+ pageFetchCount: new Counter({
153
+ name: "web_search_page_fetch_count",
154
+ help: "Total number of web search page fetches",
155
+ registers: [registry],
156
+ }),
157
+ pageFetchCountError: new Counter({
158
+ name: "web_search_page_fetch_count_error",
159
+ help: "Total number of web search page fetch errors",
160
+ registers: [registry],
161
+ }),
162
+ pageFetchDuration: new Summary({
163
+ name: "web_search_page_fetch_duration_ms",
164
+ help: "Duration of web search page fetches in milliseconds",
165
+ registers: [registry],
166
+ maxAgeSeconds: 5 * 60,
167
+ ageBuckets: 5,
168
+ }),
169
+ embeddingDuration: new Summary({
170
+ name: "web_search_embedding_duration_ms",
171
+ help: "Duration of web search embeddings in milliseconds",
172
+ registers: [registry],
173
+ maxAgeSeconds: 5 * 60,
174
+ ageBuckets: 5,
175
+ }),
176
+ },
177
+ tool: {
178
+ toolUseCount: new Counter<ToolLabel>({
179
+ name: "tool_use_count",
180
+ help: "Total number of tool invocations",
181
+ labelNames: toolLabelNames,
182
+ registers: [registry],
183
+ }),
184
+ toolUseCountError: new Counter<ToolLabel>({
185
+ name: "tool_use_count_error",
186
+ help: "Total number of tool invocation errors",
187
+ labelNames: toolLabelNames,
188
+ registers: [registry],
189
+ }),
190
+ toolUseDuration: new Summary<ToolLabel>({
191
+ name: "tool_use_duration_ms",
192
+ help: "Duration of tool invocations in milliseconds",
193
+ labelNames: toolLabelNames,
194
+ registers: [registry],
195
+ maxAgeSeconds: 30 * 60,
196
+ ageBuckets: 5,
197
+ }),
198
+ timeToChooseTools: new Summary<ModelLabel>({
199
+ name: "time_to_choose_tools_ms",
200
+ help: "Time spent selecting tools in milliseconds",
201
+ labelNames,
202
+ registers: [registry],
203
+ maxAgeSeconds: 5 * 60,
204
+ ageBuckets: 5,
205
+ }),
206
+ },
207
+ };
208
+ }
209
+
210
+ private startStandaloneServer() {
211
+ const port = Number(config.METRICS_PORT || "5565");
212
+
213
+ if (!Number.isInteger(port) || port < 0 || port > 65535) {
214
+ logger.warn(`Invalid METRICS_PORT value: ${config.METRICS_PORT}`);
215
+ return;
216
+ }
217
+
218
+ this.httpServer = createServer(async (req, res) => {
219
+ if (req.method !== "GET") {
220
+ res.statusCode = 405;
221
+ res.end("Method Not Allowed");
222
+ return;
223
+ }
224
+
225
+ try {
226
+ const payload = await this.render();
227
+ res.setHeader("Content-Type", "text/plain; version=0.0.4");
228
+ res.end(payload);
229
+ } catch (error) {
230
+ logger.error(error, "Failed to render metrics");
231
+ res.statusCode = 500;
232
+ res.end("Failed to render metrics");
233
+ }
234
+ });
235
+
236
+ this.httpServer.listen(port, () => {
237
+ logger.info(`Metrics server listening on port ${port}`);
238
+ });
239
+
240
+ onExit(async () => {
241
+ if (!this.httpServer) return;
242
+ logger.info("Shutting down metrics server...");
243
+ await new Promise<void>((resolve, reject) => {
244
+ this.httpServer?.close((err) => {
245
+ if (err) {
246
+ reject(err);
247
+ return;
248
+ }
249
+ resolve();
250
+ });
251
+ }).catch((error) => logger.error(error, "Failed to close metrics server"));
252
+ this.httpServer = undefined;
253
+ });
254
+ }
255
+ }
src/routes/conversation/+server.ts CHANGED
@@ -9,6 +9,7 @@ import { models, validateModel } from "$lib/server/models";
9
  import { v4 } from "uuid";
10
  import { authCondition } from "$lib/server/auth";
11
  import { usageLimits } from "$lib/server/usageLimits";
 
12
 
13
  export const POST: RequestHandler = async ({ locals, request }) => {
14
  const body = await request.text();
@@ -97,6 +98,10 @@ export const POST: RequestHandler = async ({ locals, request }) => {
97
  ...(values.fromShare ? { meta: { fromShareId: values.fromShare } } : {}),
98
  });
99
 
 
 
 
 
100
  return new Response(
101
  JSON.stringify({
102
  conversationId: res.insertedId.toString(),
 
9
  import { v4 } from "uuid";
10
  import { authCondition } from "$lib/server/auth";
11
  import { usageLimits } from "$lib/server/usageLimits";
12
+ import { MetricsServer } from "$lib/server/metrics";
13
 
14
  export const POST: RequestHandler = async ({ locals, request }) => {
15
  const body = await request.text();
 
98
  ...(values.fromShare ? { meta: { fromShareId: values.fromShare } } : {}),
99
  });
100
 
101
+ if (MetricsServer.isEnabled()) {
102
+ MetricsServer.getMetrics().model.conversationsTotal.inc({ model: values.model });
103
+ }
104
+
105
  return new Response(
106
  JSON.stringify({
107
  conversationId: res.insertedId.toString(),
src/routes/conversation/[id]/+server.ts CHANGED
@@ -23,6 +23,7 @@ import { textGeneration } from "$lib/server/textGeneration";
23
  import type { TextGenerationContext } from "$lib/server/textGeneration/types";
24
  import { logger } from "$lib/server/logger.js";
25
  import { AbortRegistry } from "$lib/server/abortRegistry";
 
26
 
27
  export async function POST({ request, locals, params, getClientAddress }) {
28
  const id = z.string().parse(params.id);
@@ -313,6 +314,11 @@ export async function POST({ request, locals, params, getClientAddress }) {
313
  let clientDetached = false;
314
 
315
  let lastTokenTimestamp: undefined | Date = undefined;
 
 
 
 
 
316
 
317
  const persistConversation = async () => {
318
  await collections.conversations.updateOne(
@@ -344,9 +350,24 @@ export async function POST({ request, locals, params, getClientAddress }) {
344
  if (event.token === "") return;
345
  messageToWriteTo.content += event.token;
346
 
347
- if (!lastTokenTimestamp) {
348
- lastTokenTimestamp = new Date();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  }
 
350
  lastTokenTimestamp = new Date();
351
  }
352
 
@@ -366,6 +387,10 @@ export async function POST({ request, locals, params, getClientAddress }) {
366
  messageToWriteTo.interrupted = event.interrupted;
367
  messageToWriteTo.content = initialMessageContent + event.text;
368
  finalAnswerReceived = true;
 
 
 
 
369
  }
370
 
371
  // Add file
@@ -533,6 +558,10 @@ export async function POST({ request, locals, params, getClientAddress }) {
533
  },
534
  });
535
 
 
 
 
 
536
  // Todo: maybe we should wait for the message to be saved before ending the response - in case of errors
537
  return new Response(stream, {
538
  headers: {
 
23
  import type { TextGenerationContext } from "$lib/server/textGeneration/types";
24
  import { logger } from "$lib/server/logger.js";
25
  import { AbortRegistry } from "$lib/server/abortRegistry";
26
+ import { MetricsServer } from "$lib/server/metrics";
27
 
28
  export async function POST({ request, locals, params, getClientAddress }) {
29
  const id = z.string().parse(params.id);
 
314
  let clientDetached = false;
315
 
316
  let lastTokenTimestamp: undefined | Date = undefined;
317
+ let firstTokenObserved = false;
318
+ const metricsEnabled = MetricsServer.isEnabled();
319
+ const metrics = metricsEnabled ? MetricsServer.getMetrics() : undefined;
320
+ const metricsModelId = model.id ?? model.name ?? conv.model;
321
+ const metricsLabels = { model: metricsModelId };
322
 
323
  const persistConversation = async () => {
324
  await collections.conversations.updateOne(
 
350
  if (event.token === "") return;
351
  messageToWriteTo.content += event.token;
352
 
353
+ if (metricsEnabled && metrics) {
354
+ const now = Date.now();
355
+ metrics.model.tokenCountTotal.inc(metricsLabels);
356
+
357
+ if (!firstTokenObserved) {
358
+ metrics.model.timeToFirstToken.observe(
359
+ metricsLabels,
360
+ now - promptedAt.getTime()
361
+ );
362
+ firstTokenObserved = true;
363
+ }
364
+
365
+ const previousTimestamp = lastTokenTimestamp
366
+ ? lastTokenTimestamp.getTime()
367
+ : promptedAt.getTime();
368
+ metrics.model.timePerOutputToken.observe(metricsLabels, now - previousTimestamp);
369
  }
370
+
371
  lastTokenTimestamp = new Date();
372
  }
373
 
 
387
  messageToWriteTo.interrupted = event.interrupted;
388
  messageToWriteTo.content = initialMessageContent + event.text;
389
  finalAnswerReceived = true;
390
+
391
+ if (metricsEnabled && metrics) {
392
+ metrics.model.latency.observe(metricsLabels, Date.now() - promptedAt.getTime());
393
+ }
394
  }
395
 
396
  // Add file
 
558
  },
559
  });
560
 
561
+ if (metricsEnabled && metrics) {
562
+ metrics.model.messagesTotal.inc(metricsLabels);
563
+ }
564
+
565
  // Todo: maybe we should wait for the message to be saved before ending the response - in case of errors
566
  return new Response(stream, {
567
  headers: {
src/routes/metrics/+server.ts ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { config } from "$lib/server/config";
2
+ import { MetricsServer } from "$lib/server/metrics";
3
+
4
+ export async function GET() {
5
+ if (config.METRICS_ENABLED !== "true") {
6
+ return new Response("Not Found", { status: 404 });
7
+ }
8
+
9
+ const payload = await MetricsServer.getInstance().render();
10
+
11
+ return new Response(payload, {
12
+ status: 200,
13
+ headers: {
14
+ "Content-Type": "text/plain; version=0.0.4",
15
+ "Cache-Control": "no-store",
16
+ },
17
+ });
18
+ }