Add Prometheus metrics support with prom-client (#1934)
Browse files
- .env +2 -0
- chart/env/prod.yaml +1 -1
- chart/templates/deployment.yaml +5 -0
- chart/templates/service-monitor.yaml +15 -0
- chart/templates/service.yaml +6 -0
- package-lock.json +38 -0
- package.json +1 -0
- src/hooks.server.ts +5 -0
- src/lib/server/config.ts +6 -1
- src/lib/server/metrics.ts +255 -0
- src/routes/conversation/+server.ts +5 -0
- src/routes/conversation/[id]/+server.ts +31 -2
- src/routes/metrics/+server.ts +18 -0
.env
CHANGED
|
@@ -137,6 +137,8 @@ WEBHOOK_URL_REPORT_ASSISTANT=#provide slack webhook url to get notified for repo
|
|
| 137 |
|
| 138 |
|
| 139 |
### Metrics ###
|
|
|
|
|
|
|
| 140 |
LOG_LEVEL=info
|
| 141 |
|
| 142 |
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
### Metrics ###
|
| 140 |
+
METRICS_ENABLED=false
|
| 141 |
+
METRICS_PORT=5565
|
| 142 |
LOG_LEVEL=info
|
| 143 |
|
| 144 |
|
chart/env/prod.yaml
CHANGED
|
@@ -58,7 +58,7 @@ envVars:
|
|
| 58 |
COOKIE_SAMESITE: "lax"
|
| 59 |
COOKIE_SECURE: "true"
|
| 60 |
EXPOSE_API: "true"
|
| 61 |
-
|
| 62 |
LOG_LEVEL: "debug"
|
| 63 |
|
| 64 |
OPENAI_BASE_URL: "https://router.huggingface.co/v1"
|
|
|
|
| 58 |
COOKIE_SAMESITE: "lax"
|
| 59 |
COOKIE_SECURE: "true"
|
| 60 |
EXPOSE_API: "true"
|
| 61 |
+
METRICS_ENABLED: "true"
|
| 62 |
LOG_LEVEL: "debug"
|
| 63 |
|
| 64 |
OPENAI_BASE_URL: "https://router.huggingface.co/v1"
|
chart/templates/deployment.yaml
CHANGED
|
@@ -53,6 +53,11 @@ spec:
|
|
| 53 |
- containerPort: {{ $.Values.envVars.APP_PORT | default 3000 | int }}
|
| 54 |
name: http
|
| 55 |
protocol: TCP
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
resources: {{ toYaml .Values.resources | nindent 12 }}
|
| 57 |
{{- with $.Values.extraEnv }}
|
| 58 |
env:
|
|
|
|
| 53 |
- containerPort: {{ $.Values.envVars.APP_PORT | default 3000 | int }}
|
| 54 |
name: http
|
| 55 |
protocol: TCP
|
| 56 |
+
{{- if eq "true" $.Values.envVars.METRICS_ENABLED }}
|
| 57 |
+
- containerPort: {{ $.Values.envVars.METRICS_PORT | default 5565 | int }}
|
| 58 |
+
name: metrics
|
| 59 |
+
protocol: TCP
|
| 60 |
+
{{- end }}
|
| 61 |
resources: {{ toYaml .Values.resources | nindent 12 }}
|
| 62 |
{{- with $.Values.extraEnv }}
|
| 63 |
env:
|
chart/templates/service-monitor.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{{- /* Only rendered when metrics are switched on through envVars.METRICS_ENABLED. */ -}}
{{- if eq "true" $.Values.envVars.METRICS_ENABLED }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels: {{ include "labels.standard" . | nindent 4 }}
  name: {{ include "name" . }}
  namespace: {{ .Release.Namespace }}
spec:
  # Select the Service that carries the chart's standard labels.
  selector:
    matchLabels: {{ include "labels.standard" . | nindent 6 }}
  endpoints:
    # "metrics" is the name of the Service port that fronts the metrics listener.
    - port: metrics
      path: /metrics
      interval: 15s
{{- end }}
|
chart/templates/service.yaml
CHANGED
|
@@ -11,5 +11,11 @@ spec:
|
|
| 11 |
port: 80
|
| 12 |
protocol: TCP
|
| 13 |
targetPort: http
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
selector: {{ include "labels.standard" . | nindent 4 }}
|
| 15 |
type: {{.Values.service.type}}
|
|
|
|
| 11 |
port: 80
|
| 12 |
protocol: TCP
|
| 13 |
targetPort: http
|
| 14 |
+
{{- if eq "true" $.Values.envVars.METRICS_ENABLED }}
|
| 15 |
+
- name: metrics
|
| 16 |
+
port: {{ $.Values.envVars.METRICS_PORT | default 5565 | int }}
|
| 17 |
+
protocol: TCP
|
| 18 |
+
# Target the container port by name: a quoted number such as "5565" is read by
# Kubernetes as a port *name*, which an all-digit string cannot be.
targetPort: metrics
|
| 19 |
+
{{- end }}
|
| 20 |
selector: {{ include "labels.standard" . | nindent 4 }}
|
| 21 |
type: {{.Values.service.type}}
|
package-lock.json
CHANGED
|
@@ -35,6 +35,7 @@
|
|
| 35 |
"pino": "^9.0.0",
|
| 36 |
"pino-pretty": "^11.0.0",
|
| 37 |
"postcss": "^8.4.31",
|
|
|
|
| 38 |
"satori": "^0.10.11",
|
| 39 |
"satori-html": "^0.3.2",
|
| 40 |
"sharp": "^0.33.4",
|
|
@@ -1748,6 +1749,15 @@
|
|
| 1748 |
"node": ">= 8"
|
| 1749 |
}
|
| 1750 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1751 |
"node_modules/@pkgjs/parseargs": {
|
| 1752 |
"version": "0.11.0",
|
| 1753 |
"resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
|
|
@@ -3515,6 +3525,12 @@
|
|
| 3515 |
"license": "MIT",
|
| 3516 |
"optional": true
|
| 3517 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3518 |
"node_modules/brace-expansion": {
|
| 3519 |
"version": "2.0.2",
|
| 3520 |
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
|
@@ -8066,6 +8082,19 @@
|
|
| 8066 |
],
|
| 8067 |
"license": "MIT"
|
| 8068 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8069 |
"node_modules/psl": {
|
| 8070 |
"version": "1.15.0",
|
| 8071 |
"resolved": "https://registry.npmjs.org/psl/-/psl-1.15.0.tgz",
|
|
@@ -9303,6 +9332,15 @@
|
|
| 9303 |
"streamx": "^2.15.0"
|
| 9304 |
}
|
| 9305 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9306 |
"node_modules/text-decoder": {
|
| 9307 |
"version": "1.2.3",
|
| 9308 |
"resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.3.tgz",
|
|
|
|
| 35 |
"pino": "^9.0.0",
|
| 36 |
"pino-pretty": "^11.0.0",
|
| 37 |
"postcss": "^8.4.31",
|
| 38 |
+
"prom-client": "^15.1.3",
|
| 39 |
"satori": "^0.10.11",
|
| 40 |
"satori-html": "^0.3.2",
|
| 41 |
"sharp": "^0.33.4",
|
|
|
|
| 1749 |
"node": ">= 8"
|
| 1750 |
}
|
| 1751 |
},
|
| 1752 |
+
"node_modules/@opentelemetry/api": {
|
| 1753 |
+
"version": "1.9.0",
|
| 1754 |
+
"resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
|
| 1755 |
+
"integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
|
| 1756 |
+
"license": "Apache-2.0",
|
| 1757 |
+
"engines": {
|
| 1758 |
+
"node": ">=8.0.0"
|
| 1759 |
+
}
|
| 1760 |
+
},
|
| 1761 |
"node_modules/@pkgjs/parseargs": {
|
| 1762 |
"version": "0.11.0",
|
| 1763 |
"resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
|
|
|
|
| 3525 |
"license": "MIT",
|
| 3526 |
"optional": true
|
| 3527 |
},
|
| 3528 |
+
"node_modules/bintrees": {
|
| 3529 |
+
"version": "1.0.2",
|
| 3530 |
+
"resolved": "https://registry.npmjs.org/bintrees/-/bintrees-1.0.2.tgz",
|
| 3531 |
+
"integrity": "sha512-VOMgTMwjAaUG580SXn3LacVgjurrbMme7ZZNYGSSV7mmtY6QQRh0Eg3pwIcntQ77DErK1L0NxkbetjcoXzVwKw==",
|
| 3532 |
+
"license": "MIT"
|
| 3533 |
+
},
|
| 3534 |
"node_modules/brace-expansion": {
|
| 3535 |
"version": "2.0.2",
|
| 3536 |
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz",
|
|
|
|
| 8082 |
],
|
| 8083 |
"license": "MIT"
|
| 8084 |
},
|
| 8085 |
+
"node_modules/prom-client": {
|
| 8086 |
+
"version": "15.1.3",
|
| 8087 |
+
"resolved": "https://registry.npmjs.org/prom-client/-/prom-client-15.1.3.tgz",
|
| 8088 |
+
"integrity": "sha512-6ZiOBfCywsD4k1BN9IX0uZhF+tJkV8q8llP64G5Hajs4JOeVLPCwpPVcpXy3BwYiUGgyJzsJJQeOIv7+hDSq8g==",
|
| 8089 |
+
"license": "Apache-2.0",
|
| 8090 |
+
"dependencies": {
|
| 8091 |
+
"@opentelemetry/api": "^1.4.0",
|
| 8092 |
+
"tdigest": "^0.1.1"
|
| 8093 |
+
},
|
| 8094 |
+
"engines": {
|
| 8095 |
+
"node": "^16 || ^18 || >=20"
|
| 8096 |
+
}
|
| 8097 |
+
},
|
| 8098 |
"node_modules/psl": {
|
| 8099 |
"version": "1.15.0",
|
| 8100 |
"resolved": "https://registry.npmjs.org/psl/-/psl-1.15.0.tgz",
|
|
|
|
| 9332 |
"streamx": "^2.15.0"
|
| 9333 |
}
|
| 9334 |
},
|
| 9335 |
+
"node_modules/tdigest": {
|
| 9336 |
+
"version": "0.1.2",
|
| 9337 |
+
"resolved": "https://registry.npmjs.org/tdigest/-/tdigest-0.1.2.tgz",
|
| 9338 |
+
"integrity": "sha512-+G0LLgjjo9BZX2MfdvPfH+MKLCrxlXSYec5DaPYP1fe6Iyhf0/fSmJ0bFiZ1F8BT6cGXl2LpltQptzjXKWEkKA==",
|
| 9339 |
+
"license": "MIT",
|
| 9340 |
+
"dependencies": {
|
| 9341 |
+
"bintrees": "1.0.2"
|
| 9342 |
+
}
|
| 9343 |
+
},
|
| 9344 |
"node_modules/text-decoder": {
|
| 9345 |
"version": "1.2.3",
|
| 9346 |
"resolved": "https://registry.npmjs.org/text-decoder/-/text-decoder-1.2.3.tgz",
|
package.json
CHANGED
|
@@ -93,6 +93,7 @@
|
|
| 93 |
"pino": "^9.0.0",
|
| 94 |
"pino-pretty": "^11.0.0",
|
| 95 |
"postcss": "^8.4.31",
|
|
|
|
| 96 |
"satori": "^0.10.11",
|
| 97 |
"satori-html": "^0.3.2",
|
| 98 |
"sharp": "^0.33.4",
|
|
|
|
| 93 |
"pino": "^9.0.0",
|
| 94 |
"pino-pretty": "^11.0.0",
|
| 95 |
"postcss": "^8.4.31",
|
| 96 |
+
"prom-client": "^15.1.3",
|
| 97 |
"satori": "^0.10.11",
|
| 98 |
"satori-html": "^0.3.2",
|
| 99 |
"sharp": "^0.33.4",
|
src/hooks.server.ts
CHANGED
|
@@ -18,6 +18,7 @@ import { initExitHandler } from "$lib/server/exitHandler";
|
|
| 18 |
import { refreshConversationStats } from "$lib/jobs/refresh-conversation-stats";
|
| 19 |
import { adminTokenManager } from "$lib/server/adminToken";
|
| 20 |
import { isHostLocalhost } from "$lib/server/isURLLocal";
|
|
|
|
| 21 |
|
| 22 |
export const init: ServerInit = async () => {
|
| 23 |
// Wait for config to be fully loaded
|
|
@@ -41,6 +42,10 @@ export const init: ServerInit = async () => {
|
|
| 41 |
logger.info("Starting server...");
|
| 42 |
initExitHandler();
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
checkAndRunMigrations();
|
| 45 |
refreshConversationStats();
|
| 46 |
|
|
|
|
| 18 |
import { refreshConversationStats } from "$lib/jobs/refresh-conversation-stats";
|
| 19 |
import { adminTokenManager } from "$lib/server/adminToken";
|
| 20 |
import { isHostLocalhost } from "$lib/server/isURLLocal";
|
| 21 |
+
import { MetricsServer } from "$lib/server/metrics";
|
| 22 |
|
| 23 |
export const init: ServerInit = async () => {
|
| 24 |
// Wait for config to be fully loaded
|
|
|
|
| 42 |
logger.info("Starting server...");
|
| 43 |
initExitHandler();
|
| 44 |
|
| 45 |
+
if (config.METRICS_ENABLED === "true") {
|
| 46 |
+
MetricsServer.getInstance();
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
checkAndRunMigrations();
|
| 50 |
refreshConversationStats();
|
| 51 |
|
src/lib/server/config.ts
CHANGED
|
@@ -151,7 +151,12 @@ export const ready = (async () => {
|
|
| 151 |
}
|
| 152 |
})();
|
| 153 |
|
| 154 |
-
type ExtraConfigKeys =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
type ConfigProxy = ConfigManager & { [K in ConfigKey | ExtraConfigKeys]: string };
|
| 157 |
|
|
|
|
| 151 |
}
|
| 152 |
})();
|
| 153 |
|
| 154 |
+
/**
 * Config keys that are exposed on the config proxy in addition to `ConfigKey`.
 * The two METRICS_* keys are the ones read by the metrics server.
 */
type ExtraConfigKeys = "HF_TOKEN" | "OLD_MODELS" | "ENABLE_ASSISTANTS" | "METRICS_ENABLED" | "METRICS_PORT";
|
| 160 |
|
| 161 |
type ConfigProxy = ConfigManager & { [K in ConfigKey | ExtraConfigKeys]: string };
|
| 162 |
|
src/lib/server/metrics.ts
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { collectDefaultMetrics, Counter, Registry, Summary } from "prom-client";
|
| 2 |
+
import { logger } from "$lib/server/logger";
|
| 3 |
+
import { config } from "$lib/server/config";
|
| 4 |
+
import { createServer, type Server as HttpServer } from "http";
|
| 5 |
+
import { onExit } from "./exitHandler";
|
| 6 |
+
|
| 7 |
+
/** Label name attached to per-model metrics. */
type ModelLabel = "model";
/** Label name attached to per-tool metrics. */
type ToolLabel = "tool";

/** Every application metric, grouped by the feature that reports it. */
interface Metrics {
	/** Metrics labelled by model. */
	model: {
		/** Total number of conversations. */
		conversationsTotal: Counter<ModelLabel>;
		/** Total number of messages. */
		messagesTotal: Counter<ModelLabel>;
		/** Total number of tokens emitted by the model. */
		tokenCountTotal: Counter<ModelLabel>;
		/** Per-token latency in milliseconds. */
		timePerOutputToken: Summary<ModelLabel>;
		/** Time to first token in milliseconds. */
		timeToFirstToken: Summary<ModelLabel>;
		/** Total time to complete a response in milliseconds. */
		latency: Summary<ModelLabel>;
		/** Total number of positive votes on model messages. */
		votesPositive: Counter<ModelLabel>;
		/** Total number of negative votes on model messages. */
		votesNegative: Counter<ModelLabel>;
	};
	/** Unlabelled web-search metrics. */
	webSearch: {
		/** Total number of web search requests. */
		requestCount: Counter;
		/** Total number of web search page fetches. */
		pageFetchCount: Counter;
		/** Total number of web search page fetch errors. */
		pageFetchCountError: Counter;
		/** Duration of web search page fetches in milliseconds. */
		pageFetchDuration: Summary;
		/** Duration of web search embeddings in milliseconds. */
		embeddingDuration: Summary;
	};
	/** Tool metrics; labelled by tool, except `timeToChooseTools` which is labelled by model. */
	tool: {
		/** Total number of tool invocations. */
		toolUseCount: Counter<ToolLabel>;
		/** Total number of tool invocation errors. */
		toolUseCountError: Counter<ToolLabel>;
		/** Duration of tool invocations in milliseconds. */
		toolUseDuration: Summary<ToolLabel>;
		/** Time spent selecting tools in milliseconds. */
		timeToChooseTools: Summary<ModelLabel>;
	};
}
|
| 35 |
+
|
| 36 |
+
/**
 * Lazily created singleton that owns the Prometheus registry, the application
 * metrics and, when `METRICS_ENABLED` is "true", a standalone HTTP listener
 * that serves the metrics in text exposition format.
 *
 * When metrics are disabled the metric objects are still created (so callers
 * can use them unconditionally) but they are attached to a throw-away registry
 * and `render()` returns an empty string.
 */
export class MetricsServer {
	private static instance: MetricsServer | undefined;
	private readonly enabled: boolean;
	private readonly register: Registry;
	private readonly metrics: Metrics;
	private httpServer: HttpServer | undefined;

	private constructor() {
		this.enabled = config.METRICS_ENABLED === "true";
		this.register = new Registry();

		if (this.enabled) {
			collectDefaultMetrics({ register: this.register });
		}

		this.metrics = this.createMetrics();

		if (this.enabled) {
			this.startStandaloneServer();
		}
	}

	/** Returns the shared instance, constructing it on first use. */
	public static getInstance(): MetricsServer {
		if (!MetricsServer.instance) {
			MetricsServer.instance = new MetricsServer();
		}
		return MetricsServer.instance;
	}

	/** Returns the metric objects of the shared instance. */
	public static getMetrics(): Metrics {
		return MetricsServer.getInstance().metrics;
	}

	/** Reports whether metrics are switched on; reads config only and does not create the instance. */
	public static isEnabled(): boolean {
		return config.METRICS_ENABLED === "true";
	}

	/**
	 * Serialises the registry.
	 * @returns the registry's metrics output, or "" when metrics are disabled.
	 */
	public async render(): Promise<string> {
		if (!this.enabled) {
			return "";
		}

		return this.register.metrics();
	}

	/**
	 * Builds every application metric. Metrics are attached to the real registry
	 * when enabled, otherwise to a private registry that is never rendered.
	 */
	private createMetrics(): Metrics {
		const labelNames: ModelLabel[] = ["model"];
		const toolLabelNames: ToolLabel[] = ["tool"];

		const registry = this.enabled ? this.register : new Registry();

		// Window options shared by every Summary except toolUseDuration.
		const fiveMinuteWindow = { maxAgeSeconds: 5 * 60, ageBuckets: 5 };

		return {
			model: {
				conversationsTotal: new Counter<ModelLabel>({
					name: "model_conversations_total",
					help: "Total number of conversations",
					labelNames,
					registers: [registry],
				}),
				messagesTotal: new Counter<ModelLabel>({
					name: "model_messages_total",
					help: "Total number of messages",
					labelNames,
					registers: [registry],
				}),
				tokenCountTotal: new Counter<ModelLabel>({
					name: "model_token_count_total",
					help: "Total number of tokens emitted by the model",
					labelNames,
					registers: [registry],
				}),
				timePerOutputToken: new Summary<ModelLabel>({
					name: "model_time_per_output_token_ms",
					help: "Per-token latency in milliseconds",
					labelNames,
					registers: [registry],
					...fiveMinuteWindow,
				}),
				timeToFirstToken: new Summary<ModelLabel>({
					name: "model_time_to_first_token_ms",
					help: "Time to first token in milliseconds",
					labelNames,
					registers: [registry],
					...fiveMinuteWindow,
				}),
				latency: new Summary<ModelLabel>({
					name: "model_latency_ms",
					help: "Total time to complete a response in milliseconds",
					labelNames,
					registers: [registry],
					...fiveMinuteWindow,
				}),
				votesPositive: new Counter<ModelLabel>({
					name: "model_votes_positive_total",
					help: "Total number of positive votes on model messages",
					labelNames,
					registers: [registry],
				}),
				votesNegative: new Counter<ModelLabel>({
					name: "model_votes_negative_total",
					help: "Total number of negative votes on model messages",
					labelNames,
					registers: [registry],
				}),
			},
			webSearch: {
				requestCount: new Counter({
					name: "web_search_request_count",
					help: "Total number of web search requests",
					registers: [registry],
				}),
				pageFetchCount: new Counter({
					name: "web_search_page_fetch_count",
					help: "Total number of web search page fetches",
					registers: [registry],
				}),
				pageFetchCountError: new Counter({
					name: "web_search_page_fetch_count_error",
					help: "Total number of web search page fetch errors",
					registers: [registry],
				}),
				pageFetchDuration: new Summary({
					name: "web_search_page_fetch_duration_ms",
					help: "Duration of web search page fetches in milliseconds",
					registers: [registry],
					...fiveMinuteWindow,
				}),
				embeddingDuration: new Summary({
					name: "web_search_embedding_duration_ms",
					help: "Duration of web search embeddings in milliseconds",
					registers: [registry],
					...fiveMinuteWindow,
				}),
			},
			tool: {
				toolUseCount: new Counter<ToolLabel>({
					name: "tool_use_count",
					help: "Total number of tool invocations",
					labelNames: toolLabelNames,
					registers: [registry],
				}),
				toolUseCountError: new Counter<ToolLabel>({
					name: "tool_use_count_error",
					help: "Total number of tool invocation errors",
					labelNames: toolLabelNames,
					registers: [registry],
				}),
				toolUseDuration: new Summary<ToolLabel>({
					name: "tool_use_duration_ms",
					help: "Duration of tool invocations in milliseconds",
					labelNames: toolLabelNames,
					registers: [registry],
					maxAgeSeconds: 30 * 60,
					ageBuckets: 5,
				}),
				timeToChooseTools: new Summary<ModelLabel>({
					name: "time_to_choose_tools_ms",
					help: "Time spent selecting tools in milliseconds",
					labelNames,
					registers: [registry],
					...fiveMinuteWindow,
				}),
			},
		};
	}

	/**
	 * Starts the dedicated metrics HTTP listener on `METRICS_PORT` (default 5565).
	 * Any GET request, whatever its path, receives the rendered metrics; other
	 * methods receive 405. An invalid port is logged and the listener is not started.
	 */
	private startStandaloneServer() {
		const port = Number(config.METRICS_PORT || "5565");

		if (!Number.isInteger(port) || port < 0 || port > 65535) {
			logger.warn(`Invalid METRICS_PORT value: ${config.METRICS_PORT}`);
			return;
		}

		const server = createServer(async (req, res) => {
			if (req.method !== "GET") {
				res.statusCode = 405;
				res.end("Method Not Allowed");
				return;
			}

			try {
				const payload = await this.render();
				res.setHeader("Content-Type", "text/plain; version=0.0.4");
				res.end(payload);
			} catch (error) {
				logger.error(error, "Failed to render metrics");
				res.statusCode = 500;
				res.end("Failed to render metrics");
			}
		});
		this.httpServer = server;

		// Without an 'error' listener a bind failure (e.g. EADDRINUSE) is thrown as an
		// unhandled 'error' event and takes the whole application down. Metrics are
		// optional, so log the failure and keep the app running.
		server.on("error", (error) => {
			logger.error(error, "Metrics server error");
			if (!server.listening && this.httpServer === server) {
				// The listener never came up; there is nothing to close on exit.
				this.httpServer = undefined;
			}
		});

		server.listen(port, () => {
			logger.info(`Metrics server listening on port ${port}`);
		});

		// NOTE(review): onExit comes from ./exitHandler; presumably its callbacks run at shutdown — confirm.
		onExit(async () => {
			if (!this.httpServer) return;
			logger.info("Shutting down metrics server...");
			await new Promise<void>((resolve, reject) => {
				this.httpServer?.close((err) => {
					if (err) {
						reject(err);
						return;
					}
					resolve();
				});
			}).catch((error) => logger.error(error, "Failed to close metrics server"));
			this.httpServer = undefined;
		});
	}
}
|
src/routes/conversation/+server.ts
CHANGED
|
@@ -9,6 +9,7 @@ import { models, validateModel } from "$lib/server/models";
|
|
| 9 |
import { v4 } from "uuid";
|
| 10 |
import { authCondition } from "$lib/server/auth";
|
| 11 |
import { usageLimits } from "$lib/server/usageLimits";
|
|
|
|
| 12 |
|
| 13 |
export const POST: RequestHandler = async ({ locals, request }) => {
|
| 14 |
const body = await request.text();
|
|
@@ -97,6 +98,10 @@ export const POST: RequestHandler = async ({ locals, request }) => {
|
|
| 97 |
...(values.fromShare ? { meta: { fromShareId: values.fromShare } } : {}),
|
| 98 |
});
|
| 99 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
return new Response(
|
| 101 |
JSON.stringify({
|
| 102 |
conversationId: res.insertedId.toString(),
|
|
|
|
| 9 |
import { v4 } from "uuid";
|
| 10 |
import { authCondition } from "$lib/server/auth";
|
| 11 |
import { usageLimits } from "$lib/server/usageLimits";
|
| 12 |
+
import { MetricsServer } from "$lib/server/metrics";
|
| 13 |
|
| 14 |
export const POST: RequestHandler = async ({ locals, request }) => {
|
| 15 |
const body = await request.text();
|
|
|
|
| 98 |
...(values.fromShare ? { meta: { fromShareId: values.fromShare } } : {}),
|
| 99 |
});
|
| 100 |
|
| 101 |
+
if (MetricsServer.isEnabled()) {
|
| 102 |
+
MetricsServer.getMetrics().model.conversationsTotal.inc({ model: values.model });
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
return new Response(
|
| 106 |
JSON.stringify({
|
| 107 |
conversationId: res.insertedId.toString(),
|
src/routes/conversation/[id]/+server.ts
CHANGED
|
@@ -23,6 +23,7 @@ import { textGeneration } from "$lib/server/textGeneration";
|
|
| 23 |
import type { TextGenerationContext } from "$lib/server/textGeneration/types";
|
| 24 |
import { logger } from "$lib/server/logger.js";
|
| 25 |
import { AbortRegistry } from "$lib/server/abortRegistry";
|
|
|
|
| 26 |
|
| 27 |
export async function POST({ request, locals, params, getClientAddress }) {
|
| 28 |
const id = z.string().parse(params.id);
|
|
@@ -313,6 +314,11 @@ export async function POST({ request, locals, params, getClientAddress }) {
|
|
| 313 |
let clientDetached = false;
|
| 314 |
|
| 315 |
let lastTokenTimestamp: undefined | Date = undefined;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
|
| 317 |
const persistConversation = async () => {
|
| 318 |
await collections.conversations.updateOne(
|
|
@@ -344,9 +350,24 @@ export async function POST({ request, locals, params, getClientAddress }) {
|
|
| 344 |
if (event.token === "") return;
|
| 345 |
messageToWriteTo.content += event.token;
|
| 346 |
|
| 347 |
-
if (
|
| 348 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 349 |
}
|
|
|
|
| 350 |
lastTokenTimestamp = new Date();
|
| 351 |
}
|
| 352 |
|
|
@@ -366,6 +387,10 @@ export async function POST({ request, locals, params, getClientAddress }) {
|
|
| 366 |
messageToWriteTo.interrupted = event.interrupted;
|
| 367 |
messageToWriteTo.content = initialMessageContent + event.text;
|
| 368 |
finalAnswerReceived = true;
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
}
|
| 370 |
|
| 371 |
// Add file
|
|
@@ -533,6 +558,10 @@ export async function POST({ request, locals, params, getClientAddress }) {
|
|
| 533 |
},
|
| 534 |
});
|
| 535 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 536 |
// Todo: maybe we should wait for the message to be saved before ending the response - in case of errors
|
| 537 |
return new Response(stream, {
|
| 538 |
headers: {
|
|
|
|
| 23 |
import type { TextGenerationContext } from "$lib/server/textGeneration/types";
|
| 24 |
import { logger } from "$lib/server/logger.js";
|
| 25 |
import { AbortRegistry } from "$lib/server/abortRegistry";
|
| 26 |
+
import { MetricsServer } from "$lib/server/metrics";
|
| 27 |
|
| 28 |
export async function POST({ request, locals, params, getClientAddress }) {
|
| 29 |
const id = z.string().parse(params.id);
|
|
|
|
| 314 |
let clientDetached = false;
|
| 315 |
|
| 316 |
let lastTokenTimestamp: undefined | Date = undefined;
|
| 317 |
+
let firstTokenObserved = false;
|
| 318 |
+
const metricsEnabled = MetricsServer.isEnabled();
|
| 319 |
+
const metrics = metricsEnabled ? MetricsServer.getMetrics() : undefined;
|
| 320 |
+
const metricsModelId = model.id ?? model.name ?? conv.model;
|
| 321 |
+
const metricsLabels = { model: metricsModelId };
|
| 322 |
|
| 323 |
const persistConversation = async () => {
|
| 324 |
await collections.conversations.updateOne(
|
|
|
|
| 350 |
if (event.token === "") return;
|
| 351 |
messageToWriteTo.content += event.token;
|
| 352 |
|
| 353 |
+
if (metricsEnabled && metrics) {
|
| 354 |
+
const now = Date.now();
|
| 355 |
+
metrics.model.tokenCountTotal.inc(metricsLabels);
|
| 356 |
+
|
| 357 |
+
if (!firstTokenObserved) {
|
| 358 |
+
metrics.model.timeToFirstToken.observe(
|
| 359 |
+
metricsLabels,
|
| 360 |
+
now - promptedAt.getTime()
|
| 361 |
+
);
|
| 362 |
+
firstTokenObserved = true;
|
| 363 |
+
}
|
| 364 |
+
|
| 365 |
+
const previousTimestamp = lastTokenTimestamp
|
| 366 |
+
? lastTokenTimestamp.getTime()
|
| 367 |
+
: promptedAt.getTime();
|
| 368 |
+
metrics.model.timePerOutputToken.observe(metricsLabels, now - previousTimestamp);
|
| 369 |
}
|
| 370 |
+
|
| 371 |
lastTokenTimestamp = new Date();
|
| 372 |
}
|
| 373 |
|
|
|
|
| 387 |
messageToWriteTo.interrupted = event.interrupted;
|
| 388 |
messageToWriteTo.content = initialMessageContent + event.text;
|
| 389 |
finalAnswerReceived = true;
|
| 390 |
+
|
| 391 |
+
if (metricsEnabled && metrics) {
|
| 392 |
+
metrics.model.latency.observe(metricsLabels, Date.now() - promptedAt.getTime());
|
| 393 |
+
}
|
| 394 |
}
|
| 395 |
|
| 396 |
// Add file
|
|
|
|
| 558 |
},
|
| 559 |
});
|
| 560 |
|
| 561 |
+
if (metricsEnabled && metrics) {
|
| 562 |
+
metrics.model.messagesTotal.inc(metricsLabels);
|
| 563 |
+
}
|
| 564 |
+
|
| 565 |
// Todo: maybe we should wait for the message to be saved before ending the response - in case of errors
|
| 566 |
return new Response(stream, {
|
| 567 |
headers: {
|
src/routes/metrics/+server.ts
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { config } from "$lib/server/config";
|
| 2 |
+
import { MetricsServer } from "$lib/server/metrics";
|
| 3 |
+
|
| 4 |
+
/**
 * Serves the rendered metrics through the main application.
 * Responds 404 unless `METRICS_ENABLED` is "true"; otherwise returns the output
 * of the shared MetricsServer instance as uncached plain text.
 */
export async function GET() {
	const metricsDisabled = config.METRICS_ENABLED !== "true";
	if (metricsDisabled) {
		return new Response("Not Found", { status: 404 });
	}

	const body = await MetricsServer.getInstance().render();
	const headers = {
		"Content-Type": "text/plain; version=0.0.4",
		"Cache-Control": "no-store",
	};

	return new Response(body, { status: 200, headers });
}
|