import { collectDefaultMetrics, Counter, Registry, Summary } from "prom-client"; import { logger } from "$lib/server/logger"; import { config } from "$lib/server/config"; import { createServer, type Server as HttpServer } from "http"; import { onExit } from "./exitHandler"; type ModelLabel = "model"; type ToolLabel = "tool"; interface Metrics { model: { conversationsTotal: Counter; messagesTotal: Counter; tokenCountTotal: Counter; timePerOutputToken: Summary; timeToFirstToken: Summary; latency: Summary; votesPositive: Counter; votesNegative: Counter; }; webSearch: { requestCount: Counter; pageFetchCount: Counter; pageFetchCountError: Counter; pageFetchDuration: Summary; embeddingDuration: Summary; }; tool: { toolUseCount: Counter; toolUseCountError: Counter; toolUseDuration: Summary; timeToChooseTools: Summary; }; } export class MetricsServer { private static instance: MetricsServer | undefined; private readonly enabled: boolean; private readonly register: Registry; private readonly metrics: Metrics; private httpServer: HttpServer | undefined; private constructor() { this.enabled = config.METRICS_ENABLED === "true"; this.register = new Registry(); if (this.enabled) { collectDefaultMetrics({ register: this.register }); } this.metrics = this.createMetrics(); if (this.enabled) { this.startStandaloneServer(); } } public static getInstance(): MetricsServer { if (!MetricsServer.instance) { MetricsServer.instance = new MetricsServer(); } return MetricsServer.instance; } public static getMetrics(): Metrics { return MetricsServer.getInstance().metrics; } public static isEnabled(): boolean { return config.METRICS_ENABLED === "true"; } public async render(): Promise { if (!this.enabled) { return ""; } return this.register.metrics(); } private createMetrics(): Metrics { const labelNames: ModelLabel[] = ["model"]; const toolLabelNames: ToolLabel[] = ["tool"]; const noopRegistry = new Registry(); const registry = this.enabled ? this.register : noopRegistry; return { model: { conversationsTotal: new Counter({ name: "model_conversations_total", help: "Total number of conversations", labelNames, registers: [registry], }), messagesTotal: new Counter({ name: "model_messages_total", help: "Total number of messages", labelNames, registers: [registry], }), tokenCountTotal: new Counter({ name: "model_token_count_total", help: "Total number of tokens emitted by the model", labelNames, registers: [registry], }), timePerOutputToken: new Summary({ name: "model_time_per_output_token_ms", help: "Per-token latency in milliseconds", labelNames, registers: [registry], maxAgeSeconds: 5 * 60, ageBuckets: 5, }), timeToFirstToken: new Summary({ name: "model_time_to_first_token_ms", help: "Time to first token in milliseconds", labelNames, registers: [registry], maxAgeSeconds: 5 * 60, ageBuckets: 5, }), latency: new Summary({ name: "model_latency_ms", help: "Total time to complete a response in milliseconds", labelNames, registers: [registry], maxAgeSeconds: 5 * 60, ageBuckets: 5, }), votesPositive: new Counter({ name: "model_votes_positive_total", help: "Total number of positive votes on model messages", labelNames, registers: [registry], }), votesNegative: new Counter({ name: "model_votes_negative_total", help: "Total number of negative votes on model messages", labelNames, registers: [registry], }), }, webSearch: { requestCount: new Counter({ name: "web_search_request_count", help: "Total number of web search requests", registers: [registry], }), pageFetchCount: new Counter({ name: "web_search_page_fetch_count", help: "Total number of web search page fetches", registers: [registry], }), pageFetchCountError: new Counter({ name: "web_search_page_fetch_count_error", help: "Total number of web search page fetch errors", registers: [registry], }), pageFetchDuration: new Summary({ name: "web_search_page_fetch_duration_ms", help: "Duration of web search page fetches in milliseconds", registers: [registry], maxAgeSeconds: 5 * 60, ageBuckets: 5, }), embeddingDuration: new Summary({ name: "web_search_embedding_duration_ms", help: "Duration of web search embeddings in milliseconds", registers: [registry], maxAgeSeconds: 5 * 60, ageBuckets: 5, }), }, tool: { toolUseCount: new Counter({ name: "tool_use_count", help: "Total number of tool invocations", labelNames: toolLabelNames, registers: [registry], }), toolUseCountError: new Counter({ name: "tool_use_count_error", help: "Total number of tool invocation errors", labelNames: toolLabelNames, registers: [registry], }), toolUseDuration: new Summary({ name: "tool_use_duration_ms", help: "Duration of tool invocations in milliseconds", labelNames: toolLabelNames, registers: [registry], maxAgeSeconds: 30 * 60, ageBuckets: 5, }), timeToChooseTools: new Summary({ name: "time_to_choose_tools_ms", help: "Time spent selecting tools in milliseconds", labelNames, registers: [registry], maxAgeSeconds: 5 * 60, ageBuckets: 5, }), }, }; } private startStandaloneServer() { const port = Number(config.METRICS_PORT || "5565"); if (!Number.isInteger(port) || port < 0 || port > 65535) { logger.warn(`Invalid METRICS_PORT value: ${config.METRICS_PORT}`); return; } this.httpServer = createServer(async (req, res) => { if (req.method !== "GET") { res.statusCode = 405; res.end("Method Not Allowed"); return; } try { const payload = await this.render(); res.setHeader("Content-Type", "text/plain; version=0.0.4"); res.end(payload); } catch (error) { logger.error(error, "Failed to render metrics"); res.statusCode = 500; res.end("Failed to render metrics"); } }); this.httpServer.listen(port, () => { logger.info(`Metrics server listening on port ${port}`); }); onExit(async () => { if (!this.httpServer) return; logger.info("Shutting down metrics server..."); await new Promise((resolve, reject) => { this.httpServer?.close((err) => { if (err) { reject(err); return; } resolve(); }); }).catch((error) => logger.error(error, "Failed to close metrics server")); this.httpServer = undefined; }); } }