const { TokenManager } = require("../tiktoken");

/**
 * @typedef {import("openai/streaming").Stream<import("openai").OpenAI.ChatCompletionChunk>} OpenAICompatibleStream
 * @typedef {(reportedUsage: {[key: string]: number, completion_tokens?: number, prompt_tokens?: number}) => StreamMetrics} EndMeasurementFunction
 * @typedef {Array<{content: string}>} Messages
 */

/**
 * @typedef {Object} StreamMetrics
 * @property {number} prompt_tokens - the number of tokens in the prompt
 * @property {number} completion_tokens - the number of tokens in the completion
 * @property {number} total_tokens - the total number of tokens
 * @property {number} outputTps - the tokens per second of the output
 * @property {number} duration - the duration of the stream, in seconds
 */

/**
 * @typedef {Object} MonitoredStream
 * @property {number} start - the start time of the stream (ms timestamp from `Date.now()`)
 * @property {number} duration - the duration of the stream, in seconds
 * @property {StreamMetrics} metrics - the metrics of the stream
 * @property {EndMeasurementFunction} endMeasurement - the method to end the stream and calculate the metrics
 */
class LLMPerformanceMonitor {
  static tokenManager = new TokenManager();

  /**
   * Counts the tokens in the messages.
   * @param {Array<{content: string}>} messages - the messages sent to the LLM so we can calculate the prompt tokens since most providers do not return this on stream
   * @returns {number}
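   * @example
   * // A minimal sketch of the expected message shape (strings here are illustrative):
   * const promptTokens = LLMPerformanceMonitor.countTokens([
   *   { content: "You are a helpful assistant." },
   *   { content: "Hello!" },
   * ]);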
   */
  static countTokens(messages = []) {
    try {
      return this.tokenManager.statsFrom(messages);
    } catch (e) {
      return 0;
    }
  }

  /**
   * Awaits a promise (e.g. a non-streamed completion call) and measures how long it takes to resolve, in seconds.
   * @param {Promise<any>} func - the promise to await and measure
   * @returns {Promise<{output: any, duration: number}>}
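   * @example
   * // A minimal sketch; `runInference(prompt)` stands in for any hypothetical async call:
   * const { output, duration } = await LLMPerformanceMonitor.measureAsyncFunction(
   *   runInference(prompt)
   * );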
   */
  static measureAsyncFunction(func) {
    return (async () => {
      const start = Date.now();
      const output = await func; // func is already a promise, not a function reference
      const end = Date.now();
      return { output, duration: (end - start) / 1000 };
    })();
  }

  /**
   * Wraps a completion stream and attaches a start time and duration property to the stream.
   * Also attaches an `endMeasurement` method to the stream that will calculate the duration of the stream and metrics.
   * @param {Promise<OpenAICompatibleStream>} func - the promise that resolves to the completion stream
   * @param {Messages} messages - the messages sent to the LLM so we can calculate the prompt tokens since most providers do not return this on stream
   * @param {boolean} runPromptTokenCalculation - whether to run the prompt token calculation to estimate the `prompt_tokens` metric. This is useful for providers that do not return this on stream.
   * @returns {Promise<MonitoredStream>}
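   * @example
   * // A minimal sketch, assuming `client` is an OpenAI-compatible SDK instance and
   * // `messages` is an array of { content } objects (names and model are illustrative):
   * const stream = await LLMPerformanceMonitor.measureStream(
   *   client.chat.completions.create({ model: "gpt-4o", messages, stream: true }),
   *   messages
   * );
   * let completionText = "";
   * for await (const chunk of stream) {
   *   completionText += chunk.choices[0]?.delta?.content || "";
   * }
   * const metrics = stream.endMeasurement({
   *   completion_tokens: LLMPerformanceMonitor.countTokens([{ content: completionText }]),
   * });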
   */
  static async measureStream(
    func,
    messages = [],
    runPromptTokenCalculation = true
  ) {
    const stream = await func;
    stream.start = Date.now();
    stream.duration = 0;
    stream.metrics = {
      completion_tokens: 0,
      prompt_tokens: runPromptTokenCalculation ? this.countTokens(messages) : 0,
      total_tokens: 0,
      outputTps: 0,
      duration: 0,
    };
    stream.endMeasurement = (reportedUsage = {}) => {
      const end = Date.now();
      const duration = (end - stream.start) / 1000;
      // Merge the reported usage into the existing metrics so that any
      // provider-reported values take precedence in the calculations below.
      stream.metrics = {
        ...stream.metrics,
        ...reportedUsage,
      };
      stream.metrics.total_tokens =
        stream.metrics.prompt_tokens + (stream.metrics.completion_tokens || 0);
      stream.metrics.outputTps = stream.metrics.completion_tokens / duration;
      stream.metrics.duration = duration;
      return stream.metrics;
    };
    return stream;
  }
}

module.exports = {
  LLMPerformanceMonitor,
};