| | const { NativeEmbedder } = require("../../EmbeddingEngines/native"); |
| | const { v4: uuidv4 } = require("uuid"); |
| | const { |
| | writeResponseChunk, |
| | clientAbortedHandler, |
| | formatChatHistory, |
| | } = require("../../helpers/chat/responses"); |
| | const fs = require("fs"); |
| | const path = require("path"); |
| | const { safeJsonParse } = require("../../http"); |
| | const { |
| | LLMPerformanceMonitor, |
| | } = require("../../helpers/chat/LLMPerformanceMonitor"); |
| | const { COMETAPI_IGNORE_PATTERNS } = require("./constants"); |
| | const cacheFolder = path.resolve( |
| | process.env.STORAGE_DIR |
| | ? path.resolve(process.env.STORAGE_DIR, "models", "cometapi") |
| | : path.resolve(__dirname, `../../../storage/models/cometapi`) |
| | ); |
| |
|
| | class CometApiLLM { |
| | defaultTimeout = 3_000; |
| | constructor(embedder = null, modelPreference = null) { |
| | if (!process.env.COMETAPI_LLM_API_KEY) |
| | throw new Error("No CometAPI API key was set."); |
| |
|
| | const { OpenAI: OpenAIApi } = require("openai"); |
| | this.basePath = "https://api.cometapi.com/v1"; |
| | this.openai = new OpenAIApi({ |
| | baseURL: this.basePath, |
| | apiKey: process.env.COMETAPI_LLM_API_KEY ?? null, |
| | defaultHeaders: { |
| | "HTTP-Referer": "https://anythingllm.com", |
| | "X-CometAPI-Source": "anythingllm", |
| | }, |
| | }); |
| | this.model = |
| | modelPreference || process.env.COMETAPI_LLM_MODEL_PREF || "gpt-5-mini"; |
| | this.limits = { |
| | history: this.promptWindowLimit() * 0.15, |
| | system: this.promptWindowLimit() * 0.15, |
| | user: this.promptWindowLimit() * 0.7, |
| | }; |
| |
|
| | this.embedder = embedder ?? new NativeEmbedder(); |
| | this.defaultTemp = 0.7; |
| | this.timeout = this.#parseTimeout(); |
| |
|
| | if (!fs.existsSync(cacheFolder)) |
| | fs.mkdirSync(cacheFolder, { recursive: true }); |
| | this.cacheModelPath = path.resolve(cacheFolder, "models.json"); |
| | this.cacheAtPath = path.resolve(cacheFolder, ".cached_at"); |
| |
|
| | this.log(`Loaded with model: ${this.model}`); |
| | } |
| |
|
| | log(text, ...args) { |
| | console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | #parseTimeout() { |
| | this.log( |
| | `CometAPI timeout is set to ${process.env.COMETAPI_LLM_TIMEOUT_MS ?? this.defaultTimeout}ms` |
| | ); |
| | if (isNaN(Number(process.env.COMETAPI_LLM_TIMEOUT_MS))) |
| | return this.defaultTimeout; |
| | const setValue = Number(process.env.COMETAPI_LLM_TIMEOUT_MS); |
| | if (setValue < 500) return 500; |
| | return setValue; |
| | } |
| |
|
| | |
| | |
| | |
| | #cacheIsStale() { |
| | const MAX_STALE = 6.048e8; |
| | if (!fs.existsSync(this.cacheAtPath)) return true; |
| | const now = Number(new Date()); |
| | const timestampMs = Number(fs.readFileSync(this.cacheAtPath)); |
| | return now - timestampMs > MAX_STALE; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | async #syncModels() { |
| | if (fs.existsSync(this.cacheModelPath) && !this.#cacheIsStale()) |
| | return false; |
| |
|
| | this.log( |
| | "Model cache is not present or stale. Fetching from CometAPI API." |
| | ); |
| | await fetchCometApiModels(); |
| | return; |
| | } |
| |
|
| | #appendContext(contextTexts = []) { |
| | if (!contextTexts || !contextTexts.length) return ""; |
| | return ( |
| | "\nContext:\n" + |
| | contextTexts |
| | .map((text, i) => { |
| | return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; |
| | }) |
| | .join("") |
| | ); |
| | } |
| |
|
| | models() { |
| | if (!fs.existsSync(this.cacheModelPath)) return {}; |
| | return safeJsonParse( |
| | fs.readFileSync(this.cacheModelPath, { encoding: "utf-8" }), |
| | {} |
| | ); |
| | } |
| |
|
| | streamingEnabled() { |
| | return "streamGetChatCompletion" in this; |
| | } |
| |
|
| | static promptWindowLimit(modelName) { |
| | const cacheModelPath = path.resolve(cacheFolder, "models.json"); |
| | const availableModels = fs.existsSync(cacheModelPath) |
| | ? safeJsonParse( |
| | fs.readFileSync(cacheModelPath, { encoding: "utf-8" }), |
| | {} |
| | ) |
| | : {}; |
| | return availableModels[modelName]?.maxLength || 4096; |
| | } |
| |
|
| | promptWindowLimit() { |
| | const availableModels = this.models(); |
| | return availableModels[this.model]?.maxLength || 4096; |
| | } |
| |
|
| | async isValidChatCompletionModel(model = "") { |
| | await this.#syncModels(); |
| | const availableModels = this.models(); |
| | return availableModels.hasOwnProperty(model); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | #generateContent({ userPrompt, attachments = [] }) { |
| | if (!attachments.length) { |
| | return userPrompt; |
| | } |
| |
|
| | const content = [{ type: "text", text: userPrompt }]; |
| | for (let attachment of attachments) { |
| | content.push({ |
| | type: "image_url", |
| | image_url: { |
| | url: attachment.contentString, |
| | detail: "auto", |
| | }, |
| | }); |
| | } |
| | return content.flat(); |
| | } |
| |
|
| | constructPrompt({ |
| | systemPrompt = "", |
| | contextTexts = [], |
| | chatHistory = [], |
| | userPrompt = "", |
| | attachments = [], |
| | }) { |
| | const prompt = { |
| | role: "system", |
| | content: `${systemPrompt}${this.#appendContext(contextTexts)}`, |
| | }; |
| | return [ |
| | prompt, |
| | ...formatChatHistory(chatHistory, this.#generateContent), |
| | { |
| | role: "user", |
| | content: this.#generateContent({ userPrompt, attachments }), |
| | }, |
| | ]; |
| | } |
| |
|
| | async getChatCompletion(messages = null, { temperature = 0.7 }) { |
| | if (!(await this.isValidChatCompletionModel(this.model))) |
| | throw new Error( |
| | `CometAPI chat: ${this.model} is not valid for chat completion!` |
| | ); |
| |
|
| | const result = await LLMPerformanceMonitor.measureAsyncFunction( |
| | this.openai.chat.completions |
| | .create({ |
| | model: this.model, |
| | messages, |
| | temperature, |
| | }) |
| | .catch((e) => { |
| | throw new Error(e.message); |
| | }) |
| | ); |
| |
|
| | if ( |
| | !result.output.hasOwnProperty("choices") || |
| | result.output.choices.length === 0 |
| | ) |
| | return null; |
| |
|
| | return { |
| | textResponse: result.output.choices[0].message.content, |
| | metrics: { |
| | prompt_tokens: result.output.usage.prompt_tokens || 0, |
| | completion_tokens: result.output.usage.completion_tokens || 0, |
| | total_tokens: result.output.usage.total_tokens || 0, |
| | outputTps: result.output.usage.completion_tokens / result.duration, |
| | duration: result.duration, |
| | }, |
| | }; |
| | } |
| |
|
| | async streamGetChatCompletion(messages = null, { temperature = 0.7 }) { |
| | if (!(await this.isValidChatCompletionModel(this.model))) |
| | throw new Error( |
| | `CometAPI chat: ${this.model} is not valid for chat completion!` |
| | ); |
| |
|
| | const measuredStreamRequest = await LLMPerformanceMonitor.measureStream( |
| | this.openai.chat.completions.create({ |
| | model: this.model, |
| | stream: true, |
| | messages, |
| | temperature, |
| | }), |
| | messages |
| | ); |
| | return measuredStreamRequest; |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | handleStream(response, stream, responseProps) { |
| | const timeoutThresholdMs = this.timeout; |
| | const { uuid = uuidv4(), sources = [] } = responseProps; |
| |
|
| | return new Promise(async (resolve) => { |
| | let fullText = ""; |
| | let lastChunkTime = null; |
| |
|
| | |
| | |
| | |
| | |
| | const handleAbort = () => { |
| | stream?.endMeasurement({ |
| | completion_tokens: LLMPerformanceMonitor.countTokens(fullText), |
| | }); |
| | clientAbortedHandler(resolve, fullText); |
| | }; |
| | response.on("close", handleAbort); |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | const timeoutCheck = setInterval(() => { |
| | if (lastChunkTime === null) return; |
| |
|
| | const now = Number(new Date()); |
| | const diffMs = now - lastChunkTime; |
| | if (diffMs >= timeoutThresholdMs) { |
| | this.log( |
| | `CometAPI stream did not self-close and has been stale for >${timeoutThresholdMs}ms. Closing response stream.` |
| | ); |
| | writeResponseChunk(response, { |
| | uuid, |
| | sources, |
| | type: "textResponseChunk", |
| | textResponse: "", |
| | close: true, |
| | error: false, |
| | }); |
| | clearInterval(timeoutCheck); |
| | response.removeListener("close", handleAbort); |
| | stream?.endMeasurement({ |
| | completion_tokens: LLMPerformanceMonitor.countTokens(fullText), |
| | }); |
| | resolve(fullText); |
| | } |
| | }, 500); |
| |
|
| | try { |
| | for await (const chunk of stream) { |
| | const message = chunk?.choices?.[0]; |
| | const token = message?.delta?.content; |
| | lastChunkTime = Number(new Date()); |
| |
|
| | if (token) { |
| | fullText += token; |
| | writeResponseChunk(response, { |
| | uuid, |
| | sources: [], |
| | type: "textResponseChunk", |
| | textResponse: token, |
| | close: false, |
| | error: false, |
| | }); |
| | } |
| |
|
| | if (message.finish_reason !== null) { |
| | writeResponseChunk(response, { |
| | uuid, |
| | sources, |
| | type: "textResponseChunk", |
| | textResponse: "", |
| | close: true, |
| | error: false, |
| | }); |
| | response.removeListener("close", handleAbort); |
| | stream?.endMeasurement({ |
| | completion_tokens: LLMPerformanceMonitor.countTokens(fullText), |
| | }); |
| | resolve(fullText); |
| | } |
| | } |
| | } catch (e) { |
| | writeResponseChunk(response, { |
| | uuid, |
| | sources, |
| | type: "abort", |
| | textResponse: null, |
| | close: true, |
| | error: e.message, |
| | }); |
| | response.removeListener("close", handleAbort); |
| | stream?.endMeasurement({ |
| | completion_tokens: LLMPerformanceMonitor.countTokens(fullText), |
| | }); |
| | resolve(fullText); |
| | } |
| | }); |
| | } |
| |
|
| | |
| | async embedTextInput(textInput) { |
| | return await this.embedder.embedTextInput(textInput); |
| | } |
| | async embedChunks(textChunks = []) { |
| | return await this.embedder.embedChunks(textChunks); |
| | } |
| |
|
| | async compressMessages(promptArgs = {}, rawHistory = []) { |
| | const { messageArrayCompressor } = require("../../helpers/chat"); |
| | const messageArray = this.constructPrompt(promptArgs); |
| | return await messageArrayCompressor(this, messageArray, rawHistory); |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | async function fetchCometApiModels() { |
| | return await fetch(`https://api.cometapi.com/v1/models`, { |
| | method: "GET", |
| | headers: { |
| | "Content-Type": "application/json", |
| | Authorization: `Bearer ${process.env.COMETAPI_LLM_API_KEY}`, |
| | }, |
| | }) |
| | .then((res) => res.json()) |
| | .then(({ data = [] }) => { |
| | const models = {}; |
| |
|
| | |
| | const chatModels = data.filter((model) => { |
| | const modelId = model.id.toLowerCase(); |
| | return !COMETAPI_IGNORE_PATTERNS.some((pattern) => |
| | modelId.includes(pattern.toLowerCase()) |
| | ); |
| | }); |
| |
|
| | chatModels.forEach((model) => { |
| | models[model.id] = { |
| | id: model.id, |
| | name: model.id, |
| | organization: |
| | model.id.split("/")[0] || model.id.split("-")[0] || "CometAPI", |
| | maxLength: model.context_length || 4096, |
| | }; |
| | }); |
| |
|
| | |
| | if (!fs.existsSync(cacheFolder)) |
| | fs.mkdirSync(cacheFolder, { recursive: true }); |
| | fs.writeFileSync( |
| | path.resolve(cacheFolder, "models.json"), |
| | JSON.stringify(models), |
| | { |
| | encoding: "utf-8", |
| | } |
| | ); |
| | fs.writeFileSync( |
| | path.resolve(cacheFolder, ".cached_at"), |
| | String(Number(new Date())), |
| | { |
| | encoding: "utf-8", |
| | } |
| | ); |
| | return models; |
| | }) |
| | .catch((e) => { |
| | console.error("Error fetching CometAPI models:", e); |
| | return {}; |
| | }); |
| | } |
| |
|
| | module.exports = { |
| | CometApiLLM, |
| | fetchCometApiModels, |
| | }; |
| |
|