| | const { NativeEmbedder } = require("../../EmbeddingEngines/native"); |
| | const { |
| | handleDefaultStreamResponseV2, |
| | formatChatHistory, |
| | } = require("../../helpers/chat/responses"); |
| | const { |
| | LLMPerformanceMonitor, |
| | } = require("../../helpers/chat/LLMPerformanceMonitor"); |
| |
|
class TextGenWebUILLM {
  /**
   * LLM provider for a local Text Generation Web UI (oobabooga) server
   * exposing an OpenAI-compatible API.
   * @param {object|null} embedder - Embedding engine instance; falls back to NativeEmbedder.
   * @throws {Error} When TEXT_GEN_WEB_UI_BASE_PATH is not set.
   */
  constructor(embedder = null) {
    const { OpenAI: OpenAIApi } = require("openai");
    if (!process.env.TEXT_GEN_WEB_UI_BASE_PATH)
      throw new Error(
        "TextGenWebUI must have a valid base path to use for the api."
      );

    this.basePath = process.env.TEXT_GEN_WEB_UI_BASE_PATH;
    this.openai = new OpenAIApi({
      baseURL: this.basePath,
      apiKey: process.env.TEXT_GEN_WEB_UI_API_KEY ?? null,
    });
    // TextGenWebUI selects/loads the model server-side, so no model name is tracked here.
    this.model = null;
    // Token budget split across prompt sections: 15% history, 15% system, 70% user.
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };

    this.embedder = embedder ?? new NativeEmbedder();
    this.defaultTemp = 0.7;
    this.log(`Inference API: ${this.basePath} Model: ${this.model}`);
  }

  /** Tagged console logger (cyan provider-name prefix). */
  log(text, ...args) {
    console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
  }

  /**
   * Render retrieved context snippets into a delimited block appended to the
   * system prompt. Returns "" when there is no context.
   */
  #appendContext(contextTexts = []) {
    if (!contextTexts || !contextTexts.length) return "";
    return (
      "\nContext:\n" +
      contextTexts
        .map((text, i) => `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`)
        .join("")
    );
  }

  /** Streaming is supported whenever streamGetChatCompletion is defined. */
  streamingEnabled() {
    return "streamGetChatCompletion" in this;
  }

  /**
   * Token context window from the environment (defaults to 4096).
   * @param {string} [_modelName] - Unused; limit is global for this provider.
   * @returns {number}
   * @throws {Error} When the configured limit is not numeric.
   */
  static promptWindowLimit(_modelName) {
    const limit = process.env.TEXT_GEN_WEB_UI_MODEL_TOKEN_LIMIT || 4096;
    if (!limit || isNaN(Number(limit)))
      throw new Error("No token context limit was set.");
    return Number(limit);
  }

  // Instance variant delegates to the static implementation (was duplicated verbatim).
  promptWindowLimit() {
    return TextGenWebUILLM.promptWindowLimit();
  }

  // TextGenWebUI has no model registry to validate against — accept any model name.
  isValidChatCompletionModel(_modelName = "") {
    return true;
  }

  /**
   * Build OpenAI-style message content: a plain string when there are no
   * attachments, otherwise a mixed array of text and image_url parts.
   * @param {{userPrompt: string, attachments?: Array<{contentString: string}>}} param0
   * @returns {string|object[]}
   */
  #generateContent({ userPrompt, attachments = [] }) {
    if (!attachments.length) {
      return userPrompt;
    }

    const content = [{ type: "text", text: userPrompt }];
    for (const attachment of attachments) {
      content.push({
        type: "image_url",
        image_url: {
          url: attachment.contentString,
        },
      });
    }
    // Array is already flat; no need for .flat() here.
    return content;
  }

  /**
   * Assemble the full message array: system prompt (+context), prior chat
   * history, then the latest user turn with any image attachments.
   * @returns {object[]} OpenAI-format messages.
   */
  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
    attachments = [],
  }) {
    const prompt = {
      role: "system",
      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
    };
    return [
      prompt,
      ...formatChatHistory(chatHistory, this.#generateContent),
      {
        role: "user",
        content: this.#generateContent({ userPrompt, attachments }),
      },
    ];
  }

  /**
   * One-shot (non-streaming) chat completion with performance metrics.
   * @param {object[]|null} messages - OpenAI-format message array.
   * @param {{temperature?: number}} [options] - Sampling options (now optional; previously threw when omitted).
   * @returns {Promise<{textResponse: string, metrics: object}|null>} Null when no choices returned.
   */
  async getChatCompletion(messages = null, { temperature = 0.7 } = {}) {
    const result = await LLMPerformanceMonitor.measureAsyncFunction(
      this.openai.chat.completions
        .create({
          model: this.model,
          messages,
          temperature,
        })
        .catch((e) => {
          throw new Error(e.message);
        })
    );

    if (
      !Object.hasOwn(result.output, "choices") ||
      result.output.choices.length === 0
    )
      return null;

    const usage = result.output.usage;
    return {
      textResponse: result.output.choices[0].message.content,
      metrics: {
        prompt_tokens: usage?.prompt_tokens || 0,
        completion_tokens: usage?.completion_tokens || 0,
        total_tokens: usage?.total_tokens || 0,
        // Guard: servers may omit usage entirely — avoid NaN from undefined / duration.
        outputTps: (usage?.completion_tokens || 0) / result.duration,
        duration: result.duration,
      },
    };
  }

  /**
   * Streaming chat completion wrapped in stream performance monitoring.
   * @param {object[]|null} messages - OpenAI-format message array.
   * @param {{temperature?: number}} [options] - Sampling options (now optional; previously threw when omitted).
   * @returns {Promise<object>} Measured stream handle consumed by handleStream.
   */
  async streamGetChatCompletion(messages = null, { temperature = 0.7 } = {}) {
    const measuredStreamRequest = await LLMPerformanceMonitor.measureStream(
      this.openai.chat.completions.create({
        model: this.model,
        stream: true,
        messages,
        temperature,
      }),
      messages
    );
    return measuredStreamRequest;
  }

  /** Delegate stream consumption to the shared OpenAI-compatible handler. */
  handleStream(response, stream, responseProps) {
    return handleDefaultStreamResponseV2(response, stream, responseProps);
  }

  /** Embed a single text input via the configured embedder. */
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }

  /** Embed a batch of text chunks via the configured embedder. */
  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }

  /**
   * Build the prompt and compress it to fit this provider's token window.
   * @returns {Promise<object[]>} Compressed OpenAI-format message array.
   */
  async compressMessages(promptArgs = {}, rawHistory = []) {
    const { messageArrayCompressor } = require("../../helpers/chat");
    const messageArray = this.constructPrompt(promptArgs);
    return await messageArrayCompressor(this, messageArray, rawHistory);
  }
}
| |
|
// Public entry point: expose the TextGenWebUI provider class.
module.exports = { TextGenWebUILLM };
| |
|