| | const { toChunks, maximumChunkLength } = require("../../helpers"); |
| |
|
class GenericOpenAiEmbedder {
  constructor() {
    if (!process.env.EMBEDDING_BASE_PATH)
      throw new Error(
        "GenericOpenAI must have a valid base path to use for the api."
      );
    const { OpenAI: OpenAIApi } = require("openai");
    this.basePath = process.env.EMBEDDING_BASE_PATH;
    this.openai = new OpenAIApi({
      baseURL: this.basePath,
      // Generic endpoints may not require auth, so a missing key is allowed.
      apiKey: process.env.GENERIC_OPEN_AI_EMBEDDING_API_KEY ?? null,
    });
    // NOTE(review): model may be null if EMBEDDING_MODEL_PREF is unset — the
    // remote endpoint is presumably expected to apply its own default then.
    this.model = process.env.EMBEDDING_MODEL_PREF ?? null;
    this.embeddingMaxChunkLength = maximumChunkLength();

    this.log(`Initialized ${this.model}`, {
      baseURL: this.basePath,
      maxConcurrentChunks: this.maxConcurrentChunks,
      embeddingMaxChunkLength: this.embeddingMaxChunkLength,
    });
  }

  log(text, ...args) {
    console.log(`\x1b[36m[GenericOpenAiEmbedder]\x1b[0m ${text}`, ...args);
  }

  /**
   * Optional delay (ms) between batch embedding requests, read from
   * GENERIC_OPEN_AI_EMBEDDING_API_DELAY_MS. Useful for rate-limited endpoints.
   * @returns {number|null} Delay in milliseconds (clamped to a 500ms minimum),
   * or null when the env var is unset or not numeric.
   */
  get apiRequestDelay() {
    if (!("GENERIC_OPEN_AI_EMBEDDING_API_DELAY_MS" in process.env)) return null;
    if (isNaN(Number(process.env.GENERIC_OPEN_AI_EMBEDDING_API_DELAY_MS)))
      return null;
    const delayTimeout = Number(
      process.env.GENERIC_OPEN_AI_EMBEDDING_API_DELAY_MS
    );
    if (delayTimeout < 500) return 500;
    return delayTimeout;
  }

  /**
   * Sleeps for `apiRequestDelay` milliseconds. No-op when no delay is configured.
   * @returns {Promise<void>}
   */
  async runDelay() {
    if (!this.apiRequestDelay) return;
    this.log(`Delaying new batch request for ${this.apiRequestDelay}ms`);
    await new Promise((resolve) => setTimeout(resolve, this.apiRequestDelay));
  }

  /**
   * Maximum number of text chunks sent per embedding API request, read from
   * GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS.
   * @returns {number} Configured batch size, defaulting to 500 when unset or
   * not numeric.
   */
  get maxConcurrentChunks() {
    if (!process.env.GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS)
      return 500;
    if (
      isNaN(Number(process.env.GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS))
    )
      return 500;
    return Number(process.env.GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS);
  }

  /**
   * Embeds a single text input (or passes an array through unchanged).
   * @param {string|string[]} textInput - Text to embed.
   * @returns {Promise<number[]>} The first embedding vector, or [] on no result.
   */
  async embedTextInput(textInput) {
    const result = await this.embedChunks(
      Array.isArray(textInput) ? textInput : [textInput]
    );
    return result?.[0] || [];
  }

  /**
   * Embeds many text chunks, batching requests by `maxConcurrentChunks` and
   * honoring the optional inter-request delay.
   * @param {string[]} textChunks - Texts to embed.
   * @returns {Promise<number[][]|null>} One embedding vector per input chunk,
   * or null when the API returned no (or malformed) embedding objects.
   * @throws {Error} When any batch request fails, with the upstream message
   * wrapped as "GenericOpenAI Failed to embed: ...".
   */
  async embedChunks(textChunks = []) {
    const allResults = [];
    for (const batch of toChunks(textChunks, this.maxConcurrentChunks)) {
      // Await the SDK call directly instead of wrapping its Promise in a
      // resolve-only `new Promise` — a failed batch aborts the whole run.
      let data;
      try {
        const result = await this.openai.embeddings.create({
          model: this.model,
          input: batch,
        });
        data = result?.data ?? [];
      } catch (e) {
        // Prefer the structured error message from the API response body.
        const message = e?.response?.data?.error?.message || e.message;
        throw new Error(`GenericOpenAI Failed to embed: ${message}`, {
          cause: e,
        });
      }
      allResults.push(...data);
      if (this.apiRequestDelay) await this.runDelay();
    }

    // Only return vectors when every result object actually carries one.
    return allResults.length > 0 &&
      allResults.every((embd) => Object.hasOwn(embd, "embedding"))
      ? allResults.map((embd) => embd.embedding)
      : null;
  }
}
| |
|
// Named export so consumers can `const { GenericOpenAiEmbedder } = require(...)`.
module.exports = {
  GenericOpenAiEmbedder,
};
| |
|