import { z } from "zod";
import { openAICompletionToTextGenerationStream } from "./openAICompletionToTextGenerationStream";
import {
	openAIChatToTextGenerationSingle,
	openAIChatToTextGenerationStream,
} from "./openAIChatToTextGenerationStream";
import type { CompletionCreateParamsStreaming } from "openai/resources/completions";
import type {
	ChatCompletionCreateParamsNonStreaming,
	ChatCompletionCreateParamsStreaming,
} from "openai/resources/chat/completions";
import { buildPrompt } from "$lib/buildPrompt";
import { config } from "$lib/server/config";
import type { Endpoint } from "../endpoints";
import type OpenAI from "openai";
import { createImageProcessorOptionsValidator, makeImageProcessor } from "../images";
import type { MessageFile } from "$lib/types/Message";
import type { EndpointMessage } from "../endpoints";
// uuid import removed (no tool call ids)

/**
 * Zod schema describing the configuration accepted by {@link endpointOai}.
 *
 * `type` must be the literal `"openai"`; `model` is accepted as-is (`z.any()`).
 * The remaining fields are optional or fall back to the defaults declared below.
 */
export const endpointOAIParametersSchema = z.object({
	weight: z.number().int().positive().default(1),
	model: z.any(),
	type: z.literal("openai"),
	baseURL: z.string().url().default("https://api.openai.com/v1"),
	// Canonical auth token is OPENAI_API_KEY; keep HF_TOKEN as legacy alias
	apiKey: z.string().default(config.OPENAI_API_KEY || config.HF_TOKEN || "sk-"),
	completion: z
		.union([z.literal("completions"), z.literal("chat_completions")])
		.default("chat_completions"),
	defaultHeaders: z.record(z.string()).optional(),
	defaultQuery: z.record(z.string()).optional(),
	// Extra keys merged over the request body right before it is sent
	extraBody: z.record(z.any()).optional(),
	multimodal: z
		.object({
			image: createImageProcessorOptionsValidator({
				supportedMimeTypes: [
					// Restrict to the most widely-supported formats
					"image/png",
					"image/jpeg",
				],
				preferredMimeType: "image/jpeg",
				maxSizeInMB: 3,
				maxWidth: 2048,
				maxHeight: 2048,
			}),
		})
		.default({}),
	/* enable use of max_completion_tokens in place of max_tokens */
	useCompletionTokens: z.boolean().default(false),
	// When false, chat requests are sent with `stream: false` and handled as a single response
	streamingSupported: z.boolean().default(true),
});

/**
 * Builds an endpoint that talks to an OpenAI-compatible API.
 *
 * The input is validated with {@link endpointOAIParametersSchema}, the `openai`
 * package is imported lazily, and a client is created whose `fetch` is wrapped
 * so that the `X-Router-Route` / `X-Router-Model` response headers can be
 * recorded. Depending on `completion`, the returned function issues either a
 * text-completions request or a chat-completions request.
 *
 * @param input - Endpoint configuration, in the schema's input shape.
 * @returns The request function for the configured completion mode.
 * @throws Whatever `endpointOAIParametersSchema.parse` throws for invalid input.
 * @throws Error `"Failed to import OpenAI"` when the `openai` package cannot be loaded.
 * @throws Error `"Invalid completion type"` when `completion` matches neither mode.
 */
export async function endpointOai(
	input: z.input<typeof endpointOAIParametersSchema>
): Promise<Endpoint> {
	const {
		baseURL,
		apiKey,
		completion,
		model,
		defaultHeaders,
		defaultQuery,
		multimodal,
		extraBody,
		useCompletionTokens,
		streamingSupported,
	} = endpointOAIParametersSchema.parse(input);

	let OpenAI;
	try {
		OpenAI = (await import("openai")).OpenAI;
	} catch (e) {
		throw new Error("Failed to import OpenAI", { cause: e });
	}

	// Store router metadata if captured.
	// NOTE(review): this variable lives in the endpoint closure, so it is shared by
	// every request made through this endpoint and is never reset; a response
	// without router headers leaves the previously captured values in place.
	let routerMetadata: { route?: string; model?: string } = {};

	// Custom fetch wrapper to capture response headers for router metadata
	const customFetch = async (url: RequestInfo, init?: RequestInit): Promise<Response> => {
		const response = await fetch(url, init);

		// Capture router headers if present (fallback for non-streaming)
		const routeHeader = response.headers.get("X-Router-Route");
		const modelHeader = response.headers.get("X-Router-Model");
		if (routeHeader && modelHeader) {
			routerMetadata = {
				route: routeHeader,
				model: modelHeader,
			};
		}

		return response;
	};

	const openai = new OpenAI({
		// `||` (not `??`) so that an explicitly empty key also falls back to the placeholder
		apiKey: apiKey || "sk-",
		baseURL,
		defaultHeaders,
		defaultQuery,
		fetch: customFetch,
	});

	const imageProcessor = makeImageProcessor(multimodal.image);

	if (completion === "completions") {
		return async ({ messages, preprompt, continueMessage, generateSettings, conversationId }) => {
			const prompt = await buildPrompt({
				messages,
				continueMessage,
				preprompt,
				model,
			});

			// Request-specific settings override the model defaults
			const parameters = { ...model.parameters, ...generateSettings };
			const body: CompletionCreateParamsStreaming = {
				model: model.id ?? model.name,
				prompt,
				stream: true,
				max_tokens: parameters?.max_tokens,
				stop: parameters?.stop,
				temperature: parameters?.temperature,
				top_p: parameters?.top_p,
				frequency_penalty: parameters?.frequency_penalty,
				presence_penalty: parameters?.presence_penalty,
			};

			const openAICompletion = await openai.completions.create(body, {
				// Keys from extraBody win over the typed body
				body: { ...body, ...extraBody },
				headers: {
					"ChatUI-Conversation-ID": conversationId?.toString() ?? "",
					"X-use-cache": "false",
				},
			});

			return openAICompletionToTextGenerationStream(openAICompletion);
		};
	} else if (completion === "chat_completions") {
		return async ({ messages, preprompt, generateSettings, conversationId, isMultimodal }) => {
			// Format messages for the chat API, handling multimodal content if supported
			let messagesOpenAI: OpenAI.Chat.Completions.ChatCompletionMessageParam[] =
				await prepareMessages(messages, imageProcessor, isMultimodal ?? model.multimodal);

			// Check if a system message already exists as the first message
			const firstMessage = messagesOpenAI[0];
			if (firstMessage?.role === "system") {
				// System message exists - preserve user configuration
				if (preprompt !== undefined) {
					// Prepend preprompt to the existing system message. The blank-line
					// separator is only added when both parts are non-empty, so an empty
					// preprompt no longer introduces leading newlines.
					const userSystemPrompt = firstMessage.content || "";
					const separator = preprompt && userSystemPrompt ? "\n\n" : "";
					firstMessage.content = preprompt + separator + userSystemPrompt;
				}
				// If no preprompt, user's system message remains unchanged
			} else {
				// No system message exists - create a new one with preprompt or empty string
				messagesOpenAI = [{ role: "system", content: preprompt ?? "" }, ...messagesOpenAI];
			}

			// Combine model defaults with request-specific parameters
			const parameters = { ...model.parameters, ...generateSettings };
			const body = {
				model: model.id ?? model.name,
				messages: messagesOpenAI,
				stream: streamingSupported,
				// Support two different ways of specifying token limits depending on the model
				...(useCompletionTokens
					? { max_completion_tokens: parameters?.max_tokens }
					: { max_tokens: parameters?.max_tokens }),
				stop: parameters?.stop,
				temperature: parameters?.temperature,
				top_p: parameters?.top_p,
				frequency_penalty: parameters?.frequency_penalty,
				presence_penalty: parameters?.presence_penalty,
			};

			// Identical for the streaming and non-streaming call, so build it once
			const requestOptions = {
				body: { ...body, ...extraBody },
				headers: {
					"ChatUI-Conversation-ID": conversationId?.toString() ?? "",
					"X-use-cache": "false",
				},
			};

			// Handle both streaming and non-streaming responses with appropriate processors
			if (streamingSupported) {
				const openChatAICompletion = await openai.chat.completions.create(
					body as ChatCompletionCreateParamsStreaming,
					requestOptions
				);
				return openAIChatToTextGenerationStream(openChatAICompletion, () => routerMetadata);
			} else {
				const openChatAICompletion = await openai.chat.completions.create(
					body as ChatCompletionCreateParamsNonStreaming,
					requestOptions
				);
				return openAIChatToTextGenerationSingle(openChatAICompletion, () => routerMetadata);
			}
		};
	} else {
		throw new Error("Invalid completion type");
	}
}

/**
 * Converts endpoint messages into chat-completion message params.
 *
 * A user message is turned into a multi-part message (text part followed by
 * image parts) only when `isMultimodal` is true and at least one of its files
 * yields an image part; every other message keeps its plain `content`.
 *
 * @param messages - Messages to convert; each keeps its `from` value as `role`.
 * @param imageProcessor - Processor applied to the image files of user messages.
 * @param isMultimodal - Whether image files should be attached at all.
 * @returns The converted messages, in the same order as `messages`.
 */
async function prepareMessages(
	messages: EndpointMessage[],
	imageProcessor: ReturnType<typeof makeImageProcessor>,
	isMultimodal: boolean
): Promise<OpenAI.Chat.Completions.ChatCompletionMessageParam[]> {
	return Promise.all(
		messages.map(async (message) => {
			if (message.from === "user" && isMultimodal) {
				const imageParts = await prepareFiles(imageProcessor, message.files ?? []);
				if (imageParts.length) {
					const parts = [{ type: "text" as const, text: message.content }, ...imageParts];
					return { role: message.from, content: parts };
				}
			}
			return { role: message.from, content: message.content };
		})
	);
}

/**
 * Turns the image files of a message into `image_url` content parts.
 *
 * Files whose MIME type does not start with `image/` are skipped. Each remaining
 * file is passed through `imageProcessor` and embedded as a base64 data URL
 * built from the processed file's `mime` and `image`.
 *
 * @param imageProcessor - Processor applied to every image file.
 * @param files - Files attached to the message.
 * @returns One `image_url` part per image file, in input order.
 */
async function prepareFiles(
	imageProcessor: ReturnType<typeof makeImageProcessor>,
	files: MessageFile[]
): Promise<OpenAI.Chat.Completions.ChatCompletionContentPartImage[]> {
	const processedFiles = await Promise.all(
		files.filter((file) => file.mime.startsWith("image/")).map(imageProcessor)
	);
	return processedFiles.map((file) => ({
		type: "image_url" as const,
		image_url: {
			url: `data:${file.mime};base64,${file.image.toString("base64")}`,
			// Improves compatibility with some OpenAI-compatible servers
			// that expect an explicit detail setting.
			detail: "auto" as const,
		},
	}));
}