Spaces:
Running
Running
| import { z } from "zod"; | |
| import { openAICompletionToTextGenerationStream } from "./openAICompletionToTextGenerationStream"; | |
| import { | |
| openAIChatToTextGenerationSingle, | |
| openAIChatToTextGenerationStream, | |
| } from "./openAIChatToTextGenerationStream"; | |
| import type { CompletionCreateParamsStreaming } from "openai/resources/completions"; | |
| import type { | |
| ChatCompletionCreateParamsNonStreaming, | |
| ChatCompletionCreateParamsStreaming, | |
| } from "openai/resources/chat/completions"; | |
| import { buildPrompt } from "$lib/buildPrompt"; | |
| import { config } from "$lib/server/config"; | |
| import type { Endpoint } from "../endpoints"; | |
| import type OpenAI from "openai"; | |
| import { createImageProcessorOptionsValidator, makeImageProcessor } from "../images"; | |
| import { TEXT_MIME_ALLOWLIST } from "$lib/constants/mime"; | |
| import type { MessageFile } from "$lib/types/Message"; | |
| import type { EndpointMessage } from "../endpoints"; | |
| // uuid import removed (no tool call ids) | |
/**
 * Key used when the endpoint config does not provide `apiKey`.
 * OPENAI_API_KEY is the canonical token; HF_TOKEN is kept as a legacy alias,
 * and "sk-" is the placeholder of last resort.
 */
const fallbackOpenAIApiKey = config.OPENAI_API_KEY || config.HF_TOKEN || "sk-";

/** Which OpenAI API family the endpoint talks to. */
const openAICompletionKind = z.union([z.literal("completions"), z.literal("chat_completions")]);

/** Image preprocessing options, restricted to the most widely-supported formats. */
const openAIImageOptions = createImageProcessorOptionsValidator({
	supportedMimeTypes: ["image/png", "image/jpeg"],
	preferredMimeType: "image/jpeg",
	maxSizeInMB: 1,
	maxWidth: 1024,
	maxHeight: 1024,
});

/**
 * Configuration accepted by the OpenAI-compatible endpoint.
 * Every field except `model` and `type` has a default or is optional.
 */
export const endpointOAIParametersSchema = z.object({
	weight: z.number().int().positive().default(1),
	model: z.any(),
	type: z.literal("openai"),
	baseURL: z.string().url().default("https://api.openai.com/v1"),
	apiKey: z.string().default(fallbackOpenAIApiKey),
	completion: openAICompletionKind.default("chat_completions"),
	defaultHeaders: z.record(z.string()).optional(),
	defaultQuery: z.record(z.string()).optional(),
	// Extra fields merged into the request body sent to the API.
	extraBody: z.record(z.any()).optional(),
	multimodal: z.object({ image: openAIImageOptions }).default({}),
	// When true, the token limit is sent as max_completion_tokens instead of max_tokens.
	useCompletionTokens: z.boolean().default(false),
	// When false, chat requests are sent with stream: false and handled as a single response.
	streamingSupported: z.boolean().default(true),
});
/** Router/provider details reported by the upstream through response headers. */
type RouterMetadata = { route?: string; model?: string; provider?: string };

/** The per-request arguments needed to build the SDK request options. */
type OAIRequestContext = Pick<
	Parameters<Endpoint>[0],
	"conversationId" | "locals" | "abortSignal"
>;

/**
 * Merges the preprompt into the chat messages.
 *
 * - Empty or non-string preprompt: messages are returned unchanged.
 * - First message is a system message: the preprompt is prepended to its text
 *   (non-string system content is replaced by the preprompt alone).
 * - Otherwise: a new system message carrying the preprompt is inserted first.
 */
function applyPreprompt(
	messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[],
	preprompt: unknown
): OpenAI.Chat.Completions.ChatCompletionMessageParam[] {
	const normalizedPreprompt = typeof preprompt === "string" ? preprompt.trim() : "";
	if (!normalizedPreprompt) {
		return messages;
	}

	const [first, ...rest] = messages;
	if (first?.role !== "system") {
		return [{ role: "system", content: normalizedPreprompt }, ...messages];
	}

	const userSystemPrompt = typeof first.content === "string" ? first.content : "";
	return [
		{
			...first,
			content: normalizedPreprompt + (userSystemPrompt ? "\n\n" + userSystemPrompt : ""),
		},
		...rest,
	];
}

/**
 * Builds an endpoint backed by an OpenAI-compatible API.
 *
 * Depending on `completion`, the returned endpoint uses either the legacy
 * completions API (always streamed) or the chat completions API (streamed or
 * single-response, according to `streamingSupported`).
 *
 * @param input Endpoint configuration, validated against `endpointOAIParametersSchema`.
 * @returns The endpoint function that performs one generation request.
 * @throws Error if the `openai` package cannot be imported, or if the
 *   completion type is not recognised.
 */
export async function endpointOai(
	input: z.input<typeof endpointOAIParametersSchema>
): Promise<Endpoint> {
	const {
		baseURL,
		apiKey,
		completion,
		model,
		defaultHeaders,
		defaultQuery,
		multimodal,
		extraBody,
		useCompletionTokens,
		streamingSupported,
	} = endpointOAIParametersSchema.parse(input);

	let openAIModule: typeof import("openai");
	try {
		openAIModule = await import("openai");
	} catch (e) {
		throw new Error("Failed to import OpenAI", { cause: e });
	}
	const OpenAIClient = openAIModule.OpenAI;

	/**
	 * Creates an SDK client whose fetch wrapper records router headers into a
	 * metadata holder private to that client. Chat requests call this once per
	 * request so headers captured for one conversation are never reported for
	 * another, and a response without router headers yields empty metadata.
	 */
	const createClient = () => {
		let routerMetadata: RouterMetadata = {};

		const capturingFetch = async (url: RequestInfo, init?: RequestInit): Promise<Response> => {
			const response = await fetch(url, init);
			const routeHeader = response.headers.get("X-Router-Route");
			const modelHeader = response.headers.get("X-Router-Model");
			const providerHeader = response.headers.get("x-inference-provider");

			if (routeHeader && modelHeader) {
				routerMetadata = {
					route: routeHeader,
					model: modelHeader,
					provider: providerHeader || undefined,
				};
			} else if (providerHeader) {
				// Even without router metadata, capture provider info
				routerMetadata = { provider: providerHeader };
			}
			return response;
		};

		const client = new OpenAIClient({
			apiKey: apiKey || "sk-",
			baseURL,
			defaultHeaders: {
				...(config.PUBLIC_APP_NAME === "HuggingChat" && { "User-Agent": "huggingchat" }),
				...defaultHeaders,
			},
			defaultQuery,
			fetch: capturingFetch,
		});

		return { client, getRouterMetadata: () => routerMetadata };
	};

	/** Request options shared by every API call: merged body, headers and abort signal. */
	const buildRequestOptions = (
		body: object,
		{ conversationId, locals, abortSignal }: OAIRequestContext
	) => ({
		// extraBody is spread last so configured fields override the generated ones.
		body: { ...body, ...extraBody },
		headers: {
			"ChatUI-Conversation-ID": conversationId?.toString() ?? "",
			"X-use-cache": "false",
			...(locals?.token ? { Authorization: `Bearer ${locals.token}` } : {}),
		},
		signal: abortSignal,
	});

	const imageProcessor = makeImageProcessor(multimodal.image);

	if (completion === "completions") {
		// Router metadata is not read on this path, so one client can be shared.
		const { client } = createClient();

		return async ({
			messages,
			preprompt,
			generateSettings,
			conversationId,
			locals,
			abortSignal,
		}) => {
			const prompt = await buildPrompt({
				messages,
				preprompt,
				model,
			});

			// Request-specific settings override the model defaults.
			const parameters = { ...model.parameters, ...generateSettings };
			const body: CompletionCreateParamsStreaming = {
				model: model.id ?? model.name,
				prompt,
				stream: true,
				max_tokens: parameters?.max_tokens,
				stop: parameters?.stop,
				temperature: parameters?.temperature,
				top_p: parameters?.top_p,
				frequency_penalty: parameters?.frequency_penalty,
				presence_penalty: parameters?.presence_penalty,
			};

			const openAICompletion = await client.completions.create(
				body,
				buildRequestOptions(body, { conversationId, locals, abortSignal })
			);
			return openAICompletionToTextGenerationStream(openAICompletion);
		};
	}

	if (completion === "chat_completions") {
		return async ({
			messages,
			preprompt,
			generateSettings,
			conversationId,
			isMultimodal,
			locals,
			abortSignal,
		}) => {
			// One client per request keeps the captured router metadata request-scoped.
			const { client, getRouterMetadata } = createClient();

			// Format messages for the chat API, handling multimodal content if supported
			const preparedMessages = await prepareMessages(
				messages,
				imageProcessor,
				isMultimodal ?? model.multimodal
			);
			const messagesOpenAI = applyPreprompt(preparedMessages, preprompt);

			// Request-specific settings override the model defaults.
			const parameters = { ...model.parameters, ...generateSettings };
			const body = {
				model: model.id ?? model.name,
				messages: messagesOpenAI,
				stream: streamingSupported,
				// Support two different ways of specifying token limits depending on the model
				...(useCompletionTokens
					? { max_completion_tokens: parameters?.max_tokens }
					: { max_tokens: parameters?.max_tokens }),
				stop: parameters?.stop,
				temperature: parameters?.temperature,
				top_p: parameters?.top_p,
				frequency_penalty: parameters?.frequency_penalty,
				presence_penalty: parameters?.presence_penalty,
			};
			const requestOptions = buildRequestOptions(body, { conversationId, locals, abortSignal });

			// The casts pick the SDK overload that matches the runtime value of `stream`.
			if (streamingSupported) {
				const openChatAICompletion = await client.chat.completions.create(
					body as ChatCompletionCreateParamsStreaming,
					requestOptions
				);
				return openAIChatToTextGenerationStream(openChatAICompletion, getRouterMetadata);
			}

			const openChatAICompletion = await client.chat.completions.create(
				body as ChatCompletionCreateParamsNonStreaming,
				requestOptions
			);
			return openAIChatToTextGenerationSingle(openChatAICompletion, getRouterMetadata);
		};
	}

	throw new Error("Invalid completion type");
}
/**
 * Converts chat-ui messages into OpenAI chat message params.
 *
 * Messages are passed through as plain text unless they come from the user
 * and carry files. For those, text-file content is prepended to the message,
 * and images are attached as structured content parts when multimodal is on.
 *
 * @param messages Conversation messages to convert.
 * @param imageProcessor Processor applied to image attachments.
 * @param isMultimodal Whether image parts may be sent.
 * @returns One OpenAI message param per input message, in the same order.
 */
async function prepareMessages(
	messages: EndpointMessage[],
	imageProcessor: ReturnType<typeof makeImageProcessor>,
	isMultimodal: boolean
): Promise<OpenAI.Chat.Completions.ChatCompletionMessageParam[]> {
	const converted = messages.map(async (message) => {
		// Only user messages with at least one attachment need file handling.
		if (message.from !== "user" || !message.files || message.files.length === 0) {
			return { role: message.from, content: message.content };
		}

		const { imageParts: images, textContent: injectedText } = await prepareFiles(
			imageProcessor,
			message.files,
			isMultimodal
		);

		// Text-file content, when present, goes in front of the user's own text.
		const text =
			injectedText.length > 0 ? injectedText + "\n\n" + message.content : message.content;

		// Without images (or without multimodal support) the message stays plain text.
		if (images.length === 0 || !isMultimodal) {
			return { role: message.from, content: text };
		}

		return {
			role: message.from,
			content: [{ type: "text" as const, text }, ...images],
		};
	});

	return Promise.all(converted);
}
/**
 * Splits message attachments into image content parts and injected text.
 *
 * - Files whose MIME type starts with `image/` are run through `imageProcessor`
 *   and returned as base64 data-URL image parts, only when `isMultimodal` is true.
 * - Files whose MIME type matches `TEXT_MIME_ALLOWLIST` (with `*` wildcards on
 *   either side of the slash) are base64-decoded as UTF-8 and wrapped in
 *   `<document>` tags, joined by blank lines.
 *
 * MIME types are compared case-insensitively, and a missing MIME type is
 * treated as an empty string, for both classifications.
 *
 * @param imageProcessor Processor applied to each image file.
 * @param files Attachments of a single message.
 * @param isMultimodal Whether images should be processed at all.
 * @returns The image parts (possibly empty) and the concatenated text content
 *   (empty string when there are no text files).
 */
async function prepareFiles(
	imageProcessor: ReturnType<typeof makeImageProcessor>,
	files: MessageFile[],
	isMultimodal: boolean
): Promise<{
	imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[];
	textContent: string;
}> {
	// Shared normalisation so both classifications tolerate a missing or
	// mixed-case MIME type instead of throwing or silently skipping the file.
	const normalizedMime = (file: MessageFile): string => (file.mime || "").toLowerCase();

	const isAllowedTextFile = (file: MessageFile): boolean => {
		const [fileType, fileSubtype] = normalizedMime(file).split("/");
		return TEXT_MIME_ALLOWLIST.some((allowed) => {
			const [type, subtype] = allowed.toLowerCase().split("/");
			const typeOk = type === "*" || type === fileType;
			const subOk = subtype === "*" || subtype === fileSubtype;
			return typeOk && subOk;
		});
	};

	// Separate image and text files
	const imageFiles = files.filter((file) => normalizedMime(file).startsWith("image/"));
	const textFiles = files.filter(isAllowedTextFile);

	// Process images if multimodal is enabled
	// NOTE(review): the processor receives the original file object, so it sees
	// the MIME type as provided (not lower-cased) — confirm it accepts that.
	let imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[] = [];
	if (isMultimodal && imageFiles.length > 0) {
		const processedFiles = await Promise.all(imageFiles.map(imageProcessor));
		imageParts = processedFiles.map((file) => ({
			type: "image_url" as const,
			image_url: {
				url: `data:${file.mime};base64,${file.image.toString("base64")}`,
				// Improves compatibility with some OpenAI-compatible servers
				// that expect an explicit detail setting.
				detail: "auto" as const,
			},
		}));
	}

	// Process text files - inject their content. Decoding is synchronous, so no
	// promise plumbing is needed; joining an empty list yields "".
	const textContent = textFiles
		.map((file) => {
			const content = Buffer.from(file.value, "base64").toString("utf-8");
			return `<document name="${file.name}" type="${file.mime}">\n${content}\n</document>`;
		})
		.join("\n\n");

	return { imageParts, textContent };
}