import type { MessageFile } from "$lib/types/Message"; import type { EndpointMessage } from "$lib/server/endpoints/endpoints"; import type { OpenAI } from "openai"; import { TEXT_MIME_ALLOWLIST } from "$lib/constants/mime"; import type { makeImageProcessor } from "$lib/server/endpoints/images"; /** * Prepare chat messages for OpenAI-compatible multimodal payloads. * - Processes images via the provided imageProcessor (resize/convert) when multimodal is enabled. * - Injects text-file content into the user message text. * - Leaves messages untouched when no files or multimodal disabled. */ export async function prepareMessagesWithFiles( messages: EndpointMessage[], imageProcessor: ReturnType, isMultimodal: boolean ): Promise { return Promise.all( messages.map(async (message) => { if (message.from === "user" && message.files && message.files.length > 0) { const { imageParts, textContent } = await prepareFiles( imageProcessor, message.files, isMultimodal ); let messageText = message.content; if (textContent.length > 0) { messageText = textContent + "\n\n" + message.content; } if (imageParts.length > 0 && isMultimodal) { const parts = [{ type: "text" as const, text: messageText }, ...imageParts]; return { role: message.from, content: parts }; } return { role: message.from, content: messageText }; } return { role: message.from, content: message.content }; }) ); } async function prepareFiles( imageProcessor: ReturnType, files: MessageFile[], isMultimodal: boolean ): Promise<{ imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[]; textContent: string; }> { const imageFiles = files.filter((file) => file.mime.startsWith("image/")); const textFiles = files.filter((file) => { const mime = (file.mime || "").toLowerCase(); const [fileType, fileSubtype] = mime.split("/"); return TEXT_MIME_ALLOWLIST.some((allowed) => { const [type, subtype] = allowed.toLowerCase().split("/"); const typeOk = type === "*" || type === fileType; const subOk = subtype === "*" || subtype === fileSubtype; return typeOk && subOk; }); }); let imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[] = []; if (isMultimodal && imageFiles.length > 0) { const processedFiles = await Promise.all(imageFiles.map(imageProcessor)); imageParts = processedFiles.map((file) => ({ type: "image_url" as const, image_url: { url: `data:${file.mime};base64,${file.image.toString("base64")}`, detail: "auto", }, })); } let textContent = ""; if (textFiles.length > 0) { const textParts = await Promise.all( textFiles.map(async (file) => { const content = Buffer.from(file.value, "base64").toString("utf-8"); return `\n${content}\n`; }) ); textContent = textParts.join("\n\n"); } return { imageParts, textContent }; }