victor's picture
victor HF Staff
Refactor file handling for multimodal chat messages
1b26311
import type { MessageFile } from "$lib/types/Message";
import type { EndpointMessage } from "$lib/server/endpoints/endpoints";
import type { OpenAI } from "openai";
import { TEXT_MIME_ALLOWLIST } from "$lib/constants/mime";
import type { makeImageProcessor } from "$lib/server/endpoints/images";
/**
* Prepare chat messages for OpenAI-compatible multimodal payloads.
* - Processes images via the provided imageProcessor (resize/convert) when multimodal is enabled.
* - Injects text-file content into the user message text.
* - Leaves messages untouched when no files or multimodal disabled.
*/
export async function prepareMessagesWithFiles(
messages: EndpointMessage[],
imageProcessor: ReturnType<typeof makeImageProcessor>,
isMultimodal: boolean
): Promise<OpenAI.Chat.Completions.ChatCompletionMessageParam[]> {
return Promise.all(
messages.map(async (message) => {
if (message.from === "user" && message.files && message.files.length > 0) {
const { imageParts, textContent } = await prepareFiles(
imageProcessor,
message.files,
isMultimodal
);
let messageText = message.content;
if (textContent.length > 0) {
messageText = textContent + "\n\n" + message.content;
}
if (imageParts.length > 0 && isMultimodal) {
const parts = [{ type: "text" as const, text: messageText }, ...imageParts];
return { role: message.from, content: parts };
}
return { role: message.from, content: messageText };
}
return { role: message.from, content: message.content };
})
);
}
async function prepareFiles(
imageProcessor: ReturnType<typeof makeImageProcessor>,
files: MessageFile[],
isMultimodal: boolean
): Promise<{
imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[];
textContent: string;
}> {
const imageFiles = files.filter((file) => file.mime.startsWith("image/"));
const textFiles = files.filter((file) => {
const mime = (file.mime || "").toLowerCase();
const [fileType, fileSubtype] = mime.split("/");
return TEXT_MIME_ALLOWLIST.some((allowed) => {
const [type, subtype] = allowed.toLowerCase().split("/");
const typeOk = type === "*" || type === fileType;
const subOk = subtype === "*" || subtype === fileSubtype;
return typeOk && subOk;
});
});
let imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[] = [];
if (isMultimodal && imageFiles.length > 0) {
const processedFiles = await Promise.all(imageFiles.map(imageProcessor));
imageParts = processedFiles.map((file) => ({
type: "image_url" as const,
image_url: {
url: `data:${file.mime};base64,${file.image.toString("base64")}`,
detail: "auto",
},
}));
}
let textContent = "";
if (textFiles.length > 0) {
const textParts = await Promise.all(
textFiles.map(async (file) => {
const content = Buffer.from(file.value, "base64").toString("utf-8");
return `<document name="${file.name}" type="${file.mime}">\n${content}\n</document>`;
})
);
textContent = textParts.join("\n\n");
}
return { imageParts, textContent };
}