Spaces:

mishig
/

chat-ui

Running on CPU Upgrade

App Files Files Community

chat-ui/src/lib/server/textGeneration/utils/prepareFiles.ts

victor HF Staff

Refactor file handling for multimodal chat messages

1b26311 2 months ago

raw

history blame contribute delete

3.06 kB

	import type { MessageFile } from "$lib/types/Message";
	import type { EndpointMessage } from "$lib/server/endpoints/endpoints";
	import type { OpenAI } from "openai";
	import { TEXT_MIME_ALLOWLIST } from "$lib/constants/mime";
	import type { makeImageProcessor } from "$lib/server/endpoints/images";

	/**
	 * Convert chat messages into OpenAI-compatible chat-completion message params.
	 *
	 * Only user messages carrying at least one file get special handling:
	 * - any text content returned by `prepareFiles` is placed in front of the message
	 *   text, separated from it by a blank line (this step does not depend on `isMultimodal`);
	 * - when `isMultimodal` is true and image parts were returned, the content becomes an
	 *   array: one text part followed by the image parts.
	 * Every other message is mapped to `{ role, content }` with its content unchanged.
	 *
	 * @param messages - conversation messages to convert
	 * @param imageProcessor - image processor forwarded to `prepareFiles`
	 * @param isMultimodal - whether image parts may be attached to the output
	 * @returns one message param per input message, in input order
	 */
	export async function prepareMessagesWithFiles(
		messages: EndpointMessage[],
		imageProcessor: ReturnType<typeof makeImageProcessor>,
		isMultimodal: boolean
	): Promise<OpenAI.Chat.Completions.ChatCompletionMessageParam[]> {
		const toParam = async (message: EndpointMessage) => {
			const attachments = message.files;

			// Non-user messages and messages without attachments pass straight through.
			if (message.from !== "user" || !attachments || attachments.length === 0) {
				return { role: message.from, content: message.content };
			}

			const prepared = await prepareFiles(imageProcessor, attachments, isMultimodal);

			// Text-file content goes first so the user's own text follows the documents.
			const text =
				prepared.textContent.length > 0
					? `${prepared.textContent}\n\n${message.content}`
					: message.content;

			// Without usable image parts the content stays a plain string.
			if (!isMultimodal || prepared.imageParts.length === 0) {
				return { role: message.from, content: text };
			}

			return {
				role: message.from,
				content: [{ type: "text" as const, text }, ...prepared.imageParts],
			};
		};

		return Promise.all(messages.map((message) => toParam(message)));
	}

	/**
	 * Reduce a MIME string to its lowercased `type/subtype` part, dropping any
	 * `;`-separated parameters (for example `; charset=utf-8`).
	 * A missing MIME yields the empty string.
	 */
	function normalizeMime(mime: string | null | undefined): string {
		const [essence = ""] = (mime ?? "").split(";");
		return essence.trim().toLowerCase();
	}

	/**
	 * Check a normalized MIME type against allowlist patterns such as `text/*` or
	 * `application/json`. A `*` in either position of a pattern matches anything.
	 */
	function isAllowlistedMime(mime: string, allowlist: readonly string[]): boolean {
		const [fileType, fileSubtype] = mime.split("/");
		return allowlist.some((allowed) => {
			const [type, subtype] = allowed.toLowerCase().split("/");
			const typeOk = type === "*" || type === fileType;
			const subOk = subtype === "*" || subtype === fileSubtype;
			return typeOk && subOk;
		});
	}

	/**
	 * Split a message's files into image parts and inlined text content.
	 *
	 * - Files whose MIME type starts with `image/` are run through `imageProcessor` and
	 *   turned into base64 `data:` URL image parts, but only when `isMultimodal` is true.
	 * - Files whose MIME type matches `TEXT_MIME_ALLOWLIST` are base64-decoded as UTF-8 and
	 *   wrapped in `<document name="…" type="…">` blocks joined by blank lines. This happens
	 *   regardless of `isMultimodal`.
	 * - Files matching neither rule are ignored.
	 *
	 * MIME types are compared case-insensitively with parameters stripped, so
	 * `Application/JSON; charset=utf-8` matches an `application/json` allowlist entry.
	 *
	 * @param imageProcessor - processor applied to each image file; its result supplies the
	 *   `mime` and `image` buffer used to build the data URL
	 * @param files - files attached to a single message
	 * @param isMultimodal - whether image parts should be produced at all
	 * @returns the image parts (possibly empty) and the concatenated text content (possibly "")
	 */
	async function prepareFiles(
		imageProcessor: ReturnType<typeof makeImageProcessor>,
		files: MessageFile[],
		isMultimodal: boolean
	): Promise<{
		imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[];
		textContent: string;
	}> {
		const imageFiles = files.filter((file) => normalizeMime(file.mime).startsWith("image/"));
		const textFiles = files.filter((file) =>
			isAllowlistedMime(normalizeMime(file.mime), TEXT_MIME_ALLOWLIST)
		);

		let imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[] = [];
		if (isMultimodal && imageFiles.length > 0) {
			const processedFiles = await Promise.all(imageFiles.map(imageProcessor));
			imageParts = processedFiles.map((file) => ({
				type: "image_url" as const,
				image_url: {
					// The processor's output MIME is used, not the MIME of the uploaded file.
					url: `data:${file.mime};base64,${file.image.toString("base64")}`,
					detail: "auto" as const,
				},
			}));
		}

		// NOTE(review): assumes file.value holds base64-encoded file content — confirm against caller.
		// Decoding is synchronous, so a plain map is enough; no files yields "".
		const textContent = textFiles
			.map((file) => {
				const content = Buffer.from(file.value, "base64").toString("utf-8");
				return `<document name="${file.name}" type="${file.mime}">\n${content}\n</document>`;
			})
			.join("\n\n");

		return { imageParts, textContent };
	}