Spaces:

mishig
/

chat-ui

Running on CPU Upgrade

victor HF Staff commited on Nov 18, 2025

Commit

1b26311

1 Parent(s): 96117d2

Refactor file handling for multimodal chat messages

Moved message and file preparation logic to a shared utility (prepareFiles.ts) for OpenAI-compatible multimodal payloads. Updated endpointOai and runMcpFlow to use the new prepareMessagesWithFiles function, improving code reuse and maintainability.

Files changed (3) hide show

src/lib/server/endpoints/openai/endpointOai.ts +2 -89
src/lib/server/textGeneration/mcp/runMcpFlow.ts +14 -29
src/lib/server/textGeneration/utils/prepareFiles.ts +88 -0

src/lib/server/endpoints/openai/endpointOai.ts CHANGED Viewed

@@ -14,9 +14,7 @@ import { config } from "$lib/server/config";
 import type { Endpoint } from "../endpoints";
 import type OpenAI from "openai";
 import { createImageProcessorOptionsValidator, makeImageProcessor } from "../images";
-import { TEXT_MIME_ALLOWLIST } from "$lib/constants/mime";
-import type { MessageFile } from "$lib/types/Message";
-import type { EndpointMessage } from "../endpoints";
 // uuid import removed (no tool call ids)
 export const endpointOAIParametersSchema = z.object({
@@ -168,7 +166,7 @@ export async function endpointOai(
 		}) => {
 			// Format messages for the chat API, handling multimodal content if supported
 			let messagesOpenAI: OpenAI.Chat.Completions.ChatCompletionMessageParam[] =
-				await prepareMessages(messages, imageProcessor, isMultimodal ?? model.multimodal);
 			// Normalize preprompt and handle empty values
 			const normalizedPreprompt = typeof preprompt === "string" ? preprompt.trim() : "";
@@ -245,88 +243,3 @@ export async function endpointOai(
 		throw new Error("Invalid completion type");
 	}
 }
-async function prepareMessages(
-	messages: EndpointMessage[],
-	imageProcessor: ReturnType<typeof makeImageProcessor>,
-	isMultimodal: boolean
-): Promise<OpenAI.Chat.Completions.ChatCompletionMessageParam[]> {
-	return Promise.all(
-		messages.map(async (message) => {
-			if (message.from === "user" && message.files && message.files.length > 0) {
-				const { imageParts, textContent } = await prepareFiles(
-					imageProcessor,
-					message.files,
-					isMultimodal
-				);
-				// If we have text files, prepend their content to the message
-				let messageText = message.content;
-				if (textContent.length > 0) {
-					messageText = textContent + "\n\n" + message.content;
-				}
-				// If we have images and multimodal is enabled, use structured content
-				if (imageParts.length > 0 && isMultimodal) {
-					const parts = [{ type: "text" as const, text: messageText }, ...imageParts];
-					return { role: message.from, content: parts };
-				}
-				// Otherwise just use the text (possibly with injected file content)
-				return { role: message.from, content: messageText };
-			}
-			return { role: message.from, content: message.content };
-		})
-	);
-}
-async function prepareFiles(
-	imageProcessor: ReturnType<typeof makeImageProcessor>,
-	files: MessageFile[],
-	isMultimodal: boolean
-): Promise<{
-	imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[];
-	textContent: string;
-}> {
-	// Separate image and text files
-	const imageFiles = files.filter((file) => file.mime.startsWith("image/"));
-	const textFiles = files.filter((file) => {
-		const mime = (file.mime || "").toLowerCase();
-		const [fileType, fileSubtype] = mime.split("/");
-		return TEXT_MIME_ALLOWLIST.some((allowed) => {
-			const [type, subtype] = allowed.toLowerCase().split("/");
-			const typeOk = type === "*" || type === fileType;
-			const subOk = subtype === "*" || subtype === fileSubtype;
-			return typeOk && subOk;
-		});
-	});
-	// Process images if multimodal is enabled
-	let imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[] = [];
-	if (isMultimodal && imageFiles.length > 0) {
-		const processedFiles = await Promise.all(imageFiles.map(imageProcessor));
-		imageParts = processedFiles.map((file) => ({
-			type: "image_url" as const,
-			image_url: {
-				url: `data:${file.mime};base64,${file.image.toString("base64")}`,
-				// Improves compatibility with some OpenAI-compatible servers
-				// that expect an explicit detail setting.
-				detail: "auto",
-			},
-		}));
-	}
-	// Process text files - inject their content
-	let textContent = "";
-	if (textFiles.length > 0) {
-		const textParts = await Promise.all(
-			textFiles.map(async (file) => {
-				const content = Buffer.from(file.value, "base64").toString("utf-8");
-				return `<document name="${file.name}" type="${file.mime}">\n${content}\n</document>`;
-			})
-		);
-		textContent = textParts.join("\n\n");
-	}
-	return { imageParts, textContent };
-}

 import type { Endpoint } from "../endpoints";
 import type OpenAI from "openai";
 import { createImageProcessorOptionsValidator, makeImageProcessor } from "../images";
+import { prepareMessagesWithFiles } from "$lib/server/textGeneration/utils/prepareFiles";
 // uuid import removed (no tool call ids)
 export const endpointOAIParametersSchema = z.object({
 		}) => {
 			// Format messages for the chat API, handling multimodal content if supported
 			let messagesOpenAI: OpenAI.Chat.Completions.ChatCompletionMessageParam[] =
+				await prepareMessagesWithFiles(messages, imageProcessor, isMultimodal ?? model.multimodal);
 			// Normalize preprompt and handle empty values
 			const normalizedPreprompt = typeof preprompt === "string" ? preprompt.trim() : "";
 		throw new Error("Invalid completion type");
 	}
 }

src/lib/server/textGeneration/mcp/runMcpFlow.ts CHANGED Viewed

@@ -8,7 +8,6 @@ import type {
 	ChatCompletionChunk,
 	ChatCompletionCreateParamsStreaming,
 	ChatCompletionMessageParam,
-	ChatCompletionContentPart,
 	ChatCompletionMessageToolCall,
 } from "openai/resources/chat/completions";
 import type { Stream } from "openai/streaming";
@@ -20,6 +19,8 @@ import { drainPool } from "$lib/server/mcp/clientPool";
 import type { TextGenerationContext } from "../types";
 import { hasAuthHeader, isStrictHfMcpLogin, hasNonEmptyToken } from "$lib/server/mcp/hf";
 import { buildImageRefResolver } from "./fileRefs";
 export type RunMcpFlowContext = Pick<
 	TextGenerationContext,
@@ -202,6 +203,13 @@ export async function* runMcpFlow({
 	}
 	const resolveFileRef = buildImageRefResolver(messages);
 	const hasImageInput = messages.some((msg) =>
 		(msg.files ?? []).some(
@@ -270,34 +278,11 @@ export async function* runMcpFlow({
 			},
 			"[mcp] starting completion with tools"
 		);
-		const toOpenAiMessage = (msg: EndpointMessage): ChatCompletionMessageParam => {
-			if (msg.from === "user" && mmEnabled) {
-				const parts: ChatCompletionContentPart[] = [{ type: "text", text: msg.content }];
-				for (const file of msg.files ?? []) {
-					if (typeof file?.mime === "string" && file.mime.startsWith("image/")) {
-						const rawValue = file.value as unknown;
-						let encoded: string;
-						if (typeof rawValue === "string") {
-							encoded = rawValue;
-						} else if (rawValue instanceof Uint8Array) {
-							encoded = Buffer.from(rawValue).toString("base64");
-						} else if (rawValue instanceof ArrayBuffer) {
-							encoded = Buffer.from(rawValue).toString("base64");
-						} else {
-							encoded = String(rawValue ?? "");
-						}
-						const url = encoded.startsWith("data:")
-							? encoded
-							: `data:${file.mime};base64,${encoded}`;
-						parts.push({ type: "image_url", image_url: { url, detail: "auto" } });
-					}
-				}
-				return { role: msg.from, content: parts };
-			}
-			return { role: msg.from, content: msg.content };
-		};
-		let messagesOpenAI: ChatCompletionMessageParam[] = messages.map(toOpenAiMessage);
 		const toolPreprompt = buildToolPreprompt(oaTools);
 		const prepromptPieces: string[] = [];
 		if (toolPreprompt.trim().length > 0) {

 	ChatCompletionChunk,
 	ChatCompletionCreateParamsStreaming,
 	ChatCompletionMessageParam,
 	ChatCompletionMessageToolCall,
 } from "openai/resources/chat/completions";
 import type { Stream } from "openai/streaming";
 import type { TextGenerationContext } from "../types";
 import { hasAuthHeader, isStrictHfMcpLogin, hasNonEmptyToken } from "$lib/server/mcp/hf";
 import { buildImageRefResolver } from "./fileRefs";
+import { prepareMessagesWithFiles } from "$lib/server/textGeneration/utils/prepareFiles";
+import { makeImageProcessor } from "$lib/server/endpoints/images";
 export type RunMcpFlowContext = Pick<
 	TextGenerationContext,
 	}
 	const resolveFileRef = buildImageRefResolver(messages);
+	const imageProcessor = makeImageProcessor({
+		supportedMimeTypes: ["image/png", "image/jpeg"],
+		preferredMimeType: "image/jpeg",
+		maxSizeInMB: 1,
+		maxWidth: 1024,
+		maxHeight: 1024,
+	});
 	const hasImageInput = messages.some((msg) =>
 		(msg.files ?? []).some(
 			},
 			"[mcp] starting completion with tools"
 		);
+		let messagesOpenAI: ChatCompletionMessageParam[] = await prepareMessagesWithFiles(
+			messages,
+			imageProcessor,
+			mmEnabled
+		);
 		const toolPreprompt = buildToolPreprompt(oaTools);
 		const prepromptPieces: string[] = [];
 		if (toolPreprompt.trim().length > 0) {

src/lib/server/textGeneration/utils/prepareFiles.ts ADDED Viewed

	@@ -0,0 +1,88 @@

+import type { MessageFile } from "$lib/types/Message";
+import type { EndpointMessage } from "$lib/server/endpoints/endpoints";
+import type { OpenAI } from "openai";
+import { TEXT_MIME_ALLOWLIST } from "$lib/constants/mime";
+import type { makeImageProcessor } from "$lib/server/endpoints/images";
+/**
+ * Prepare chat messages for OpenAI-compatible multimodal payloads.
+ * - Processes images via the provided imageProcessor (resize/convert) when multimodal is enabled.
+ * - Injects text-file content into the user message text.
+ * - Leaves messages untouched when no files or multimodal disabled.
+ */
+export async function prepareMessagesWithFiles(
+	messages: EndpointMessage[],
+	imageProcessor: ReturnType<typeof makeImageProcessor>,
+	isMultimodal: boolean
+): Promise<OpenAI.Chat.Completions.ChatCompletionMessageParam[]> {
+	return Promise.all(
+		messages.map(async (message) => {
+			if (message.from === "user" && message.files && message.files.length > 0) {
+				const { imageParts, textContent } = await prepareFiles(
+					imageProcessor,
+					message.files,
+					isMultimodal
+				);
+				let messageText = message.content;
+				if (textContent.length > 0) {
+					messageText = textContent + "\n\n" + message.content;
+				}
+				if (imageParts.length > 0 && isMultimodal) {
+					const parts = [{ type: "text" as const, text: messageText }, ...imageParts];
+					return { role: message.from, content: parts };
+				}
+				return { role: message.from, content: messageText };
+			}
+			return { role: message.from, content: message.content };
+		})
+	);
+}
+async function prepareFiles(
+	imageProcessor: ReturnType<typeof makeImageProcessor>,
+	files: MessageFile[],
+	isMultimodal: boolean
+): Promise<{
+	imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[];
+	textContent: string;
+}> {
+	const imageFiles = files.filter((file) => file.mime.startsWith("image/"));
+	const textFiles = files.filter((file) => {
+		const mime = (file.mime || "").toLowerCase();
+		const [fileType, fileSubtype] = mime.split("/");
+		return TEXT_MIME_ALLOWLIST.some((allowed) => {
+			const [type, subtype] = allowed.toLowerCase().split("/");
+			const typeOk = type === "*" || type === fileType;
+			const subOk = subtype === "*" || subtype === fileSubtype;
+			return typeOk && subOk;
+		});
+	});
+	let imageParts: OpenAI.Chat.Completions.ChatCompletionContentPartImage[] = [];
+	if (isMultimodal && imageFiles.length > 0) {
+		const processedFiles = await Promise.all(imageFiles.map(imageProcessor));
+		imageParts = processedFiles.map((file) => ({
+			type: "image_url" as const,
+			image_url: {
+				url: `data:${file.mime};base64,${file.image.toString("base64")}`,
+				detail: "auto",
+			},
+		}));
+	}
+	let textContent = "";
+	if (textFiles.length > 0) {
+		const textParts = await Promise.all(
+			textFiles.map(async (file) => {
+				const content = Buffer.from(file.value, "base64").toString("utf-8");
+				return `<document name="${file.name}" type="${file.mime}">\n${content}\n</document>`;
+			})
+		);
+		textContent = textParts.join("\n\n");
+	}
+	return { imageParts, textContent };
+}