pluralchat

Sleeping

App Files Files Community

victor HF Staff committed on Sep 25, 2025

Commit

9092d43

unverified ·

1 Parent(s): b9b13da

Omni multimodality (#1880)

Browse files

* Add multimodal fallback support to router

* fix: improve error handling for multimodal model routing

* Handle multimodal messages without files

Files changed (4) hide show

.env +3 -0
src/lib/server/endpoints/openai/endpointOai.ts +5 -5
src/lib/server/models.ts +7 -0
src/lib/server/router/endpoint.ts +60 -8

.env CHANGED Viewed

@@ -56,6 +56,9 @@ LLM_ROUTER_FALLBACK_MODEL=
 # Arch selection timeout in milliseconds (default 10000)
 LLM_ROUTER_ARCH_TIMEOUT_MS=10000
 # Router UI overrides (client-visible)
 # Public display name for the router entry in the model list. Defaults to "Omni".
 PUBLIC_LLM_ROUTER_DISPLAY_NAME=Omni

 # Arch selection timeout in milliseconds (default 10000)
 LLM_ROUTER_ARCH_TIMEOUT_MS=10000
+# Enable router multimodal fallback (set to true to allow image inputs via router)
+LLM_ROUTER_ENABLE_MULTIMODAL=false
 # Router UI overrides (client-visible)
 # Public display name for the router entry in the model list. Defaults to "Omni".
 PUBLIC_LLM_ROUTER_DISPLAY_NAME=Omni

src/lib/server/endpoints/openai/endpointOai.ts CHANGED Viewed

@@ -217,11 +217,11 @@ async function prepareMessages(
 	return Promise.all(
 		messages.map(async (message) => {
 			if (message.from === "user" && isMultimodal) {
-				const parts = [
-					{ type: "text" as const, text: message.content },
-					...(await prepareFiles(imageProcessor, message.files ?? [])),
-				];
-				return { role: message.from, content: parts };
 			}
 			return { role: message.from, content: message.content };
 		})

 	return Promise.all(
 		messages.map(async (message) => {
 			if (message.from === "user" && isMultimodal) {
+				const imageParts = await prepareFiles(imageProcessor, message.files ?? []);
+				if (imageParts.length) {
+					const parts = [{ type: "text" as const, text: message.content }, ...imageParts];
+					return { role: message.from, content: parts };
+				}
 			}
 			return { role: message.from, content: message.content };
 		})

src/lib/server/models.ts CHANGED Viewed

@@ -288,6 +288,8 @@ const archBase = (config.LLM_ROUTER_ARCH_BASE_URL || "").trim();
 const routerLabel = (config.PUBLIC_LLM_ROUTER_DISPLAY_NAME || "Omni").trim() || "Omni";
 const routerLogo = (config.PUBLIC_LLM_ROUTER_LOGO_URL || "").trim();
 const routerAliasId = (config.PUBLIC_LLM_ROUTER_ALIAS_ID || "omni").trim() || "omni";
 let decorated = builtModels as any[];
@@ -309,6 +311,11 @@ if (archBase) {
 		unlisted: false,
 	} as any;
 	const aliasBase = await processModel(aliasRaw);
 	// Create a self-referential ProcessedModel for the router endpoint
 	let aliasModel: any = {};

 const routerLabel = (config.PUBLIC_LLM_ROUTER_DISPLAY_NAME || "Omni").trim() || "Omni";
 const routerLogo = (config.PUBLIC_LLM_ROUTER_LOGO_URL || "").trim();
 const routerAliasId = (config.PUBLIC_LLM_ROUTER_ALIAS_ID || "omni").trim() || "omni";
+const routerMultimodalEnabled =
+	(config.LLM_ROUTER_ENABLE_MULTIMODAL || "").toLowerCase() === "true";
 let decorated = builtModels as any[];
 		unlisted: false,
 	} as any;
+	if (routerMultimodalEnabled) {
+		aliasRaw.multimodal = true;
+		aliasRaw.multimodalAcceptedMimetypes = ["image/*"];
+	}
 	const aliasBase = await processModel(aliasRaw);
 	// Create a self-referential ProcessedModel for the router endpoint
 	let aliasModel: any = {};

src/lib/server/router/endpoint.ts CHANGED Viewed

@@ -8,6 +8,8 @@ import { getRoutes, resolveRouteModels } from "./policy";
 const REASONING_BLOCK_REGEX = /<think>[\s\S]*?(?:<\/think>|$)/g;
 function stripReasoningBlocks(text: string): string {
 	const stripped = text.replace(REASONING_BLOCK_REGEX, "");
 	return stripped === text ? text : stripped.trim();
@@ -31,10 +33,13 @@ export async function makeRouterEndpoint(routerModel: ProcessedModel): Promise<E
 	return async function routerEndpoint(params: EndpointParameters) {
 		const routes = await getRoutes();
 		const sanitizedMessages = params.messages.map(stripReasoningFromMessage);
-		const { routeName } = await archSelectRoute(sanitizedMessages);
-		const fallbackModel = config.LLM_ROUTER_FALLBACK_MODEL || routerModel.id;
-		const { candidates } = resolveRouteModels(routeName, routes, fallbackModel);
 		// Helper to create an OpenAI endpoint for a specific candidate model id
 		async function createCandidateEndpoint(candidateModelId: string): Promise<Endpoint> {
@@ -69,24 +74,71 @@ export async function makeRouterEndpoint(routerModel: ProcessedModel): Promise<E
 		}
 		// Yield router metadata for immediate UI display, using the actual candidate
-		async function* metadataThenStream(gen: AsyncGenerator<any>, actualModel: string) {
 			yield {
 				token: { id: 0, text: "", special: true, logprob: 0 },
 				generated_text: null,
 				details: null,
-				routerMetadata: { route: routeName, model: actualModel },
 			} as any;
 			for await (const ev of gen) yield ev;
 		}
 		let lastErr: any = undefined;
 		for (const candidate of candidates) {
 			try {
 				logger.info({ route: routeName, model: candidate }, "[router] trying candidate");
 				const ep = await createCandidateEndpoint(candidate);
 				const gen = await ep({ ...params });
-				// Yield metadata with the actual candidate used
-				return metadataThenStream(gen, candidate);
 			} catch (e) {
 				lastErr = e;
 				logger.warn(

 const REASONING_BLOCK_REGEX = /<think>[\s\S]*?(?:<\/think>|$)/g;
+const ROUTER_MULTIMODAL_ROUTE = "multimodal";
 function stripReasoningBlocks(text: string): string {
 	const stripped = text.replace(REASONING_BLOCK_REGEX, "");
 	return stripped === text ? text : stripped.trim();
 	return async function routerEndpoint(params: EndpointParameters) {
 		const routes = await getRoutes();
 		const sanitizedMessages = params.messages.map(stripReasoningFromMessage);
+		const routerMultimodalEnabled =
+			(config.LLM_ROUTER_ENABLE_MULTIMODAL || "").toLowerCase() === "true";
+		const hasImageInput = sanitizedMessages.some((message) =>
+			(message.files ?? []).some(
+				(file) => typeof file?.mime === "string" && file.mime.startsWith("image/")
+			)
+		);
 		// Helper to create an OpenAI endpoint for a specific candidate model id
 		async function createCandidateEndpoint(candidateModelId: string): Promise<Endpoint> {
 		}
 		// Yield router metadata for immediate UI display, using the actual candidate
+		async function* metadataThenStream(
+			gen: AsyncGenerator<any>,
+			actualModel: string,
+			selectedRoute: string
+		) {
 			yield {
 				token: { id: 0, text: "", special: true, logprob: 0 },
 				generated_text: null,
 				details: null,
+				routerMetadata: { route: selectedRoute, model: actualModel },
 			} as any;
 			for await (const ev of gen) yield ev;
 		}
+		async function findFirstMultimodalCandidateId(): Promise<string | undefined> {
+			try {
+				const mod = await import("../models");
+				const all = (mod as any).models as ProcessedModel[];
+				const first = all?.find((m) => !m.isRouter && m.multimodal);
+				return first?.id ?? first?.name;
+			} catch (e) {
+				logger.warn({ err: String(e) }, "[router] failed to load models for multimodal lookup");
+				return undefined;
+			}
+		}
+		if (routerMultimodalEnabled && hasImageInput) {
+			const multimodalCandidate = await findFirstMultimodalCandidateId();
+			if (!multimodalCandidate) {
+				throw new Error(
+					"No multimodal models are configured for the router. Remove the image or enable a multimodal model."
+				);
+			}
+			try {
+				logger.info(
+					{ route: ROUTER_MULTIMODAL_ROUTE, model: multimodalCandidate },
+					"[router] multimodal input detected; bypassing Arch selection"
+				);
+				const ep = await createCandidateEndpoint(multimodalCandidate);
+				const gen = await ep({ ...params });
+				return metadataThenStream(gen, multimodalCandidate, ROUTER_MULTIMODAL_ROUTE);
+			} catch (e) {
+				logger.error(
+					{ route: ROUTER_MULTIMODAL_ROUTE, model: multimodalCandidate, err: String(e) },
+					"[router] multimodal fallback failed"
+				);
+				throw new Error(
+					"Failed to call the configured multimodal model. Remove the image or try again later."
+				);
+			}
+		}
+		const { routeName } = await archSelectRoute(sanitizedMessages);
+		const fallbackModel = config.LLM_ROUTER_FALLBACK_MODEL || routerModel.id;
+		const { candidates } = resolveRouteModels(routeName, routes, fallbackModel);
 		let lastErr: any = undefined;
 		for (const candidate of candidates) {
 			try {
 				logger.info({ route: routeName, model: candidate }, "[router] trying candidate");
 				const ep = await createCandidateEndpoint(candidate);
 				const gen = await ep({ ...params });
+				return metadataThenStream(gen, candidate, routeName);
 			} catch (e) {
 				lastErr = e;
 				logger.warn(