Cache models to avoid redundant dynamic imports
Browse files
Introduces a module-level cache for models in endpoint.ts to prevent repeated dynamic imports on every request. This improves performance by loading models only once and reusing them for subsequent calls.
src/lib/server/router/endpoint.ts
CHANGED
|
@@ -17,6 +17,17 @@ const REASONING_BLOCK_REGEX = /<think>[\s\S]*?(?:<\/think>|$)/g;
|
|
| 17 |
|
| 18 |
const ROUTER_MULTIMODAL_ROUTE = "multimodal";
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
/**
|
| 21 |
* Custom error class that preserves HTTP status codes
|
| 22 |
*/
|
|
@@ -115,8 +126,7 @@ export async function makeRouterEndpoint(routerModel: ProcessedModel): Promise<E
|
|
| 115 |
// Try to use the real candidate model config if present in chat-ui's model list
|
| 116 |
let modelForCall: ProcessedModel | undefined;
|
| 117 |
try {
|
| 118 |
-
const mod = await import("../models");
|
| 119 |
-
const all = (mod as { models: ProcessedModel[] }).models;
|
| 120 |
modelForCall = all?.find((m) => m.id === candidateModelId || m.name === candidateModelId);
|
| 121 |
} catch (e) {
|
| 122 |
logger.warn({ err: String(e) }, "[router] failed to load models for candidate lookup");
|
|
@@ -159,8 +169,7 @@ export async function makeRouterEndpoint(routerModel: ProcessedModel): Promise<E
|
|
| 159 |
|
| 160 |
async function findFirstMultimodalCandidateId(): Promise<string | undefined> {
|
| 161 |
try {
|
| 162 |
-
const mod = await import("../models");
|
| 163 |
-
const all = (mod as { models: ProcessedModel[] }).models;
|
| 164 |
|
| 165 |
// Check if a specific multimodal model is configured via env variable
|
| 166 |
const preferredModelId = config.LLM_ROUTER_MULTIMODAL_MODEL;
|
|
|
|
| 17 |
|
| 18 |
const ROUTER_MULTIMODAL_ROUTE = "multimodal";
|
| 19 |
|
| 20 |
+
// Cache models at module level to avoid redundant dynamic imports on every request
|
| 21 |
+
let cachedModels: ProcessedModel[] | undefined;
|
| 22 |
+
|
| 23 |
+
async function getModels(): Promise<ProcessedModel[]> {
|
| 24 |
+
if (!cachedModels) {
|
| 25 |
+
const mod = await import("../models");
|
| 26 |
+
cachedModels = (mod as { models: ProcessedModel[] }).models;
|
| 27 |
+
}
|
| 28 |
+
return cachedModels;
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
/**
|
| 32 |
* Custom error class that preserves HTTP status codes
|
| 33 |
*/
|
|
|
|
| 126 |
// Try to use the real candidate model config if present in chat-ui's model list
|
| 127 |
let modelForCall: ProcessedModel | undefined;
|
| 128 |
try {
|
| 129 |
+
const all = await getModels();
|
|
|
|
| 130 |
modelForCall = all?.find((m) => m.id === candidateModelId || m.name === candidateModelId);
|
| 131 |
} catch (e) {
|
| 132 |
logger.warn({ err: String(e) }, "[router] failed to load models for candidate lookup");
|
|
|
|
| 169 |
|
| 170 |
async function findFirstMultimodalCandidateId(): Promise<string | undefined> {
|
| 171 |
try {
|
| 172 |
+
const all = await getModels();
|
|
|
|
| 173 |
|
| 174 |
// Check if a specific multimodal model is configured via env variable
|
| 175 |
const preferredModelId = config.LLM_ROUTER_MULTIMODAL_MODEL;
|