Cache models to avoid redundant dynamic imports
Browse files
Introduces a module-level cache for models in endpoint.ts to prevent repeated dynamic imports on every request. This improves performance by loading models only once and reusing them for subsequent calls.
src/lib/server/router/endpoint.ts
CHANGED
|
@@ -17,6 +17,17 @@ const REASONING_BLOCK_REGEX = /<think>[\s\S]*?(?:<\/think>|$)/g;
|
|
| 17 |
|
| 18 |
const ROUTER_MULTIMODAL_ROUTE = "multimodal";
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
/**
|
| 21 |
* Custom error class that preserves HTTP status codes
|
| 22 |
*/
|
|
@@ -115,8 +126,7 @@ export async function makeRouterEndpoint(routerModel: ProcessedModel): Promise<E
|
|
| 115 |
// Try to use the real candidate model config if present in chat-ui's model list
|
| 116 |
let modelForCall: ProcessedModel | undefined;
|
| 117 |
try {
|
| 118 |
-
const mod = await import("../models");
|
| 119 |
-
const all = (mod as { models: ProcessedModel[] }).models;
|
| 120 |
modelForCall = all?.find((m) => m.id === candidateModelId || m.name === candidateModelId);
|
| 121 |
} catch (e) {
|
| 122 |
logger.warn({ err: String(e) }, "[router] failed to load models for candidate lookup");
|
|
@@ -159,8 +169,7 @@ export async function makeRouterEndpoint(routerModel: ProcessedModel): Promise<E
|
|
| 159 |
|
| 160 |
async function findFirstMultimodalCandidateId(): Promise<string | undefined> {
|
| 161 |
try {
|
| 162 |
-
const mod = await import("../models");
|
| 163 |
-
const all = (mod as { models: ProcessedModel[] }).models;
|
| 164 |
|
| 165 |
// Check if a specific multimodal model is configured via env variable
|
| 166 |
const preferredModelId = config.LLM_ROUTER_MULTIMODAL_MODEL;
|
|
|
|
| 17 |
|
| 18 |
const ROUTER_MULTIMODAL_ROUTE = "multimodal";
|
| 19 |
|
| 20 |
+
// Cache models at module level to avoid redundant dynamic imports on every request
|
| 21 |
+
let cachedModels: ProcessedModel[] | undefined;
|
| 22 |
+
|
| 23 |
+
async function getModels(): Promise<ProcessedModel[]> {
|
| 24 |
+
if (!cachedModels) {
|
| 25 |
+
const mod = await import("../models");
|
| 26 |
+
cachedModels = (mod as { models: ProcessedModel[] }).models;
|
| 27 |
+
}
|
| 28 |
+
return cachedModels;
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
/**
|
| 32 |
* Custom error class that preserves HTTP status codes
|
| 33 |
*/
|
|
|
|
| 126 |
// Try to use the real candidate model config if present in chat-ui's model list
|
| 127 |
let modelForCall: ProcessedModel | undefined;
|
| 128 |
try {
|
| 129 |
+
const all = await getModels();
|
|
|
|
| 130 |
modelForCall = all?.find((m) => m.id === candidateModelId || m.name === candidateModelId);
|
| 131 |
} catch (e) {
|
| 132 |
logger.warn({ err: String(e) }, "[router] failed to load models for candidate lookup");
|
|
|
|
| 169 |
|
| 170 |
async function findFirstMultimodalCandidateId(): Promise<string | undefined> {
|
| 171 |
try {
|
| 172 |
+
const all = await getModels();
|
|
|
|
| 173 |
|
| 174 |
// Check if a specific multimodal model is configured via env variable
|
| 175 |
const preferredModelId = config.LLM_ROUTER_MULTIMODAL_MODEL;
|