victor HF Staff committed on
Commit
f8ddff7
·
unverified ·
1 Parent(s): 1d6db01

Refresh models (#1961)

Browse files

* Add model refresh API and refactor model loading

Introduces a POST /models/refresh endpoint for admins to trigger a model refresh and receive a summary of changes. Refactors model loading logic in models.ts to support dynamic refresh, exposes refreshModels and lastModelRefreshSummary, and adds a test script (test-refresh.mjs) to verify refresh functionality. Minor code cleanup and improved override handling included.

* Delete test-refresh.mjs

src/lib/components/chat/ChatMessage.svelte CHANGED
@@ -68,7 +68,7 @@
68
  let editFormEl: HTMLFormElement | undefined = $state();
69
 
70
  // Zero-config reasoning autodetection: detect <think> blocks in content
71
- const THINK_BLOCK_REGEX = /(<think>[\s\S]*?(?:<\/think>|$))/gi;
72
  let hasClientThink = $derived(message.content.split(THINK_BLOCK_REGEX).length > 1);
73
 
74
  // Strip think blocks for clipboard copy (always, regardless of detection)
 
68
  let editFormEl: HTMLFormElement | undefined = $state();
69
 
70
  // Zero-config reasoning autodetection: detect <think> blocks in content
71
+ const THINK_BLOCK_REGEX = /(<think>[\s\S]*?(?:<\/think>|$))/gi;
72
  let hasClientThink = $derived(message.content.split(THINK_BLOCK_REGEX).length > 1);
73
 
74
  // Strip think blocks for clipboard copy (always, regardless of detection)
src/lib/server/api/routes/groups/models.ts CHANGED
@@ -1,4 +1,5 @@
1
- import { Elysia } from "elysia";
 
2
  import type { BackendModel } from "$lib/server/models";
3
  import { authPlugin } from "../../authPlugin";
4
  import { authCondition } from "$lib/server/auth";
@@ -70,6 +71,42 @@ export const modelGroup = new Elysia().group("/models", (app) =>
70
  .get("/old", async () => {
71
  return [] as GETOldModelsResponse;
72
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  .group("/:namespace/:model?", (app) =>
74
  app
75
  .derive(async ({ params, error }) => {
 
1
+ import { Elysia, status } from "elysia";
2
+ import { refreshModels, lastModelRefreshSummary } from "$lib/server/models";
3
  import type { BackendModel } from "$lib/server/models";
4
  import { authPlugin } from "../../authPlugin";
5
  import { authCondition } from "$lib/server/auth";
 
71
  .get("/old", async () => {
72
  return [] as GETOldModelsResponse;
73
  })
74
+ .group("/refresh", (app) =>
75
+ app.use(authPlugin).post("", async ({ locals }) => {
76
+ if (!locals.user && !locals.sessionId) {
77
+ throw status(401, "Unauthorized");
78
+ }
79
+ if (!locals.isAdmin) {
80
+ throw status(403, "Admin privileges required");
81
+ }
82
+
83
+ const previous = lastModelRefreshSummary;
84
+
85
+ try {
86
+ const summary = await refreshModels();
87
+
88
+ return {
89
+ refreshedAt: summary.refreshedAt.toISOString(),
90
+ durationMs: summary.durationMs,
91
+ added: summary.added,
92
+ removed: summary.removed,
93
+ changed: summary.changed,
94
+ total: summary.total,
95
+ hadChanges:
96
+ summary.added.length > 0 || summary.removed.length > 0 || summary.changed.length > 0,
97
+ previous:
98
+ previous.refreshedAt.getTime() > 0
99
+ ? {
100
+ refreshedAt: previous.refreshedAt.toISOString(),
101
+ total: previous.total,
102
+ }
103
+ : null,
104
+ };
105
+ } catch (err) {
106
+ throw status(502, "Model refresh failed");
107
+ }
108
+ })
109
+ )
110
  .group("/:namespace/:model?", (app) =>
111
  app
112
  .derive(async ({ params, error }) => {
src/lib/server/models.ts CHANGED
@@ -76,18 +76,229 @@ const overrideEntrySchema = modelConfig
76
 
77
  type ModelOverride = z.infer<typeof overrideEntrySchema>;
78
 
79
- // ggufModelsConfig unused in this build
80
-
81
- // Source models exclusively from an OpenAI-compatible endpoint.
82
- let modelsRaw: ModelConfig[] = [];
83
-
84
- // Require explicit base URL; no implicit default here
85
  const openaiBaseUrl = config.OPENAI_BASE_URL
86
  ? config.OPENAI_BASE_URL.replace(/\/$/, "")
87
  : undefined;
88
  const isHFRouter = openaiBaseUrl === "https://router.huggingface.co/v1";
89
 
90
- if (openaiBaseUrl) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  try {
92
  const baseURL = openaiBaseUrl;
93
  logger.info({ baseURL }, "[models] Using OpenAI-compatible base URL");
@@ -113,30 +324,10 @@ if (openaiBaseUrl) {
113
  const json = await response.json();
114
  logger.info({ keys: Object.keys(json || {}) }, "[models] Response keys");
115
 
116
- const listSchema = z
117
- .object({
118
- data: z.array(
119
- z.object({
120
- id: z.string(),
121
- description: z.string().optional(),
122
- providers: z
123
- .array(z.object({ supports_tools: z.boolean().optional() }).passthrough())
124
- .optional(),
125
- architecture: z
126
- .object({
127
- input_modalities: z.array(z.string()).optional(),
128
- })
129
- .passthrough()
130
- .optional(),
131
- })
132
- ),
133
- })
134
- .passthrough();
135
-
136
  const parsed = listSchema.parse(json);
137
  logger.info({ count: parsed.data.length }, "[models] Parsed models count");
138
 
139
- modelsRaw = parsed.data.map((m) => {
140
  let logoUrl: string | undefined = undefined;
141
  if (isHFRouter && m.id.includes("/")) {
142
  const org = m.id.split("/")[0];
@@ -166,163 +357,118 @@ if (openaiBaseUrl) {
166
  ],
167
  } as ModelConfig;
168
  }) as ModelConfig[];
169
- } catch (e) {
170
- logger.error(e, "Failed to load models from OpenAI base URL");
171
- throw e;
172
- }
173
- } else {
174
- logger.error(
175
- "OPENAI_BASE_URL is required. Set it to an OpenAI-compatible base (e.g., https://router.huggingface.co/v1)."
176
- );
177
- throw new Error("OPENAI_BASE_URL not set");
178
- }
179
-
180
- let modelOverrides: ModelOverride[] = [];
181
- const overridesEnv = (Reflect.get(config, "MODELS") as string | undefined) ?? "";
182
-
183
- if (overridesEnv.trim()) {
184
- try {
185
- modelOverrides = z
186
- .array(overrideEntrySchema)
187
- .parse(JSON5.parse(sanitizeJSONEnv(overridesEnv, "[]")));
188
- } catch (error) {
189
- logger.error(error, "[models] Failed to parse MODELS overrides");
190
- }
191
- }
192
 
193
- if (modelOverrides.length) {
194
- const overrideMap = new Map<string, ModelOverride>();
195
- for (const override of modelOverrides) {
196
- for (const key of [override.id, override.name]) {
197
- const trimmed = key?.trim();
198
- if (trimmed) overrideMap.set(trimmed, override);
199
- }
200
- }
201
 
202
- modelsRaw = modelsRaw.map((model) => {
203
- const override = overrideMap.get(model.id ?? "") ?? overrideMap.get(model.name ?? "");
204
- if (!override) return model;
 
 
 
 
 
205
 
206
- const { id, name, ...rest } = override;
207
- void id;
208
- void name;
209
 
210
- return {
211
- ...model,
212
- ...rest,
213
- };
214
- });
215
- }
216
 
217
- function getChatPromptRender(_m: ModelConfig): (inputs: ChatTemplateInput) => string {
218
- // Minimal template to support legacy "completions" flow if ever used.
219
- // We avoid any tokenizer/Jinja usage in this build.
220
- return ({ messages, preprompt }) => {
221
- const parts: string[] = [];
222
- if (preprompt) parts.push(`[SYSTEM]\n${preprompt}`);
223
- for (const msg of messages) {
224
- const role = msg.from === "assistant" ? "ASSISTANT" : msg.from.toUpperCase();
225
- parts.push(`[${role}]\n${msg.content}`);
226
  }
227
- parts.push(`[ASSISTANT]`);
228
- return parts.join("\n\n");
229
- };
230
- }
231
 
232
- const processModel = async (m: ModelConfig) => ({
233
- ...m,
234
- chatPromptRender: await getChatPromptRender(m),
235
- id: m.id || m.name,
236
- displayName: m.displayName || m.name,
237
- preprompt: m.prepromptUrl ? await fetch(m.prepromptUrl).then((r) => r.text()) : m.preprompt,
238
- parameters: { ...m.parameters, stop_sequences: m.parameters?.stop },
239
- unlisted: m.unlisted ?? false,
240
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
- const addEndpoint = (m: Awaited<ReturnType<typeof processModel>>) => ({
243
- ...m,
244
- getEndpoint: async (): Promise<Endpoint> => {
245
- if (!m.endpoints || m.endpoints.length === 0) {
246
- throw new Error("No endpoints configured. This build requires OpenAI-compatible endpoints.");
247
- }
248
- // Only support OpenAI-compatible endpoints in this build
249
- const endpoint = m.endpoints[0];
250
- if (endpoint.type !== "openai") {
251
- throw new Error("Only 'openai' endpoint type is supported in this build");
252
- }
253
- return await endpoints.openai({ ...endpoint, model: m });
254
- },
255
- });
256
 
257
- const inferenceApiIds: string[] = [];
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
- const builtModels = await Promise.all(
260
- modelsRaw.map((e) =>
261
- processModel(e)
262
- .then(addEndpoint)
263
- .then(async (m) => ({
264
- ...m,
265
- hasInferenceAPI: inferenceApiIds.includes(m.id ?? m.name),
266
- // router decoration added later
267
- isRouter: false as boolean,
268
- }))
269
- )
270
- );
271
-
272
- // Inject a synthetic router alias ("Omni") if Arch router is configured
273
- const archBase = (config.LLM_ROUTER_ARCH_BASE_URL || "").trim();
274
- const routerLabel = (config.PUBLIC_LLM_ROUTER_DISPLAY_NAME || "Omni").trim() || "Omni";
275
- const routerLogo = (config.PUBLIC_LLM_ROUTER_LOGO_URL || "").trim();
276
- const routerAliasId = (config.PUBLIC_LLM_ROUTER_ALIAS_ID || "omni").trim() || "omni";
277
- const routerMultimodalEnabled =
278
- (config.LLM_ROUTER_ENABLE_MULTIMODAL || "").toLowerCase() === "true";
279
-
280
- let decorated = builtModels as ProcessedModel[];
281
-
282
- if (archBase) {
283
- // Build a minimal model config for the alias
284
- const aliasRaw: ModelConfig = {
285
- id: routerAliasId,
286
- name: routerAliasId,
287
- displayName: routerLabel,
288
- logoUrl: routerLogo || undefined,
289
- preprompt: "",
290
- endpoints: [
291
- {
292
- type: "openai" as const,
293
- baseURL: openaiBaseUrl,
294
- },
295
- ],
296
- // Keep the alias visible
297
- unlisted: false,
298
- } as ProcessedModel;
299
-
300
- if (routerMultimodalEnabled) {
301
- aliasRaw.multimodal = true;
302
- aliasRaw.multimodalAcceptedMimetypes = ["image/*"];
303
  }
 
304
 
305
- const aliasBase = await processModel(aliasRaw);
306
- // Create a self-referential ProcessedModel for the router endpoint
307
- const aliasModel: ProcessedModel = {
308
- ...aliasBase,
309
- isRouter: true,
310
- // getEndpoint uses the router wrapper regardless of the endpoints array
311
- getEndpoint: async (): Promise<Endpoint> => makeRouterEndpoint(aliasModel),
312
- } as ProcessedModel;
313
-
314
- // Put alias first
315
- decorated = [aliasModel, ...decorated];
316
- }
317
 
318
- export const models = decorated as typeof builtModels;
319
 
320
- export type ProcessedModel = (typeof models)[number] & { isRouter?: boolean };
 
 
 
321
 
322
- // super ugly but not sure how to make typescript happier
323
- export const validModelIdSchema = z.enum(models.map((m) => m.id) as [string, ...string[]]);
 
324
 
325
- export const defaultModel = models[0];
 
326
 
327
  export const validateModel = (_models: BackendModel[]) => {
328
  // Zod enum function requires 2 parameters
@@ -331,13 +477,6 @@ export const validateModel = (_models: BackendModel[]) => {
331
 
332
  // if `TASK_MODEL` is string & name of a model in `MODELS`, then we use `MODELS[TASK_MODEL]`, else we try to parse `TASK_MODEL` as a model config itself
333
 
334
- export const taskModel = addEndpoint(
335
- config.TASK_MODEL
336
- ? (models.find((m) => m.name === config.TASK_MODEL || m.id === config.TASK_MODEL) ??
337
- defaultModel)
338
- : defaultModel
339
- );
340
-
341
  export type BackendModel = Optional<
342
  typeof defaultModel,
343
  "preprompt" | "parameters" | "multimodal" | "unlisted" | "hasInferenceAPI"
 
76
 
77
  type ModelOverride = z.infer<typeof overrideEntrySchema>;
78
 
 
 
 
 
 
 
79
  const openaiBaseUrl = config.OPENAI_BASE_URL
80
  ? config.OPENAI_BASE_URL.replace(/\/$/, "")
81
  : undefined;
82
  const isHFRouter = openaiBaseUrl === "https://router.huggingface.co/v1";
83
 
84
+ const listSchema = z
85
+ .object({
86
+ data: z.array(
87
+ z.object({
88
+ id: z.string(),
89
+ description: z.string().optional(),
90
+ providers: z
91
+ .array(z.object({ supports_tools: z.boolean().optional() }).passthrough())
92
+ .optional(),
93
+ architecture: z
94
+ .object({
95
+ input_modalities: z.array(z.string()).optional(),
96
+ })
97
+ .passthrough()
98
+ .optional(),
99
+ })
100
+ ),
101
+ })
102
+ .passthrough();
103
+
104
+ function getChatPromptRender(_m: ModelConfig): (inputs: ChatTemplateInput) => string {
105
+ // Minimal template to support legacy "completions" flow if ever used.
106
+ // We avoid any tokenizer/Jinja usage in this build.
107
+ return ({ messages, preprompt }) => {
108
+ const parts: string[] = [];
109
+ if (preprompt) parts.push(`[SYSTEM]\n${preprompt}`);
110
+ for (const msg of messages) {
111
+ const role = msg.from === "assistant" ? "ASSISTANT" : msg.from.toUpperCase();
112
+ parts.push(`[${role}]\n${msg.content}`);
113
+ }
114
+ parts.push(`[ASSISTANT]`);
115
+ return parts.join("\n\n");
116
+ };
117
+ }
118
+
119
+ const processModel = async (m: ModelConfig) => ({
120
+ ...m,
121
+ chatPromptRender: await getChatPromptRender(m),
122
+ id: m.id || m.name,
123
+ displayName: m.displayName || m.name,
124
+ preprompt: m.prepromptUrl ? await fetch(m.prepromptUrl).then((r) => r.text()) : m.preprompt,
125
+ parameters: { ...m.parameters, stop_sequences: m.parameters?.stop },
126
+ unlisted: m.unlisted ?? false,
127
+ });
128
+
129
+ const addEndpoint = (m: Awaited<ReturnType<typeof processModel>>) => ({
130
+ ...m,
131
+ getEndpoint: async (): Promise<Endpoint> => {
132
+ if (!m.endpoints || m.endpoints.length === 0) {
133
+ throw new Error("No endpoints configured. This build requires OpenAI-compatible endpoints.");
134
+ }
135
+ // Only support OpenAI-compatible endpoints in this build
136
+ const endpoint = m.endpoints[0];
137
+ if (endpoint.type !== "openai") {
138
+ throw new Error("Only 'openai' endpoint type is supported in this build");
139
+ }
140
+ return await endpoints.openai({ ...endpoint, model: m });
141
+ },
142
+ });
143
+
144
+ type InternalProcessedModel = Awaited<ReturnType<typeof addEndpoint>> & {
145
+ isRouter: boolean;
146
+ hasInferenceAPI: boolean;
147
+ };
148
+
149
+ const inferenceApiIds: string[] = [];
150
+
151
+ const getModelOverrides = (): ModelOverride[] => {
152
+ const overridesEnv = (Reflect.get(config, "MODELS") as string | undefined) ?? "";
153
+
154
+ if (!overridesEnv.trim()) {
155
+ return [];
156
+ }
157
+
158
+ try {
159
+ return z.array(overrideEntrySchema).parse(JSON5.parse(sanitizeJSONEnv(overridesEnv, "[]")));
160
+ } catch (error) {
161
+ logger.error(error, "[models] Failed to parse MODELS overrides");
162
+ return [];
163
+ }
164
+ };
165
+
166
+ export type ModelsRefreshSummary = {
167
+ refreshedAt: Date;
168
+ durationMs: number;
169
+ added: string[];
170
+ removed: string[];
171
+ changed: string[];
172
+ total: number;
173
+ };
174
+
175
+ export type ProcessedModel = InternalProcessedModel;
176
+
177
+ export let models: ProcessedModel[] = [];
178
+ export let defaultModel!: ProcessedModel;
179
+ export let taskModel!: ProcessedModel;
180
+ export let validModelIdSchema: z.ZodType<string> = z.string();
181
+ export let lastModelRefresh = new Date(0);
182
+ export let lastModelRefreshDurationMs = 0;
183
+ export let lastModelRefreshSummary: ModelsRefreshSummary = {
184
+ refreshedAt: new Date(0),
185
+ durationMs: 0,
186
+ added: [],
187
+ removed: [],
188
+ changed: [],
189
+ total: 0,
190
+ };
191
+
192
+ let inflightRefresh: Promise<ModelsRefreshSummary> | null = null;
193
+
194
+ const createValidModelIdSchema = (modelList: ProcessedModel[]): z.ZodType<string> => {
195
+ if (modelList.length === 0) {
196
+ throw new Error("No models available to build validation schema");
197
+ }
198
+ const ids = new Set(modelList.map((m) => m.id));
199
+ return z.string().refine((value) => ids.has(value), "Invalid model id");
200
+ };
201
+
202
+ const resolveTaskModel = (modelList: ProcessedModel[]) => {
203
+ if (modelList.length === 0) {
204
+ throw new Error("No models available to select task model");
205
+ }
206
+
207
+ if (config.TASK_MODEL) {
208
+ const preferred = modelList.find(
209
+ (m) => m.name === config.TASK_MODEL || m.id === config.TASK_MODEL
210
+ );
211
+ if (preferred) {
212
+ return preferred;
213
+ }
214
+ }
215
+
216
+ return modelList[0];
217
+ };
218
+
219
+ const signatureForModel = (model: ProcessedModel) =>
220
+ JSON.stringify({
221
+ description: model.description,
222
+ displayName: model.displayName,
223
+ providers: model.providers,
224
+ parameters: model.parameters,
225
+ preprompt: model.preprompt,
226
+ prepromptUrl: model.prepromptUrl,
227
+ endpoints:
228
+ model.endpoints?.map((endpoint) => {
229
+ if (endpoint.type === "openai") {
230
+ const { type, baseURL } = endpoint;
231
+ return { type, baseURL };
232
+ }
233
+ return { type: endpoint.type };
234
+ }) ?? null,
235
+ multimodal: model.multimodal,
236
+ multimodalAcceptedMimetypes: model.multimodalAcceptedMimetypes,
237
+ isRouter: model.isRouter,
238
+ hasInferenceAPI: model.hasInferenceAPI,
239
+ });
240
+
241
+ const applyModelState = (newModels: ProcessedModel[], startedAt: number): ModelsRefreshSummary => {
242
+ if (newModels.length === 0) {
243
+ throw new Error("Failed to load any models from upstream");
244
+ }
245
+
246
+ const previousIds = new Set(models.map((m) => m.id));
247
+ const previousSignatures = new Map(models.map((m) => [m.id, signatureForModel(m)]));
248
+ const refreshedAt = new Date();
249
+ const durationMs = Date.now() - startedAt;
250
+
251
+ models = newModels;
252
+ defaultModel = models[0];
253
+ taskModel = resolveTaskModel(models);
254
+ validModelIdSchema = createValidModelIdSchema(models);
255
+ lastModelRefresh = refreshedAt;
256
+ lastModelRefreshDurationMs = durationMs;
257
+
258
+ const added = newModels.map((m) => m.id).filter((id) => !previousIds.has(id));
259
+ const removed = Array.from(previousIds).filter(
260
+ (id) => !newModels.some((model) => model.id === id)
261
+ );
262
+ const changed = newModels
263
+ .filter((model) => {
264
+ const previousSignature = previousSignatures.get(model.id);
265
+ return previousSignature !== undefined && previousSignature !== signatureForModel(model);
266
+ })
267
+ .map((model) => model.id);
268
+
269
+ const summary: ModelsRefreshSummary = {
270
+ refreshedAt,
271
+ durationMs,
272
+ added,
273
+ removed,
274
+ changed,
275
+ total: models.length,
276
+ };
277
+
278
+ lastModelRefreshSummary = summary;
279
+
280
+ logger.info(
281
+ {
282
+ total: summary.total,
283
+ added: summary.added,
284
+ removed: summary.removed,
285
+ changed: summary.changed,
286
+ durationMs: summary.durationMs,
287
+ },
288
+ "[models] Model cache refreshed"
289
+ );
290
+
291
+ return summary;
292
+ };
293
+
294
+ const buildModels = async (): Promise<ProcessedModel[]> => {
295
+ if (!openaiBaseUrl) {
296
+ logger.error(
297
+ "OPENAI_BASE_URL is required. Set it to an OpenAI-compatible base (e.g., https://router.huggingface.co/v1)."
298
+ );
299
+ throw new Error("OPENAI_BASE_URL not set");
300
+ }
301
+
302
  try {
303
  const baseURL = openaiBaseUrl;
304
  logger.info({ baseURL }, "[models] Using OpenAI-compatible base URL");
 
324
  const json = await response.json();
325
  logger.info({ keys: Object.keys(json || {}) }, "[models] Response keys");
326
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  const parsed = listSchema.parse(json);
328
  logger.info({ count: parsed.data.length }, "[models] Parsed models count");
329
 
330
+ let modelsRaw = parsed.data.map((m) => {
331
  let logoUrl: string | undefined = undefined;
332
  if (isHFRouter && m.id.includes("/")) {
333
  const org = m.id.split("/")[0];
 
357
  ],
358
  } as ModelConfig;
359
  }) as ModelConfig[];
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
 
361
+ const overrides = getModelOverrides();
 
 
 
 
 
 
 
362
 
363
+ if (overrides.length) {
364
+ const overrideMap = new Map<string, ModelOverride>();
365
+ for (const override of overrides) {
366
+ for (const key of [override.id, override.name]) {
367
+ const trimmed = key?.trim();
368
+ if (trimmed) overrideMap.set(trimmed, override);
369
+ }
370
+ }
371
 
372
+ modelsRaw = modelsRaw.map((model) => {
373
+ const override = overrideMap.get(model.id ?? "") ?? overrideMap.get(model.name ?? "");
374
+ if (!override) return model;
375
 
376
+ const { id, name, ...rest } = override;
377
+ void id;
378
+ void name;
 
 
 
379
 
380
+ return {
381
+ ...model,
382
+ ...rest,
383
+ };
384
+ });
 
 
 
 
385
  }
 
 
 
 
386
 
387
+ const builtModels = await Promise.all(
388
+ modelsRaw.map((e) =>
389
+ processModel(e)
390
+ .then(addEndpoint)
391
+ .then(async (m) => ({
392
+ ...m,
393
+ hasInferenceAPI: inferenceApiIds.includes(m.id ?? m.name),
394
+ // router decoration added later
395
+ isRouter: false as boolean,
396
+ }))
397
+ )
398
+ );
399
+
400
+ const archBase = (config.LLM_ROUTER_ARCH_BASE_URL || "").trim();
401
+ const routerLabel = (config.PUBLIC_LLM_ROUTER_DISPLAY_NAME || "Omni").trim() || "Omni";
402
+ const routerLogo = (config.PUBLIC_LLM_ROUTER_LOGO_URL || "").trim();
403
+ const routerAliasId = (config.PUBLIC_LLM_ROUTER_ALIAS_ID || "omni").trim() || "omni";
404
+ const routerMultimodalEnabled =
405
+ (config.LLM_ROUTER_ENABLE_MULTIMODAL || "").toLowerCase() === "true";
406
+
407
+ let decorated = builtModels as ProcessedModel[];
408
+
409
+ if (archBase) {
410
+ // Build a minimal model config for the alias
411
+ const aliasRaw = {
412
+ id: routerAliasId,
413
+ name: routerAliasId,
414
+ displayName: routerLabel,
415
+ logoUrl: routerLogo || undefined,
416
+ preprompt: "",
417
+ endpoints: [
418
+ {
419
+ type: "openai" as const,
420
+ baseURL: openaiBaseUrl,
421
+ },
422
+ ],
423
+ // Keep the alias visible
424
+ unlisted: false,
425
+ } as ModelConfig;
426
 
427
+ if (routerMultimodalEnabled) {
428
+ aliasRaw.multimodal = true;
429
+ aliasRaw.multimodalAcceptedMimetypes = ["image/*"];
430
+ }
 
 
 
 
 
 
 
 
 
 
431
 
432
+ const aliasBase = await processModel(aliasRaw);
433
+ // Create a self-referential ProcessedModel for the router endpoint
434
+ const aliasModel: ProcessedModel = {
435
+ ...aliasBase,
436
+ isRouter: true,
437
+ hasInferenceAPI: false,
438
+ // getEndpoint uses the router wrapper regardless of the endpoints array
439
+ getEndpoint: async (): Promise<Endpoint> => makeRouterEndpoint(aliasModel),
440
+ } as ProcessedModel;
441
+
442
+ // Put alias first
443
+ decorated = [aliasModel, ...decorated];
444
+ }
445
 
446
+ return decorated;
447
+ } catch (e) {
448
+ logger.error(e, "Failed to load models from OpenAI base URL");
449
+ throw e;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  }
451
+ };
452
 
453
+ const rebuildModels = async (): Promise<ModelsRefreshSummary> => {
454
+ const startedAt = Date.now();
455
+ const newModels = await buildModels();
456
+ return applyModelState(newModels, startedAt);
457
+ };
 
 
 
 
 
 
 
458
 
459
+ await rebuildModels();
460
 
461
+ export const refreshModels = async (): Promise<ModelsRefreshSummary> => {
462
+ if (inflightRefresh) {
463
+ return inflightRefresh;
464
+ }
465
 
466
+ inflightRefresh = rebuildModels().finally(() => {
467
+ inflightRefresh = null;
468
+ });
469
 
470
+ return inflightRefresh;
471
+ };
472
 
473
  export const validateModel = (_models: BackendModel[]) => {
474
  // Zod enum function requires 2 parameters
 
477
 
478
  // if `TASK_MODEL` is string & name of a model in `MODELS`, then we use `MODELS[TASK_MODEL]`, else we try to parse `TASK_MODEL` as a model config itself
479
 
 
 
 
 
 
 
 
480
  export type BackendModel = Optional<
481
  typeof defaultModel,
482
  "preprompt" | "parameters" | "multimodal" | "unlisted" | "hasInferenceAPI"