Refresh models (#1961)
* Add model refresh API and refactor model loading
Introduces a POST /models/refresh endpoint for admins to trigger a model refresh and receive a summary of changes. Refactors model loading logic in models.ts to support dynamic refresh, exposes refreshModels and lastModelRefreshSummary, and adds a test script (test-refresh.mjs) to verify refresh functionality. Minor code cleanup and improved override handling included.
* Delete test-refresh.mjs
src/lib/components/chat/ChatMessage.svelte
CHANGED
|
@@ -68,7 +68,7 @@
|
|
| 68 |
let editFormEl: HTMLFormElement | undefined = $state();
|
| 69 |
|
| 70 |
// Zero-config reasoning autodetection: detect <think> blocks in content
|
| 71 |
-
|
| 72 |
let hasClientThink = $derived(message.content.split(THINK_BLOCK_REGEX).length > 1);
|
| 73 |
|
| 74 |
// Strip think blocks for clipboard copy (always, regardless of detection)
|
|
|
|
| 68 |
let editFormEl: HTMLFormElement | undefined = $state();
|
| 69 |
|
| 70 |
// Zero-config reasoning autodetection: detect <think> blocks in content
|
| 71 |
+
const THINK_BLOCK_REGEX = /(<think>[\s\S]*?(?:<\/think>|$))/gi;
|
| 72 |
let hasClientThink = $derived(message.content.split(THINK_BLOCK_REGEX).length > 1);
|
| 73 |
|
| 74 |
// Strip think blocks for clipboard copy (always, regardless of detection)
|
src/lib/server/api/routes/groups/models.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
-
import { Elysia } from "elysia";
|
|
|
|
| 2 |
import type { BackendModel } from "$lib/server/models";
|
| 3 |
import { authPlugin } from "../../authPlugin";
|
| 4 |
import { authCondition } from "$lib/server/auth";
|
|
@@ -70,6 +71,42 @@ export const modelGroup = new Elysia().group("/models", (app) =>
|
|
| 70 |
.get("/old", async () => {
|
| 71 |
return [] as GETOldModelsResponse;
|
| 72 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
.group("/:namespace/:model?", (app) =>
|
| 74 |
app
|
| 75 |
.derive(async ({ params, error }) => {
|
|
|
|
| 1 |
+
import { Elysia, status } from "elysia";
|
| 2 |
+
import { refreshModels, lastModelRefreshSummary } from "$lib/server/models";
|
| 3 |
import type { BackendModel } from "$lib/server/models";
|
| 4 |
import { authPlugin } from "../../authPlugin";
|
| 5 |
import { authCondition } from "$lib/server/auth";
|
|
|
|
| 71 |
.get("/old", async () => {
|
| 72 |
return [] as GETOldModelsResponse;
|
| 73 |
})
|
| 74 |
+
.group("/refresh", (app) =>
|
| 75 |
+
app.use(authPlugin).post("", async ({ locals }) => {
|
| 76 |
+
if (!locals.user && !locals.sessionId) {
|
| 77 |
+
throw status(401, "Unauthorized");
|
| 78 |
+
}
|
| 79 |
+
if (!locals.isAdmin) {
|
| 80 |
+
throw status(403, "Admin privileges required");
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
const previous = lastModelRefreshSummary;
|
| 84 |
+
|
| 85 |
+
try {
|
| 86 |
+
const summary = await refreshModels();
|
| 87 |
+
|
| 88 |
+
return {
|
| 89 |
+
refreshedAt: summary.refreshedAt.toISOString(),
|
| 90 |
+
durationMs: summary.durationMs,
|
| 91 |
+
added: summary.added,
|
| 92 |
+
removed: summary.removed,
|
| 93 |
+
changed: summary.changed,
|
| 94 |
+
total: summary.total,
|
| 95 |
+
hadChanges:
|
| 96 |
+
summary.added.length > 0 || summary.removed.length > 0 || summary.changed.length > 0,
|
| 97 |
+
previous:
|
| 98 |
+
previous.refreshedAt.getTime() > 0
|
| 99 |
+
? {
|
| 100 |
+
refreshedAt: previous.refreshedAt.toISOString(),
|
| 101 |
+
total: previous.total,
|
| 102 |
+
}
|
| 103 |
+
: null,
|
| 104 |
+
};
|
| 105 |
+
} catch (err) {
|
| 106 |
+
throw status(502, "Model refresh failed");
|
| 107 |
+
}
|
| 108 |
+
})
|
| 109 |
+
)
|
| 110 |
.group("/:namespace/:model?", (app) =>
|
| 111 |
app
|
| 112 |
.derive(async ({ params, error }) => {
|
src/lib/server/models.ts
CHANGED
|
@@ -76,18 +76,229 @@ const overrideEntrySchema = modelConfig
|
|
| 76 |
|
| 77 |
type ModelOverride = z.infer<typeof overrideEntrySchema>;
|
| 78 |
|
| 79 |
-
// ggufModelsConfig unused in this build
|
| 80 |
-
|
| 81 |
-
// Source models exclusively from an OpenAI-compatible endpoint.
|
| 82 |
-
let modelsRaw: ModelConfig[] = [];
|
| 83 |
-
|
| 84 |
-
// Require explicit base URL; no implicit default here
|
| 85 |
const openaiBaseUrl = config.OPENAI_BASE_URL
|
| 86 |
? config.OPENAI_BASE_URL.replace(/\/$/, "")
|
| 87 |
: undefined;
|
| 88 |
const isHFRouter = openaiBaseUrl === "https://router.huggingface.co/v1";
|
| 89 |
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
try {
|
| 92 |
const baseURL = openaiBaseUrl;
|
| 93 |
logger.info({ baseURL }, "[models] Using OpenAI-compatible base URL");
|
|
@@ -113,30 +324,10 @@ if (openaiBaseUrl) {
|
|
| 113 |
const json = await response.json();
|
| 114 |
logger.info({ keys: Object.keys(json || {}) }, "[models] Response keys");
|
| 115 |
|
| 116 |
-
const listSchema = z
|
| 117 |
-
.object({
|
| 118 |
-
data: z.array(
|
| 119 |
-
z.object({
|
| 120 |
-
id: z.string(),
|
| 121 |
-
description: z.string().optional(),
|
| 122 |
-
providers: z
|
| 123 |
-
.array(z.object({ supports_tools: z.boolean().optional() }).passthrough())
|
| 124 |
-
.optional(),
|
| 125 |
-
architecture: z
|
| 126 |
-
.object({
|
| 127 |
-
input_modalities: z.array(z.string()).optional(),
|
| 128 |
-
})
|
| 129 |
-
.passthrough()
|
| 130 |
-
.optional(),
|
| 131 |
-
})
|
| 132 |
-
),
|
| 133 |
-
})
|
| 134 |
-
.passthrough();
|
| 135 |
-
|
| 136 |
const parsed = listSchema.parse(json);
|
| 137 |
logger.info({ count: parsed.data.length }, "[models] Parsed models count");
|
| 138 |
|
| 139 |
-
modelsRaw = parsed.data.map((m) => {
|
| 140 |
let logoUrl: string | undefined = undefined;
|
| 141 |
if (isHFRouter && m.id.includes("/")) {
|
| 142 |
const org = m.id.split("/")[0];
|
|
@@ -166,163 +357,118 @@ if (openaiBaseUrl) {
|
|
| 166 |
],
|
| 167 |
} as ModelConfig;
|
| 168 |
}) as ModelConfig[];
|
| 169 |
-
} catch (e) {
|
| 170 |
-
logger.error(e, "Failed to load models from OpenAI base URL");
|
| 171 |
-
throw e;
|
| 172 |
-
}
|
| 173 |
-
} else {
|
| 174 |
-
logger.error(
|
| 175 |
-
"OPENAI_BASE_URL is required. Set it to an OpenAI-compatible base (e.g., https://router.huggingface.co/v1)."
|
| 176 |
-
);
|
| 177 |
-
throw new Error("OPENAI_BASE_URL not set");
|
| 178 |
-
}
|
| 179 |
-
|
| 180 |
-
let modelOverrides: ModelOverride[] = [];
|
| 181 |
-
const overridesEnv = (Reflect.get(config, "MODELS") as string | undefined) ?? "";
|
| 182 |
-
|
| 183 |
-
if (overridesEnv.trim()) {
|
| 184 |
-
try {
|
| 185 |
-
modelOverrides = z
|
| 186 |
-
.array(overrideEntrySchema)
|
| 187 |
-
.parse(JSON5.parse(sanitizeJSONEnv(overridesEnv, "[]")));
|
| 188 |
-
} catch (error) {
|
| 189 |
-
logger.error(error, "[models] Failed to parse MODELS overrides");
|
| 190 |
-
}
|
| 191 |
-
}
|
| 192 |
|
| 193 |
-
|
| 194 |
-
const overrideMap = new Map<string, ModelOverride>();
|
| 195 |
-
for (const override of modelOverrides) {
|
| 196 |
-
for (const key of [override.id, override.name]) {
|
| 197 |
-
const trimmed = key?.trim();
|
| 198 |
-
if (trimmed) overrideMap.set(trimmed, override);
|
| 199 |
-
}
|
| 200 |
-
}
|
| 201 |
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
};
|
| 214 |
-
});
|
| 215 |
-
}
|
| 216 |
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
if (preprompt) parts.push(`[SYSTEM]\n${preprompt}`);
|
| 223 |
-
for (const msg of messages) {
|
| 224 |
-
const role = msg.from === "assistant" ? "ASSISTANT" : msg.from.toUpperCase();
|
| 225 |
-
parts.push(`[${role}]\n${msg.content}`);
|
| 226 |
}
|
| 227 |
-
parts.push(`[ASSISTANT]`);
|
| 228 |
-
return parts.join("\n\n");
|
| 229 |
-
};
|
| 230 |
-
}
|
| 231 |
|
| 232 |
-
const
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
throw new Error("No endpoints configured. This build requires OpenAI-compatible endpoints.");
|
| 247 |
-
}
|
| 248 |
-
// Only support OpenAI-compatible endpoints in this build
|
| 249 |
-
const endpoint = m.endpoints[0];
|
| 250 |
-
if (endpoint.type !== "openai") {
|
| 251 |
-
throw new Error("Only 'openai' endpoint type is supported in this build");
|
| 252 |
-
}
|
| 253 |
-
return await endpoints.openai({ ...endpoint, model: m });
|
| 254 |
-
},
|
| 255 |
-
});
|
| 256 |
|
| 257 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
.then(async (m) => ({
|
| 264 |
-
...m,
|
| 265 |
-
hasInferenceAPI: inferenceApiIds.includes(m.id ?? m.name),
|
| 266 |
-
// router decoration added later
|
| 267 |
-
isRouter: false as boolean,
|
| 268 |
-
}))
|
| 269 |
-
)
|
| 270 |
-
);
|
| 271 |
-
|
| 272 |
-
// Inject a synthetic router alias ("Omni") if Arch router is configured
|
| 273 |
-
const archBase = (config.LLM_ROUTER_ARCH_BASE_URL || "").trim();
|
| 274 |
-
const routerLabel = (config.PUBLIC_LLM_ROUTER_DISPLAY_NAME || "Omni").trim() || "Omni";
|
| 275 |
-
const routerLogo = (config.PUBLIC_LLM_ROUTER_LOGO_URL || "").trim();
|
| 276 |
-
const routerAliasId = (config.PUBLIC_LLM_ROUTER_ALIAS_ID || "omni").trim() || "omni";
|
| 277 |
-
const routerMultimodalEnabled =
|
| 278 |
-
(config.LLM_ROUTER_ENABLE_MULTIMODAL || "").toLowerCase() === "true";
|
| 279 |
-
|
| 280 |
-
let decorated = builtModels as ProcessedModel[];
|
| 281 |
-
|
| 282 |
-
if (archBase) {
|
| 283 |
-
// Build a minimal model config for the alias
|
| 284 |
-
const aliasRaw: ModelConfig = {
|
| 285 |
-
id: routerAliasId,
|
| 286 |
-
name: routerAliasId,
|
| 287 |
-
displayName: routerLabel,
|
| 288 |
-
logoUrl: routerLogo || undefined,
|
| 289 |
-
preprompt: "",
|
| 290 |
-
endpoints: [
|
| 291 |
-
{
|
| 292 |
-
type: "openai" as const,
|
| 293 |
-
baseURL: openaiBaseUrl,
|
| 294 |
-
},
|
| 295 |
-
],
|
| 296 |
-
// Keep the alias visible
|
| 297 |
-
unlisted: false,
|
| 298 |
-
} as ProcessedModel;
|
| 299 |
-
|
| 300 |
-
if (routerMultimodalEnabled) {
|
| 301 |
-
aliasRaw.multimodal = true;
|
| 302 |
-
aliasRaw.multimodalAcceptedMimetypes = ["image/*"];
|
| 303 |
}
|
|
|
|
| 304 |
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
const
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
// getEndpoint uses the router wrapper regardless of the endpoints array
|
| 311 |
-
getEndpoint: async (): Promise<Endpoint> => makeRouterEndpoint(aliasModel),
|
| 312 |
-
} as ProcessedModel;
|
| 313 |
-
|
| 314 |
-
// Put alias first
|
| 315 |
-
decorated = [aliasModel, ...decorated];
|
| 316 |
-
}
|
| 317 |
|
| 318 |
-
|
| 319 |
|
| 320 |
-
export
|
|
|
|
|
|
|
|
|
|
| 321 |
|
| 322 |
-
|
| 323 |
-
|
|
|
|
| 324 |
|
| 325 |
-
|
|
|
|
| 326 |
|
| 327 |
export const validateModel = (_models: BackendModel[]) => {
|
| 328 |
// Zod enum function requires 2 parameters
|
|
@@ -331,13 +477,6 @@ export const validateModel = (_models: BackendModel[]) => {
|
|
| 331 |
|
| 332 |
// if `TASK_MODEL` is string & name of a model in `MODELS`, then we use `MODELS[TASK_MODEL]`, else we try to parse `TASK_MODEL` as a model config itself
|
| 333 |
|
| 334 |
-
export const taskModel = addEndpoint(
|
| 335 |
-
config.TASK_MODEL
|
| 336 |
-
? (models.find((m) => m.name === config.TASK_MODEL || m.id === config.TASK_MODEL) ??
|
| 337 |
-
defaultModel)
|
| 338 |
-
: defaultModel
|
| 339 |
-
);
|
| 340 |
-
|
| 341 |
export type BackendModel = Optional<
|
| 342 |
typeof defaultModel,
|
| 343 |
"preprompt" | "parameters" | "multimodal" | "unlisted" | "hasInferenceAPI"
|
|
|
|
| 76 |
|
| 77 |
type ModelOverride = z.infer<typeof overrideEntrySchema>;
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
const openaiBaseUrl = config.OPENAI_BASE_URL
|
| 80 |
? config.OPENAI_BASE_URL.replace(/\/$/, "")
|
| 81 |
: undefined;
|
| 82 |
const isHFRouter = openaiBaseUrl === "https://router.huggingface.co/v1";
|
| 83 |
|
| 84 |
+
const listSchema = z
|
| 85 |
+
.object({
|
| 86 |
+
data: z.array(
|
| 87 |
+
z.object({
|
| 88 |
+
id: z.string(),
|
| 89 |
+
description: z.string().optional(),
|
| 90 |
+
providers: z
|
| 91 |
+
.array(z.object({ supports_tools: z.boolean().optional() }).passthrough())
|
| 92 |
+
.optional(),
|
| 93 |
+
architecture: z
|
| 94 |
+
.object({
|
| 95 |
+
input_modalities: z.array(z.string()).optional(),
|
| 96 |
+
})
|
| 97 |
+
.passthrough()
|
| 98 |
+
.optional(),
|
| 99 |
+
})
|
| 100 |
+
),
|
| 101 |
+
})
|
| 102 |
+
.passthrough();
|
| 103 |
+
|
| 104 |
+
function getChatPromptRender(_m: ModelConfig): (inputs: ChatTemplateInput) => string {
|
| 105 |
+
// Minimal template to support legacy "completions" flow if ever used.
|
| 106 |
+
// We avoid any tokenizer/Jinja usage in this build.
|
| 107 |
+
return ({ messages, preprompt }) => {
|
| 108 |
+
const parts: string[] = [];
|
| 109 |
+
if (preprompt) parts.push(`[SYSTEM]\n${preprompt}`);
|
| 110 |
+
for (const msg of messages) {
|
| 111 |
+
const role = msg.from === "assistant" ? "ASSISTANT" : msg.from.toUpperCase();
|
| 112 |
+
parts.push(`[${role}]\n${msg.content}`);
|
| 113 |
+
}
|
| 114 |
+
parts.push(`[ASSISTANT]`);
|
| 115 |
+
return parts.join("\n\n");
|
| 116 |
+
};
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
const processModel = async (m: ModelConfig) => ({
|
| 120 |
+
...m,
|
| 121 |
+
chatPromptRender: await getChatPromptRender(m),
|
| 122 |
+
id: m.id || m.name,
|
| 123 |
+
displayName: m.displayName || m.name,
|
| 124 |
+
preprompt: m.prepromptUrl ? await fetch(m.prepromptUrl).then((r) => r.text()) : m.preprompt,
|
| 125 |
+
parameters: { ...m.parameters, stop_sequences: m.parameters?.stop },
|
| 126 |
+
unlisted: m.unlisted ?? false,
|
| 127 |
+
});
|
| 128 |
+
|
| 129 |
+
const addEndpoint = (m: Awaited<ReturnType<typeof processModel>>) => ({
|
| 130 |
+
...m,
|
| 131 |
+
getEndpoint: async (): Promise<Endpoint> => {
|
| 132 |
+
if (!m.endpoints || m.endpoints.length === 0) {
|
| 133 |
+
throw new Error("No endpoints configured. This build requires OpenAI-compatible endpoints.");
|
| 134 |
+
}
|
| 135 |
+
// Only support OpenAI-compatible endpoints in this build
|
| 136 |
+
const endpoint = m.endpoints[0];
|
| 137 |
+
if (endpoint.type !== "openai") {
|
| 138 |
+
throw new Error("Only 'openai' endpoint type is supported in this build");
|
| 139 |
+
}
|
| 140 |
+
return await endpoints.openai({ ...endpoint, model: m });
|
| 141 |
+
},
|
| 142 |
+
});
|
| 143 |
+
|
| 144 |
+
type InternalProcessedModel = Awaited<ReturnType<typeof addEndpoint>> & {
|
| 145 |
+
isRouter: boolean;
|
| 146 |
+
hasInferenceAPI: boolean;
|
| 147 |
+
};
|
| 148 |
+
|
| 149 |
+
const inferenceApiIds: string[] = [];
|
| 150 |
+
|
| 151 |
+
const getModelOverrides = (): ModelOverride[] => {
|
| 152 |
+
const overridesEnv = (Reflect.get(config, "MODELS") as string | undefined) ?? "";
|
| 153 |
+
|
| 154 |
+
if (!overridesEnv.trim()) {
|
| 155 |
+
return [];
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
try {
|
| 159 |
+
return z.array(overrideEntrySchema).parse(JSON5.parse(sanitizeJSONEnv(overridesEnv, "[]")));
|
| 160 |
+
} catch (error) {
|
| 161 |
+
logger.error(error, "[models] Failed to parse MODELS overrides");
|
| 162 |
+
return [];
|
| 163 |
+
}
|
| 164 |
+
};
|
| 165 |
+
|
| 166 |
+
export type ModelsRefreshSummary = {
|
| 167 |
+
refreshedAt: Date;
|
| 168 |
+
durationMs: number;
|
| 169 |
+
added: string[];
|
| 170 |
+
removed: string[];
|
| 171 |
+
changed: string[];
|
| 172 |
+
total: number;
|
| 173 |
+
};
|
| 174 |
+
|
| 175 |
+
export type ProcessedModel = InternalProcessedModel;
|
| 176 |
+
|
| 177 |
+
export let models: ProcessedModel[] = [];
|
| 178 |
+
export let defaultModel!: ProcessedModel;
|
| 179 |
+
export let taskModel!: ProcessedModel;
|
| 180 |
+
export let validModelIdSchema: z.ZodType<string> = z.string();
|
| 181 |
+
export let lastModelRefresh = new Date(0);
|
| 182 |
+
export let lastModelRefreshDurationMs = 0;
|
| 183 |
+
export let lastModelRefreshSummary: ModelsRefreshSummary = {
|
| 184 |
+
refreshedAt: new Date(0),
|
| 185 |
+
durationMs: 0,
|
| 186 |
+
added: [],
|
| 187 |
+
removed: [],
|
| 188 |
+
changed: [],
|
| 189 |
+
total: 0,
|
| 190 |
+
};
|
| 191 |
+
|
| 192 |
+
let inflightRefresh: Promise<ModelsRefreshSummary> | null = null;
|
| 193 |
+
|
| 194 |
+
const createValidModelIdSchema = (modelList: ProcessedModel[]): z.ZodType<string> => {
|
| 195 |
+
if (modelList.length === 0) {
|
| 196 |
+
throw new Error("No models available to build validation schema");
|
| 197 |
+
}
|
| 198 |
+
const ids = new Set(modelList.map((m) => m.id));
|
| 199 |
+
return z.string().refine((value) => ids.has(value), "Invalid model id");
|
| 200 |
+
};
|
| 201 |
+
|
| 202 |
+
const resolveTaskModel = (modelList: ProcessedModel[]) => {
|
| 203 |
+
if (modelList.length === 0) {
|
| 204 |
+
throw new Error("No models available to select task model");
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
if (config.TASK_MODEL) {
|
| 208 |
+
const preferred = modelList.find(
|
| 209 |
+
(m) => m.name === config.TASK_MODEL || m.id === config.TASK_MODEL
|
| 210 |
+
);
|
| 211 |
+
if (preferred) {
|
| 212 |
+
return preferred;
|
| 213 |
+
}
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
+
return modelList[0];
|
| 217 |
+
};
|
| 218 |
+
|
| 219 |
+
const signatureForModel = (model: ProcessedModel) =>
|
| 220 |
+
JSON.stringify({
|
| 221 |
+
description: model.description,
|
| 222 |
+
displayName: model.displayName,
|
| 223 |
+
providers: model.providers,
|
| 224 |
+
parameters: model.parameters,
|
| 225 |
+
preprompt: model.preprompt,
|
| 226 |
+
prepromptUrl: model.prepromptUrl,
|
| 227 |
+
endpoints:
|
| 228 |
+
model.endpoints?.map((endpoint) => {
|
| 229 |
+
if (endpoint.type === "openai") {
|
| 230 |
+
const { type, baseURL } = endpoint;
|
| 231 |
+
return { type, baseURL };
|
| 232 |
+
}
|
| 233 |
+
return { type: endpoint.type };
|
| 234 |
+
}) ?? null,
|
| 235 |
+
multimodal: model.multimodal,
|
| 236 |
+
multimodalAcceptedMimetypes: model.multimodalAcceptedMimetypes,
|
| 237 |
+
isRouter: model.isRouter,
|
| 238 |
+
hasInferenceAPI: model.hasInferenceAPI,
|
| 239 |
+
});
|
| 240 |
+
|
| 241 |
+
const applyModelState = (newModels: ProcessedModel[], startedAt: number): ModelsRefreshSummary => {
|
| 242 |
+
if (newModels.length === 0) {
|
| 243 |
+
throw new Error("Failed to load any models from upstream");
|
| 244 |
+
}
|
| 245 |
+
|
| 246 |
+
const previousIds = new Set(models.map((m) => m.id));
|
| 247 |
+
const previousSignatures = new Map(models.map((m) => [m.id, signatureForModel(m)]));
|
| 248 |
+
const refreshedAt = new Date();
|
| 249 |
+
const durationMs = Date.now() - startedAt;
|
| 250 |
+
|
| 251 |
+
models = newModels;
|
| 252 |
+
defaultModel = models[0];
|
| 253 |
+
taskModel = resolveTaskModel(models);
|
| 254 |
+
validModelIdSchema = createValidModelIdSchema(models);
|
| 255 |
+
lastModelRefresh = refreshedAt;
|
| 256 |
+
lastModelRefreshDurationMs = durationMs;
|
| 257 |
+
|
| 258 |
+
const added = newModels.map((m) => m.id).filter((id) => !previousIds.has(id));
|
| 259 |
+
const removed = Array.from(previousIds).filter(
|
| 260 |
+
(id) => !newModels.some((model) => model.id === id)
|
| 261 |
+
);
|
| 262 |
+
const changed = newModels
|
| 263 |
+
.filter((model) => {
|
| 264 |
+
const previousSignature = previousSignatures.get(model.id);
|
| 265 |
+
return previousSignature !== undefined && previousSignature !== signatureForModel(model);
|
| 266 |
+
})
|
| 267 |
+
.map((model) => model.id);
|
| 268 |
+
|
| 269 |
+
const summary: ModelsRefreshSummary = {
|
| 270 |
+
refreshedAt,
|
| 271 |
+
durationMs,
|
| 272 |
+
added,
|
| 273 |
+
removed,
|
| 274 |
+
changed,
|
| 275 |
+
total: models.length,
|
| 276 |
+
};
|
| 277 |
+
|
| 278 |
+
lastModelRefreshSummary = summary;
|
| 279 |
+
|
| 280 |
+
logger.info(
|
| 281 |
+
{
|
| 282 |
+
total: summary.total,
|
| 283 |
+
added: summary.added,
|
| 284 |
+
removed: summary.removed,
|
| 285 |
+
changed: summary.changed,
|
| 286 |
+
durationMs: summary.durationMs,
|
| 287 |
+
},
|
| 288 |
+
"[models] Model cache refreshed"
|
| 289 |
+
);
|
| 290 |
+
|
| 291 |
+
return summary;
|
| 292 |
+
};
|
| 293 |
+
|
| 294 |
+
const buildModels = async (): Promise<ProcessedModel[]> => {
|
| 295 |
+
if (!openaiBaseUrl) {
|
| 296 |
+
logger.error(
|
| 297 |
+
"OPENAI_BASE_URL is required. Set it to an OpenAI-compatible base (e.g., https://router.huggingface.co/v1)."
|
| 298 |
+
);
|
| 299 |
+
throw new Error("OPENAI_BASE_URL not set");
|
| 300 |
+
}
|
| 301 |
+
|
| 302 |
try {
|
| 303 |
const baseURL = openaiBaseUrl;
|
| 304 |
logger.info({ baseURL }, "[models] Using OpenAI-compatible base URL");
|
|
|
|
| 324 |
const json = await response.json();
|
| 325 |
logger.info({ keys: Object.keys(json || {}) }, "[models] Response keys");
|
| 326 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
const parsed = listSchema.parse(json);
|
| 328 |
logger.info({ count: parsed.data.length }, "[models] Parsed models count");
|
| 329 |
|
| 330 |
+
let modelsRaw = parsed.data.map((m) => {
|
| 331 |
let logoUrl: string | undefined = undefined;
|
| 332 |
if (isHFRouter && m.id.includes("/")) {
|
| 333 |
const org = m.id.split("/")[0];
|
|
|
|
| 357 |
],
|
| 358 |
} as ModelConfig;
|
| 359 |
}) as ModelConfig[];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
|
| 361 |
+
const overrides = getModelOverrides();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
|
| 363 |
+
if (overrides.length) {
|
| 364 |
+
const overrideMap = new Map<string, ModelOverride>();
|
| 365 |
+
for (const override of overrides) {
|
| 366 |
+
for (const key of [override.id, override.name]) {
|
| 367 |
+
const trimmed = key?.trim();
|
| 368 |
+
if (trimmed) overrideMap.set(trimmed, override);
|
| 369 |
+
}
|
| 370 |
+
}
|
| 371 |
|
| 372 |
+
modelsRaw = modelsRaw.map((model) => {
|
| 373 |
+
const override = overrideMap.get(model.id ?? "") ?? overrideMap.get(model.name ?? "");
|
| 374 |
+
if (!override) return model;
|
| 375 |
|
| 376 |
+
const { id, name, ...rest } = override;
|
| 377 |
+
void id;
|
| 378 |
+
void name;
|
|
|
|
|
|
|
|
|
|
| 379 |
|
| 380 |
+
return {
|
| 381 |
+
...model,
|
| 382 |
+
...rest,
|
| 383 |
+
};
|
| 384 |
+
});
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
|
| 387 |
+
const builtModels = await Promise.all(
|
| 388 |
+
modelsRaw.map((e) =>
|
| 389 |
+
processModel(e)
|
| 390 |
+
.then(addEndpoint)
|
| 391 |
+
.then(async (m) => ({
|
| 392 |
+
...m,
|
| 393 |
+
hasInferenceAPI: inferenceApiIds.includes(m.id ?? m.name),
|
| 394 |
+
// router decoration added later
|
| 395 |
+
isRouter: false as boolean,
|
| 396 |
+
}))
|
| 397 |
+
)
|
| 398 |
+
);
|
| 399 |
+
|
| 400 |
+
const archBase = (config.LLM_ROUTER_ARCH_BASE_URL || "").trim();
|
| 401 |
+
const routerLabel = (config.PUBLIC_LLM_ROUTER_DISPLAY_NAME || "Omni").trim() || "Omni";
|
| 402 |
+
const routerLogo = (config.PUBLIC_LLM_ROUTER_LOGO_URL || "").trim();
|
| 403 |
+
const routerAliasId = (config.PUBLIC_LLM_ROUTER_ALIAS_ID || "omni").trim() || "omni";
|
| 404 |
+
const routerMultimodalEnabled =
|
| 405 |
+
(config.LLM_ROUTER_ENABLE_MULTIMODAL || "").toLowerCase() === "true";
|
| 406 |
+
|
| 407 |
+
let decorated = builtModels as ProcessedModel[];
|
| 408 |
+
|
| 409 |
+
if (archBase) {
|
| 410 |
+
// Build a minimal model config for the alias
|
| 411 |
+
const aliasRaw = {
|
| 412 |
+
id: routerAliasId,
|
| 413 |
+
name: routerAliasId,
|
| 414 |
+
displayName: routerLabel,
|
| 415 |
+
logoUrl: routerLogo || undefined,
|
| 416 |
+
preprompt: "",
|
| 417 |
+
endpoints: [
|
| 418 |
+
{
|
| 419 |
+
type: "openai" as const,
|
| 420 |
+
baseURL: openaiBaseUrl,
|
| 421 |
+
},
|
| 422 |
+
],
|
| 423 |
+
// Keep the alias visible
|
| 424 |
+
unlisted: false,
|
| 425 |
+
} as ModelConfig;
|
| 426 |
|
| 427 |
+
if (routerMultimodalEnabled) {
|
| 428 |
+
aliasRaw.multimodal = true;
|
| 429 |
+
aliasRaw.multimodalAcceptedMimetypes = ["image/*"];
|
| 430 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
|
| 432 |
+
const aliasBase = await processModel(aliasRaw);
|
| 433 |
+
// Create a self-referential ProcessedModel for the router endpoint
|
| 434 |
+
const aliasModel: ProcessedModel = {
|
| 435 |
+
...aliasBase,
|
| 436 |
+
isRouter: true,
|
| 437 |
+
hasInferenceAPI: false,
|
| 438 |
+
// getEndpoint uses the router wrapper regardless of the endpoints array
|
| 439 |
+
getEndpoint: async (): Promise<Endpoint> => makeRouterEndpoint(aliasModel),
|
| 440 |
+
} as ProcessedModel;
|
| 441 |
+
|
| 442 |
+
// Put alias first
|
| 443 |
+
decorated = [aliasModel, ...decorated];
|
| 444 |
+
}
|
| 445 |
|
| 446 |
+
return decorated;
|
| 447 |
+
} catch (e) {
|
| 448 |
+
logger.error(e, "Failed to load models from OpenAI base URL");
|
| 449 |
+
throw e;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 450 |
}
|
| 451 |
+
};
|
| 452 |
|
| 453 |
+
const rebuildModels = async (): Promise<ModelsRefreshSummary> => {
|
| 454 |
+
const startedAt = Date.now();
|
| 455 |
+
const newModels = await buildModels();
|
| 456 |
+
return applyModelState(newModels, startedAt);
|
| 457 |
+
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 458 |
|
| 459 |
+
await rebuildModels();
|
| 460 |
|
| 461 |
+
export const refreshModels = async (): Promise<ModelsRefreshSummary> => {
|
| 462 |
+
if (inflightRefresh) {
|
| 463 |
+
return inflightRefresh;
|
| 464 |
+
}
|
| 465 |
|
| 466 |
+
inflightRefresh = rebuildModels().finally(() => {
|
| 467 |
+
inflightRefresh = null;
|
| 468 |
+
});
|
| 469 |
|
| 470 |
+
return inflightRefresh;
|
| 471 |
+
};
|
| 472 |
|
| 473 |
export const validateModel = (_models: BackendModel[]) => {
|
| 474 |
// Zod enum function requires 2 parameters
|
|
|
|
| 477 |
|
| 478 |
// if `TASK_MODEL` is string & name of a model in `MODELS`, then we use `MODELS[TASK_MODEL]`, else we try to parse `TASK_MODEL` as a model config itself
|
| 479 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
export type BackendModel = Optional<
|
| 481 |
typeof defaultModel,
|
| 482 |
"preprompt" | "parameters" | "multimodal" | "unlisted" | "hasInferenceAPI"
|