import { config } from "$lib/server/config"; import type { ChatTemplateInput } from "$lib/types/Template"; import { z } from "zod"; import endpoints, { endpointSchema, type Endpoint } from "./endpoints/endpoints"; import JSON5 from "json5"; import { logger } from "$lib/server/logger"; import { makeRouterEndpoint } from "$lib/server/router/endpoint"; type Optional = Pick, K> & Omit; const sanitizeJSONEnv = (val: string, fallback: string) => { const raw = (val ?? "").trim(); const unquoted = raw.startsWith("`") && raw.endsWith("`") ? raw.slice(1, -1) : raw; return unquoted || fallback; }; const modelConfig = z.object({ /** Used as an identifier in DB */ id: z.string().optional(), /** Used to link to the model page, and for inference */ name: z.string().default(""), displayName: z.string().min(1).optional(), description: z.string().min(1).optional(), logoUrl: z.string().url().optional(), websiteUrl: z.string().url().optional(), modelUrl: z.string().url().optional(), tokenizer: z.never().optional(), datasetName: z.string().min(1).optional(), datasetUrl: z.string().url().optional(), preprompt: z.string().default(""), prepromptUrl: z.string().url().optional(), chatPromptTemplate: z.never().optional(), promptExamples: z .array( z.object({ title: z.string().min(1), prompt: z.string().min(1), }) ) .optional(), endpoints: z.array(endpointSchema).optional(), providers: z.array(z.object({ supports_tools: z.boolean().optional() }).passthrough()).optional(), parameters: z .object({ temperature: z.number().min(0).max(2).optional(), truncate: z.number().int().positive().optional(), max_tokens: z.number().int().positive().optional(), stop: z.array(z.string()).optional(), top_p: z.number().positive().optional(), top_k: z.number().positive().optional(), frequency_penalty: z.number().min(-2).max(2).optional(), presence_penalty: z.number().min(-2).max(2).optional(), }) .passthrough() .optional(), multimodal: z.boolean().default(false), multimodalAcceptedMimetypes: z.array(z.string()).optional(), unlisted: z.boolean().default(false), embeddingModel: z.never().optional(), /** Used to enable/disable system prompt usage */ systemRoleSupported: z.boolean().default(true), }); type ModelConfig = z.infer; const overrideEntrySchema = modelConfig .partial() .extend({ id: z.string().optional(), name: z.string().optional(), }) .refine((value) => Boolean((value.id ?? value.name)?.trim()), { message: "Model override entry must provide an id or name", }); type ModelOverride = z.infer; const openaiBaseUrl = config.OPENAI_BASE_URL ? config.OPENAI_BASE_URL.replace(/\/$/, "") : undefined; const isHFRouter = openaiBaseUrl === "https://router.huggingface.co/v1"; const listSchema = z .object({ data: z.array( z.object({ id: z.string(), description: z.string().optional(), providers: z .array(z.object({ supports_tools: z.boolean().optional() }).passthrough()) .optional(), architecture: z .object({ input_modalities: z.array(z.string()).optional(), }) .passthrough() .optional(), }) ), }) .passthrough(); function getChatPromptRender(_m: ModelConfig): (inputs: ChatTemplateInput) => string { // Minimal template to support legacy "completions" flow if ever used. // We avoid any tokenizer/Jinja usage in this build. return ({ messages, preprompt }) => { const parts: string[] = []; if (preprompt) parts.push(`[SYSTEM]\n${preprompt}`); for (const msg of messages) { const role = msg.from === "assistant" ? "ASSISTANT" : msg.from.toUpperCase(); parts.push(`[${role}]\n${msg.content}`); } parts.push(`[ASSISTANT]`); return parts.join("\n\n"); }; } const processModel = async (m: ModelConfig) => ({ ...m, chatPromptRender: await getChatPromptRender(m), id: m.id || m.name, displayName: m.displayName || m.name, preprompt: m.prepromptUrl ? await fetch(m.prepromptUrl).then((r) => r.text()) : m.preprompt, parameters: { ...m.parameters, stop_sequences: m.parameters?.stop }, unlisted: m.unlisted ?? false, }); const addEndpoint = (m: Awaited>) => ({ ...m, getEndpoint: async (): Promise => { if (!m.endpoints || m.endpoints.length === 0) { throw new Error("No endpoints configured. This build requires OpenAI-compatible endpoints."); } // Only support OpenAI-compatible endpoints in this build const endpoint = m.endpoints[0]; if (endpoint.type !== "openai") { throw new Error("Only 'openai' endpoint type is supported in this build"); } return await endpoints.openai({ ...endpoint, model: m }); }, }); type InternalProcessedModel = Awaited> & { isRouter: boolean; hasInferenceAPI: boolean; }; const inferenceApiIds: string[] = []; const getModelOverrides = (): ModelOverride[] => { const overridesEnv = (Reflect.get(config, "MODELS") as string | undefined) ?? ""; if (!overridesEnv.trim()) { return []; } try { return z.array(overrideEntrySchema).parse(JSON5.parse(sanitizeJSONEnv(overridesEnv, "[]"))); } catch (error) { logger.error(error, "[models] Failed to parse MODELS overrides"); return []; } }; export type ModelsRefreshSummary = { refreshedAt: Date; durationMs: number; added: string[]; removed: string[]; changed: string[]; total: number; }; export type ProcessedModel = InternalProcessedModel; export let models: ProcessedModel[] = []; export let defaultModel!: ProcessedModel; export let taskModel!: ProcessedModel; export let validModelIdSchema: z.ZodType = z.string(); export let lastModelRefresh = new Date(0); export let lastModelRefreshDurationMs = 0; export let lastModelRefreshSummary: ModelsRefreshSummary = { refreshedAt: new Date(0), durationMs: 0, added: [], removed: [], changed: [], total: 0, }; let inflightRefresh: Promise | null = null; const createValidModelIdSchema = (modelList: ProcessedModel[]): z.ZodType => { if (modelList.length === 0) { throw new Error("No models available to build validation schema"); } const ids = new Set(modelList.map((m) => m.id)); return z.string().refine((value) => ids.has(value), "Invalid model id"); }; const resolveTaskModel = (modelList: ProcessedModel[]) => { if (modelList.length === 0) { throw new Error("No models available to select task model"); } if (config.TASK_MODEL) { const preferred = modelList.find( (m) => m.name === config.TASK_MODEL || m.id === config.TASK_MODEL ); if (preferred) { return preferred; } } return modelList[0]; }; const signatureForModel = (model: ProcessedModel) => JSON.stringify({ description: model.description, displayName: model.displayName, providers: model.providers, parameters: model.parameters, preprompt: model.preprompt, prepromptUrl: model.prepromptUrl, endpoints: model.endpoints?.map((endpoint) => { if (endpoint.type === "openai") { const { type, baseURL } = endpoint; return { type, baseURL }; } return { type: endpoint.type }; }) ?? null, multimodal: model.multimodal, multimodalAcceptedMimetypes: model.multimodalAcceptedMimetypes, isRouter: model.isRouter, hasInferenceAPI: model.hasInferenceAPI, }); const applyModelState = (newModels: ProcessedModel[], startedAt: number): ModelsRefreshSummary => { if (newModels.length === 0) { throw new Error("Failed to load any models from upstream"); } const previousIds = new Set(models.map((m) => m.id)); const previousSignatures = new Map(models.map((m) => [m.id, signatureForModel(m)])); const refreshedAt = new Date(); const durationMs = Date.now() - startedAt; models = newModels; defaultModel = models[0]; taskModel = resolveTaskModel(models); validModelIdSchema = createValidModelIdSchema(models); lastModelRefresh = refreshedAt; lastModelRefreshDurationMs = durationMs; const added = newModels.map((m) => m.id).filter((id) => !previousIds.has(id)); const removed = Array.from(previousIds).filter( (id) => !newModels.some((model) => model.id === id) ); const changed = newModels .filter((model) => { const previousSignature = previousSignatures.get(model.id); return previousSignature !== undefined && previousSignature !== signatureForModel(model); }) .map((model) => model.id); const summary: ModelsRefreshSummary = { refreshedAt, durationMs, added, removed, changed, total: models.length, }; lastModelRefreshSummary = summary; logger.info( { total: summary.total, added: summary.added, removed: summary.removed, changed: summary.changed, durationMs: summary.durationMs, }, "[models] Model cache refreshed" ); return summary; }; const buildModels = async (): Promise => { if (!openaiBaseUrl) { logger.error( "OPENAI_BASE_URL is required. Set it to an OpenAI-compatible base (e.g., https://router.huggingface.co/v1)." ); throw new Error("OPENAI_BASE_URL not set"); } try { const baseURL = openaiBaseUrl; logger.info({ baseURL }, "[models] Using OpenAI-compatible base URL"); // Canonical auth token is OPENAI_API_KEY; keep HF_TOKEN as legacy alias const authToken = config.OPENAI_API_KEY || config.HF_TOKEN; // Use auth token from the start if available to avoid rate limiting issues // Some APIs rate-limit unauthenticated requests more aggressively const response = await fetch(`${baseURL}/models`, { headers: authToken ? { Authorization: `Bearer ${authToken}` } : undefined, }); logger.info({ status: response.status }, "[models] First fetch status"); if (!response.ok && response.status === 401 && !authToken) { // If we get 401 and didn't have a token, there's nothing we can do throw new Error( `Failed to fetch ${baseURL}/models: ${response.status} ${response.statusText} (no auth token available)` ); } if (!response.ok) { throw new Error( `Failed to fetch ${baseURL}/models: ${response.status} ${response.statusText}` ); } const json = await response.json(); logger.info({ keys: Object.keys(json || {}) }, "[models] Response keys"); const parsed = listSchema.parse(json); logger.info({ count: parsed.data.length }, "[models] Parsed models count"); let modelsRaw = parsed.data.map((m) => { let logoUrl: string | undefined = undefined; if (isHFRouter && m.id.includes("/")) { const org = m.id.split("/")[0]; logoUrl = `https://huggingface.co/api/organizations/${encodeURIComponent(org)}/avatar?redirect=true`; } const inputModalities = (m.architecture?.input_modalities ?? []).map((modality) => modality.toLowerCase() ); const supportsImageInput = inputModalities.includes("image") || inputModalities.includes("vision"); return { id: m.id, name: m.id, displayName: m.id, description: m.description, logoUrl, providers: m.providers, multimodal: supportsImageInput, multimodalAcceptedMimetypes: supportsImageInput ? ["image/*"] : undefined, endpoints: [ { type: "openai" as const, baseURL, // apiKey will be taken from OPENAI_API_KEY or HF_TOKEN automatically }, ], } as ModelConfig; }) as ModelConfig[]; const overrides = getModelOverrides(); if (overrides.length) { const overrideMap = new Map(); for (const override of overrides) { for (const key of [override.id, override.name]) { const trimmed = key?.trim(); if (trimmed) overrideMap.set(trimmed, override); } } modelsRaw = modelsRaw.map((model) => { const override = overrideMap.get(model.id ?? "") ?? overrideMap.get(model.name ?? ""); if (!override) return model; const { id, name, ...rest } = override; void id; void name; return { ...model, ...rest, }; }); } const builtModels = await Promise.all( modelsRaw.map((e) => processModel(e) .then(addEndpoint) .then(async (m) => ({ ...m, hasInferenceAPI: inferenceApiIds.includes(m.id ?? m.name), // router decoration added later isRouter: false as boolean, })) ) ); const archBase = (config.LLM_ROUTER_ARCH_BASE_URL || "").trim(); const routerLabel = (config.PUBLIC_LLM_ROUTER_DISPLAY_NAME || "Omni").trim() || "Omni"; const routerLogo = (config.PUBLIC_LLM_ROUTER_LOGO_URL || "").trim(); const routerAliasId = (config.PUBLIC_LLM_ROUTER_ALIAS_ID || "omni").trim() || "omni"; const routerMultimodalEnabled = (config.LLM_ROUTER_ENABLE_MULTIMODAL || "").toLowerCase() === "true"; let decorated = builtModels as ProcessedModel[]; if (archBase) { // Build a minimal model config for the alias const aliasRaw = { id: routerAliasId, name: routerAliasId, displayName: routerLabel, description: "Automatically routes your messages to the best model for your request.", logoUrl: routerLogo || undefined, preprompt: "", endpoints: [ { type: "openai" as const, baseURL: openaiBaseUrl, }, ], // Keep the alias visible unlisted: false, } as ModelConfig; if (routerMultimodalEnabled) { aliasRaw.multimodal = true; aliasRaw.multimodalAcceptedMimetypes = ["image/*"]; } const aliasBase = await processModel(aliasRaw); // Create a self-referential ProcessedModel for the router endpoint const aliasModel: ProcessedModel = { ...aliasBase, isRouter: true, hasInferenceAPI: false, // getEndpoint uses the router wrapper regardless of the endpoints array getEndpoint: async (): Promise => makeRouterEndpoint(aliasModel), } as ProcessedModel; // Put alias first decorated = [aliasModel, ...decorated]; } return decorated; } catch (e) { logger.error(e, "Failed to load models from OpenAI base URL"); throw e; } }; const rebuildModels = async (): Promise => { const startedAt = Date.now(); const newModels = await buildModels(); return applyModelState(newModels, startedAt); }; await rebuildModels(); export const refreshModels = async (): Promise => { if (inflightRefresh) { return inflightRefresh; } inflightRefresh = rebuildModels().finally(() => { inflightRefresh = null; }); return inflightRefresh; }; export const validateModel = (_models: BackendModel[]) => { // Zod enum function requires 2 parameters return z.enum([_models[0].id, ..._models.slice(1).map((m) => m.id)]); }; // if `TASK_MODEL` is string & name of a model in `MODELS`, then we use `MODELS[TASK_MODEL]`, else we try to parse `TASK_MODEL` as a model config itself export type BackendModel = Optional< typeof defaultModel, "preprompt" | "parameters" | "multimodal" | "unlisted" | "hasInferenceAPI" >;