import { config } from "$lib/server/config";
import type { ChatTemplateInput } from "$lib/types/Template";
import { z } from "zod";
import endpoints, { endpointSchema, type Endpoint } from "./endpoints/endpoints";
import JSON5 from "json5";
import { logger } from "$lib/server/logger";
import { makeRouterEndpoint } from "$lib/server/router/endpoint";
type Optional<T, K extends keyof T> = Pick<Partial<T>, K> & Omit<T, K>;
export interface EndpointOptions {
apiKey?: string;
}
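// Env-provided JSON values are sometimes wrapped in backticks (e.g. MODELS=`[...]` in .env files);
// strip a wrapping backtick pair and fall back to the given default when the value is empty.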
const sanitizeJSONEnv = (val: string, fallback: string) => {
const raw = (val ?? "").trim();
const unquoted = raw.startsWith("`") && raw.endsWith("`") ? raw.slice(1, -1) : raw;
return unquoted || fallback;
};
const reasoningSchema = z.union([
z.object({
type: z.literal("regex"), // everything is reasoning, extract the answer from the regex
regex: z.string(),
}),
z.object({
type: z.literal("tokens"), // use beginning and end tokens that define the reasoning portion of the answer
beginToken: z.string(), // empty string means the model starts in reasoning mode
endToken: z.string(),
}),
z.object({
type: z.literal("summarize"), // everything is reasoning, summarize the answer
}),
]);
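// Illustrative "tokens" config (the token strings are assumptions, not fixed values):
// a model that emits <think>...</think> blocks would use
//   { type: "tokens", beginToken: "<think>", endToken: "</think>" }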
const modelConfig = z.object({
/** Used as an identifier in DB */
id: z.string().optional(),
/** Used to link to the model page, and for inference */
name: z.string().default(""),
displayName: z.string().min(1).optional(),
description: z.string().min(1).optional(),
logoUrl: z.string().url().optional(),
websiteUrl: z.string().url().optional(),
modelUrl: z.string().url().optional(),
tokenizer: z.never().optional(),
datasetName: z.string().min(1).optional(),
datasetUrl: z.string().url().optional(),
preprompt: z.string().default(""),
prepromptUrl: z.string().url().optional(),
chatPromptTemplate: z.never().optional(),
promptExamples: z
.array(
z.object({
title: z.string().min(1),
prompt: z.string().min(1),
})
)
.optional(),
endpoints: z.array(endpointSchema).optional(),
providers: z.array(z.object({ supports_tools: z.boolean().optional() }).passthrough()).optional(),
parameters: z
.object({
temperature: z.number().min(0).max(2).optional(),
truncate: z.number().int().positive().optional(),
max_tokens: z.number().int().positive().optional(),
stop: z.array(z.string()).optional(),
top_p: z.number().positive().optional(),
top_k: z.number().positive().optional(),
frequency_penalty: z.number().min(-2).max(2).optional(),
presence_penalty: z.number().min(-2).max(2).optional(),
})
.passthrough()
.optional(),
multimodal: z.boolean().default(false),
multimodalAcceptedMimetypes: z.array(z.string()).optional(),
unlisted: z.boolean().default(false),
embeddingModel: z.never().optional(),
/** Used to enable/disable system prompt usage */
systemRoleSupported: z.boolean().default(true),
reasoning: reasoningSchema.optional(),
});
type ModelConfig = z.infer<typeof modelConfig>;
const overrideEntrySchema = modelConfig
.partial()
.extend({
id: z.string().optional(),
name: z.string().optional(),
})
.refine((value) => Boolean((value.id ?? value.name)?.trim()), {
message: "Model override entry must provide an id or name",
});
type ModelOverride = z.infer<typeof overrideEntrySchema>;
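// Overrides are matched by id or name and shallow-merged onto the fetched model entries.
// A minimal sketch (illustrative id): MODELS=`[{ "id": "org/some-model", "displayName": "Some Model" }]`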
// ggufModelsConfig is unused in this build
// Source models exclusively from an OpenAI-compatible endpoint.
let modelsRaw: ModelConfig[] = [];
// Require explicit base URL; no implicit default here
const openaiBaseUrl = config.OPENAI_BASE_URL
? config.OPENAI_BASE_URL.replace(/\/$/, "")
: undefined;
const isHFRouter = openaiBaseUrl === "https://router.huggingface.co/v1";
if (openaiBaseUrl) {
try {
const baseURL = openaiBaseUrl;
logger.info({ baseURL }, "[models] Using OpenAI-compatible base URL");
// Canonical auth token is OPENAI_API_KEY; keep HF_TOKEN as legacy alias
const authToken = config.OPENAI_API_KEY || config.HF_TOKEN || "";
// Try unauthenticated request first (many model lists are public, e.g. HF router)
let response = await fetch(`${baseURL}/models`);
logger.info({ status: response.status }, "[models] First fetch status");
if (response.status === 401 || response.status === 403) {
// Retry with Authorization header if available
response = await fetch(`${baseURL}/models`, {
headers: authToken ? { Authorization: `Bearer ${authToken}` } : undefined,
});
logger.info({ status: response.status }, "[models] Retried fetch status");
}
if (!response.ok) {
throw new Error(
`Failed to fetch ${baseURL}/models: ${response.status} ${response.statusText}`
);
}
const json = await response.json();
logger.info({ keys: Object.keys(json || {}) }, "[models] Response keys");
const listSchema = z
.object({
data: z.array(
z.object({
id: z.string(),
description: z.string().optional(),
providers: z
.array(z.object({ supports_tools: z.boolean().optional() }).passthrough())
.optional(),
architecture: z
.object({
input_modalities: z.array(z.string()).optional(),
})
.passthrough()
.optional(),
})
),
})
.passthrough();
const parsed = listSchema.parse(json);
logger.info({ count: parsed.data.length }, "[models] Parsed models count");
modelsRaw = parsed.data.map((m) => {
let logoUrl: string | undefined = undefined;
if (isHFRouter && m.id.includes("/")) {
const org = m.id.split("/")[0];
logoUrl = `https://huggingface.co/api/organizations/${encodeURIComponent(org)}/avatar?redirect=true`;
}
const inputModalities = (m.architecture?.input_modalities ?? []).map((modality) =>
modality.toLowerCase()
);
const supportsImageInput =
inputModalities.includes("image") || inputModalities.includes("vision");
return {
id: m.id,
name: m.id,
displayName: m.id,
description: m.description,
logoUrl,
providers: m.providers,
multimodal: supportsImageInput,
multimodalAcceptedMimetypes: supportsImageInput ? ["image/*"] : undefined,
endpoints: [
{
type: "openai" as const,
baseURL,
// apiKey will be taken from OPENAI_API_KEY or HF_TOKEN automatically
},
],
} as ModelConfig;
}) as ModelConfig[];
} catch (e) {
logger.error(e, "Failed to load models from OpenAI base URL");
throw e;
}
} else {
logger.error(
"OPENAI_BASE_URL is required. Set it to an OpenAI-compatible base (e.g., https://router.huggingface.co/v1)."
);
throw new Error("OPENAI_BASE_URL not set");
}
// Filter available models
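// ALLOWED_MODELS is a comma-separated list of model ids, e.g. (illustrative ids):
// ALLOWED_MODELS="org-a/model-x,org-b/model-y"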
const allowedModelsEnv = (config.ALLOWED_MODELS || "").trim();
if (allowedModelsEnv) {
const allowedModelIds = allowedModelsEnv
.split(",")
.map((id) => id.trim())
.filter(Boolean);
const allowedSet = new Set(allowedModelIds);
const beforeCount = modelsRaw.length;
modelsRaw = modelsRaw.filter((model) => allowedSet.has(model.id ?? model.name));
logger.info(
{ filtered: beforeCount - modelsRaw.length, allowed: modelsRaw.length },
"[models] Filtered models"
);
}
let modelOverrides: ModelOverride[] = [];
const overridesEnv = (Reflect.get(config, "MODELS") as string | undefined) ?? "";
if (overridesEnv.trim()) {
try {
modelOverrides = z
.array(overrideEntrySchema)
.parse(JSON5.parse(sanitizeJSONEnv(overridesEnv, "[]")));
} catch (error) {
logger.error(error, "[models] Failed to parse MODELS overrides");
}
}
if (modelOverrides.length) {
const overrideMap = new Map<string, ModelOverride>();
for (const override of modelOverrides) {
for (const key of [override.id, override.name]) {
const trimmed = key?.trim();
if (trimmed) overrideMap.set(trimmed, override);
}
}
modelsRaw = modelsRaw.map((model) => {
const override = overrideMap.get(model.id ?? "") ?? overrideMap.get(model.name ?? "");
if (!override) return model;
const { id, name, ...rest } = override;
void id;
void name;
return {
...model,
...rest,
};
});
}
function getChatPromptRender(_m: ModelConfig): (inputs: ChatTemplateInput) => string {
// Minimal template to support legacy "completions" flow if ever used.
// We avoid any tokenizer/Jinja usage in this build.
return ({ messages, preprompt }) => {
const parts: string[] = [];
if (preprompt) parts.push(`[SYSTEM]\n${preprompt}`);
for (const msg of messages) {
const role = msg.from === "assistant" ? "ASSISTANT" : msg.from.toUpperCase();
parts.push(`[${role}]\n${msg.content}`);
}
parts.push(`[ASSISTANT]`);
return parts.join("\n\n");
};
}
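// For example, messages [{ from: "user", content: "Hi" }] with preprompt "Be brief" render as:
//   [SYSTEM]\nBe brief\n\n[USER]\nHi\n\n[ASSISTANT]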
const processModel = async (m: ModelConfig) => ({
...m,
chatPromptRender: getChatPromptRender(m),
id: m.id || m.name,
displayName: m.displayName || m.name,
preprompt: m.prepromptUrl ? await fetch(m.prepromptUrl).then((r) => r.text()) : m.preprompt,
parameters: { ...m.parameters, stop_sequences: m.parameters?.stop },
unlisted: m.unlisted ?? false,
});
const addEndpoint = (m: Awaited<ReturnType<typeof processModel>>) => ({
...m,
getEndpoint: async (options?: EndpointOptions): Promise<Endpoint> => {
if (!m.endpoints || m.endpoints.length === 0) {
throw new Error("No endpoints configured. This build requires OpenAI-compatible endpoints.");
}
// Only support OpenAI-compatible endpoints in this build
const endpoint = m.endpoints[0];
if (endpoint.type !== "openai") {
throw new Error("Only 'openai' endpoint type is supported in this build");
}
const overrideApiKey = options?.apiKey;
return await endpoints.openai({
...endpoint,
model: m,
...(overrideApiKey ? { apiKey: overrideApiKey } : {}),
});
},
});
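// Usage sketch (hypothetical caller): resolve the endpoint lazily, optionally overriding
// the API key per request:
//   const endpoint = await model.getEndpoint({ apiKey: userKey });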
const inferenceApiIds: string[] = []; // no Inference API detection in this build, so hasInferenceAPI is always false
const builtModels = await Promise.all(
modelsRaw.map((e) =>
processModel(e)
.then(addEndpoint)
.then(async (m) => ({
...m,
hasInferenceAPI: inferenceApiIds.includes(m.id ?? m.name),
// router decoration added later
isRouter: false as boolean,
}))
)
);
// Inject a synthetic router alias ("Omni") if Arch router is configured
const archBase = (config.LLM_ROUTER_ARCH_BASE_URL || "").trim();
const routerLabel = (config.PUBLIC_LLM_ROUTER_DISPLAY_NAME || "Omni").trim() || "Omni";
const routerLogo = (config.PUBLIC_LLM_ROUTER_LOGO_URL || "").trim();
const routerAliasId = (config.PUBLIC_LLM_ROUTER_ALIAS_ID || "omni").trim() || "omni";
const routerMultimodalEnabled =
(config.LLM_ROUTER_ENABLE_MULTIMODAL || "").toLowerCase() === "true";
let decorated = builtModels as ProcessedModel[];
if (archBase) {
// Build a minimal model config for the alias
const aliasRaw: ModelConfig = {
id: routerAliasId,
name: routerAliasId,
displayName: routerLabel,
logoUrl: routerLogo || undefined,
preprompt: "",
endpoints: [
{
type: "openai" as const,
baseURL: openaiBaseUrl,
},
],
// Keep the alias visible
unlisted: false,
} as ProcessedModel;
if (routerMultimodalEnabled) {
aliasRaw.multimodal = true;
aliasRaw.multimodalAcceptedMimetypes = ["image/*"];
}
const aliasBase = await processModel(aliasRaw);
// Create a self-referential ProcessedModel for the router endpoint
const aliasModel: ProcessedModel = {
...aliasBase,
isRouter: true,
// getEndpoint uses the router wrapper regardless of the endpoints array
getEndpoint: async (options?: EndpointOptions): Promise<Endpoint> =>
makeRouterEndpoint(aliasModel, options),
} as ProcessedModel;
// Put alias first
decorated = [aliasModel, ...decorated];
}
export const models = decorated as typeof builtModels;
export type ProcessedModel = (typeof models)[number] & { isRouter?: boolean };
// super ugly but not sure how to make typescript happier
export const validModelIdSchema = z.enum(models.map((m) => m.id) as [string, ...string[]]);
export const defaultModel = models[0];
// Models that have been deprecated
export const oldModels = config.OLD_MODELS
? z
.array(
z.object({
id: z.string().optional(),
name: z.string().min(1),
displayName: z.string().min(1).optional(),
transferTo: validModelIdSchema.optional(),
})
)
.parse(JSON5.parse(sanitizeJSONEnv(config.OLD_MODELS, "[]")))
.map((m) => ({ ...m, id: m.id || m.name, displayName: m.displayName || m.name }))
: [];
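// Illustrative OLD_MODELS value: `[{ "name": "org/legacy-model", "transferTo": "org/current-model" }]`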
export const validateModel = (_models: BackendModel[]) => {
// z.enum requires a non-empty tuple, so the first id is passed explicitly and the rest spread
return z.enum([_models[0].id, ..._models.slice(1).map((m) => m.id)]);
};
// If TASK_MODEL matches the id or name of a configured model, use it; otherwise fall back to the default model.
export const taskModel = addEndpoint(
config.TASK_MODEL
? (models.find((m) => m.name === config.TASK_MODEL || m.id === config.TASK_MODEL) ??
defaultModel)
: defaultModel
);
export type BackendModel = Optional<
typeof defaultModel,
"preprompt" | "parameters" | "multimodal" | "unlisted" | "hasInferenceAPI"
>;