Commit c4408b8 by Andrew
(feat) Pass user api key through router endpoints

import type {
	Endpoint,
	EndpointParameters,
	EndpointMessage,
	TextGenerationStreamOutputSimplified,
} from "../endpoints/endpoints";
import endpoints from "../endpoints/endpoints";
import type { ProcessedModel, EndpointOptions } from "../models";
import { config } from "$lib/server/config";
import { logger } from "$lib/server/logger";
import { archSelectRoute } from "./arch";
import { getRoutes, resolveRouteModels } from "./policy";
// Matches <think>…</think> reasoning blocks, including a trailing unterminated one.
const REASONING_BLOCK_REGEX = /<think>[\s\S]*?(?:<\/think>|$)/g;

const ROUTER_MULTIMODAL_ROUTE = "multimodal";

function stripReasoningBlocks(text: string): string {
	const stripped = text.replace(REASONING_BLOCK_REGEX, "");
	return stripped === text ? text : stripped.trim();
}
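// Illustration (not part of the original module): the regex removes both closed
// and still-streaming reasoning blocks.
//
//   stripReasoningBlocks("<think>plan</think>Final answer"); // => "Final answer"
//   stripReasoningBlocks("<think>still thinking");           // => ""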
function stripReasoningFromMessage(message: EndpointMessage): EndpointMessage {
	// Drop the structured reasoning field; `void` marks the destructured value
	// as intentionally unused for the linter.
	const { reasoning: _reasoning, ...rest } = message;
	void _reasoning;
	const content =
		typeof message.content === "string" ? stripReasoningBlocks(message.content) : message.content;
	return {
		...rest,
		content,
	};
}
/**
 * Create an Endpoint that performs route selection via Arch and then forwards
 * to the selected model (with fallbacks) using the OpenAI-compatible endpoint.
 */
export async function makeRouterEndpoint(
	routerModel: ProcessedModel,
	options?: EndpointOptions
): Promise<Endpoint> {
	return async function routerEndpoint(params: EndpointParameters) {
		const routes = await getRoutes();
		const sanitizedMessages = params.messages.map(stripReasoningFromMessage);

		const routerMultimodalEnabled =
			(config.LLM_ROUTER_ENABLE_MULTIMODAL || "").toLowerCase() === "true";
		const hasImageInput = sanitizedMessages.some((message) =>
			(message.files ?? []).some(
				(file) => typeof file?.mime === "string" && file.mime.startsWith("image/")
			)
		);
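		// Illustration (hypothetical message shape; other fields elided): a user turn
		// with an image attachment makes hasImageInput true, triggering the bypass below.
		//   { from: "user", content: "What's in this photo?", files: [{ mime: "image/png", ... }] }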
		// Helper to create an OpenAI endpoint for a specific candidate model id
		async function createCandidateEndpoint(candidateModelId: string): Promise<Endpoint> {
			// Try to use the real candidate model config if present in chat-ui's model list
			let modelForCall: ProcessedModel | undefined;
			try {
				const mod = await import("../models");
				const all = (mod as { models: ProcessedModel[] }).models;
				modelForCall = all?.find((m) => m.id === candidateModelId || m.name === candidateModelId);
			} catch (e) {
				logger.warn({ err: String(e) }, "[router] failed to load models for candidate lookup");
			}

			if (!modelForCall) {
				// Fallback: clone router model with candidate id
				modelForCall = {
					...routerModel,
					id: candidateModelId,
					name: candidateModelId,
					displayName: candidateModelId,
				} as ProcessedModel;
			}

			const defaultApiKey = config.OPENAI_API_KEY || config.HF_TOKEN || "sk-";
			return endpoints.openai({
				type: "openai",
				baseURL: (config.OPENAI_BASE_URL || "https://router.huggingface.co/v1").replace(/\/$/, ""),
				apiKey: options?.apiKey ?? defaultApiKey,
				model: modelForCall,
				// Ensure streaming path is used
				streamingSupported: true,
			});
		}
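		// Key precedence (restating the call above): a per-user key passed in via
		// EndpointOptions wins; otherwise the request falls back to config.OPENAI_API_KEY,
		// then config.HF_TOKEN, then the "sk-" placeholder.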
		// Yield router metadata first for immediate UI display, then pass the
		// candidate's stream through unchanged.
		async function* metadataThenStream(
			gen: AsyncGenerator<TextGenerationStreamOutputSimplified>,
			actualModel: string,
			selectedRoute: string
		) {
			yield {
				token: { id: 0, text: "", special: true, logprob: 0 },
				generated_text: null,
				details: null,
				routerMetadata: { route: selectedRoute, model: actualModel },
			};
			for await (const ev of gen) yield ev;
		}
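		// Illustration (route and model names are hypothetical): the synthetic first
		// event carries no text, only metadata the UI can render immediately, e.g.
		//   { token: { id: 0, text: "", special: true, logprob: 0 },
		//     generated_text: null, details: null,
		//     routerMetadata: { route: "coding", model: "Qwen/Qwen2.5-Coder-32B-Instruct" } }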
		async function findFirstMultimodalCandidateId(): Promise<string | undefined> {
			try {
				const mod = await import("../models");
				const all = (mod as { models: ProcessedModel[] }).models;
				const first = all?.find((m) => !m.isRouter && m.multimodal);
				return first?.id ?? first?.name;
			} catch (e) {
				logger.warn({ err: String(e) }, "[router] failed to load models for multimodal lookup");
				return undefined;
			}
		}
		if (routerMultimodalEnabled && hasImageInput) {
			const multimodalCandidate = await findFirstMultimodalCandidateId();
			if (!multimodalCandidate) {
				throw new Error(
					"No multimodal models are configured for the router. Remove the image or enable a multimodal model."
				);
			}
			try {
				logger.info(
					{ route: ROUTER_MULTIMODAL_ROUTE, model: multimodalCandidate },
					"[router] multimodal input detected; bypassing Arch selection"
				);
				const ep = await createCandidateEndpoint(multimodalCandidate);
				const gen = await ep({ ...params });
				return metadataThenStream(gen, multimodalCandidate, ROUTER_MULTIMODAL_ROUTE);
			} catch (e) {
				logger.error(
					{ route: ROUTER_MULTIMODAL_ROUTE, model: multimodalCandidate, err: String(e) },
					"[router] multimodal fallback failed"
				);
				throw new Error(
					"Failed to call the configured multimodal model. Remove the image or try again later."
				);
			}
		}
		// Select a route via Arch, forwarding the user's API key, then try the
		// route's candidate models in order until one succeeds.
		const { routeName } = await archSelectRoute(sanitizedMessages, { apiKey: options?.apiKey });
		const fallbackModel = config.LLM_ROUTER_FALLBACK_MODEL || routerModel.id;
		const { candidates } = resolveRouteModels(routeName, routes, fallbackModel);

		let lastErr: unknown;
		for (const candidate of candidates) {
			try {
				logger.info({ route: routeName, model: candidate }, "[router] trying candidate");
				const ep = await createCandidateEndpoint(candidate);
				const gen = await ep({ ...params });
				return metadataThenStream(gen, candidate, routeName);
			} catch (e) {
				lastErr = e;
				logger.warn(
					{ route: routeName, model: candidate, err: String(e) },
					"[router] candidate failed"
				);
				continue;
			}
		}

		// Exhausted all candidates; throw to signal upstream failure.
		throw new Error(`Routing failed for route=${routeName}: ${String(lastErr)}`);
	};
}
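
// Usage sketch (hypothetical caller, not part of this module): the per-user API
// key captured at request time is threaded through EndpointOptions, so both the
// Arch selection call and the downstream OpenAI-compatible call authenticate
// with the user's key rather than the server-wide one.
//
//   const endpoint = await makeRouterEndpoint(routerModel, { apiKey: userApiKey });
//   const stream = await endpoint({ messages });
//   for await (const output of stream) {
//     // forward tokens and routerMetadata to the client
//   }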