Spaces:

aukaru
/

344

Paused

App Files Files Community

344 / src /proxy /google-ai.ts

aukaru

Upload 236 files

5c5b371 verified 8 months ago

raw

history blame contribute delete

9.89 kB

	import { Request, RequestHandler, Router, Response, NextFunction } from "express";
	import { v4 } from "uuid";
	import { GoogleAIKey, keyPool } from "../shared/key-management";
	import { config } from "../config";
	import { ipLimiter } from "./rate-limit";
	import {
	createPreprocessorMiddleware,
	finalizeSignedRequest,
	} from "./middleware/request";
	import { ProxyResHandlerWithBody } from "./middleware/response";
	import { addGoogleAIKey } from "./middleware/request/mutators/add-google-ai-key";
	import { createQueuedProxyMiddleware } from "./middleware/request/proxy-middleware-factory";
	import axios from "axios";

	let modelsCache: any = null;
	let modelsCacheTime = 0;

	// Cache for native Google AI models
	let nativeModelsCache: any = null;
	let nativeModelsCacheTime = 0;

	// https://ai.google.dev/models/gemini
	// TODO: list models https://ai.google.dev/tutorials/rest_quickstart#list_models

	const getModelsResponse = () => {
	if (new Date().getTime() - modelsCacheTime < 1000 * 60) {
	return modelsCache;
	}

	if (!config.googleAIKey) return { object: "list", data: [] };

	const keys = keyPool
	.list()
	.filter((k) => k.service === "google-ai") as GoogleAIKey[];
	if (keys.length === 0) {
	modelsCache = { object: "list", data: [] };
	modelsCacheTime = new Date().getTime();
	return modelsCache;
	}

	// Get all model IDs from keys, excluding any with "bard" in the name
	const modelIds = Array.from(
	new Set(keys.map((k) => k.modelIds).flat())
	).filter((id) => id.startsWith("models/") && !id.includes("bard"));

	// Strip "models/" prefix from IDs before creating model objects
	const models = modelIds.map((id) => ({
	// Strip "models/" prefix from ID for consistency with request processing
	id: id.startsWith("models/") ? id.slice("models/".length) : id,
	object: "model",
	created: new Date().getTime(),
	owned_by: "google",
	permission: [],
	root: "google",
	parent: null,
	}));

	modelsCache = { object: "list", data: models };
	modelsCacheTime = new Date().getTime();

	return modelsCache;
	};

	// Function to fetch native models from Google AI API
	const getNativeModelsResponse = async () => {
	// Return cached value if it was refreshed in the last minute
	if (new Date().getTime() - nativeModelsCacheTime < 1000 * 60) {
	return nativeModelsCache;
	}

	/*
	* The official Google API requires an API key. However SillyTavern only needs
	* a list of model IDs and does not care about any other model metadata. We
	* can therefore generate a synthetic response from the keys already
	* loaded into the proxy (same source we use for the OpenAI-compatible
	* endpoint) and completely avoid the outbound request. This removes the
	* need for the frontend to supply the proxy password as an API key and
	* prevents 4xx/5xx errors when the real Google API is unreachable or the key
	* is missing.
	*/
	const openaiStyle = getModelsResponse();
	const models = (openaiStyle.data \|\| []).map((m: any) => ({
	// Google AI Studio returns names in the format "models/<id>"
	name: `models/${m.id}`,
	supportedGenerationMethods: ["generateContent"],
	}));

	nativeModelsCache = { models };
	nativeModelsCacheTime = new Date().getTime();
	return nativeModelsCache;
	};

	const handleModelRequest: RequestHandler = (_req: Request, res: any) => {
	res.status(200).json(getModelsResponse());
	};

	// Native Gemini API model list request
	const handleNativeModelRequest: RequestHandler = async (_req: Request, res: any) => {
	try {
	const modelsResponse = await getNativeModelsResponse();
	res.status(200).json(modelsResponse);
	} catch (error) {
	console.error("Error in handleNativeModelRequest:", error);
	res.status(500).json({ error: "Failed to fetch models" });
	}
	};

	const googleAIBlockingResponseHandler: ProxyResHandlerWithBody = async (
	_proxyRes,
	req,
	res,
	body
	) => {
	if (typeof body !== "object") {
	throw new Error("Expected body to be an object");
	}

	let newBody = body;
	if (req.inboundApi === "openai") {
	req.log.info("Transforming Google AI response to OpenAI format");
	newBody = transformGoogleAIResponse(body, req);
	}

	res.status(200).json({ ...newBody, proxy: body.proxy });
	};

	function transformGoogleAIResponse(
	resBody: Record<string, any>,
	req: Request
	): Record<string, any> {
	const totalTokens = (req.promptTokens ?? 0) + (req.outputTokens ?? 0);

	// Handle the case where content might have different structures
	let content = "";

	// Check if the response has the expected structure
	if (resBody.candidates && resBody.candidates[0]) {
	const candidate = resBody.candidates[0];

	// Extract content text with multiple fallbacks
	if (candidate.content?.parts && candidate.content.parts[0]?.text) {
	// Regular format with parts array containing text
	content = candidate.content.parts[0].text;
	} else if (candidate.content?.text) {
	// Alternate format with direct text property
	content = candidate.content.text;
	} else if (typeof candidate.content?.parts?.[0] === 'string') {
	// Some formats might have string parts
	content = candidate.content.parts[0];
	}

	// Apply cleanup to the content if needed
	content = content.replace(/^(.{0,50}?): /, () => "");
	}

	return {
	id: "goo-" + v4(),
	object: "chat.completion",
	created: Date.now(),
	model: req.body.model,
	usage: {
	prompt_tokens: req.promptTokens,
	completion_tokens: req.outputTokens,
	total_tokens: totalTokens,
	},
	choices: [
	{
	message: { role: "assistant", content },
	finish_reason: resBody.candidates?.[0]?.finishReason \|\| "STOP",
	index: 0,
	},
	],
	};
	}

	const googleAIProxy = createQueuedProxyMiddleware({
	target: ({ signedRequest }: { signedRequest: any }) => {
	if (!signedRequest) throw new Error("Must sign request before proxying");
	const { protocol, hostname} = signedRequest;
	return `${protocol}//${hostname}`;
	},
	mutations: [addGoogleAIKey, finalizeSignedRequest],
	blockingResponseHandler: googleAIBlockingResponseHandler,
	});

	const googleAIRouter = Router();
	googleAIRouter.get("/v1/models", handleModelRequest);
	googleAIRouter.get("/:apiVersion(v1alpha\|v1beta)/models", handleNativeModelRequest);

	/**
	* Processes the thinking budget for Gemini 2.5 Flash model.
	* Ensures the budget is within the valid range of 0-24576 tokens.
	*/
	function processThinkingBudget(req: Request) {
	if (req.body.generationConfig?.thinkingConfig?.thinkingBudget !== undefined) {
	// Ensure thinkingBudget is within allowed range (0-24576)
	const budget = req.body.generationConfig.thinkingConfig.thinkingBudget;

	// If it's a number, validate range
	if (typeof budget === 'number') {
	req.body.generationConfig.thinkingConfig.thinkingBudget =
	Math.max(0, Math.min(budget, 24576));
	}
	// If it's "auto", leave as is
	}
	}

	function setStreamFlag(req: Request) {
	const isStreaming = req.url.includes("streamGenerateContent");
	if (isStreaming) {
	req.body.stream = true;
	req.isStreaming = true;
	} else {
	req.body.stream = false;
	req.isStreaming = false;
	}
	}

	/**
	* Strips 'models/' prefix from the beginning of model IDs if present.
	* No longer forces redirection to gemini-1.5-pro-latest for non-Gemini models.
	**/
	function maybeReassignModel(req: Request) {
	// Ensure model is on body as a lot of middleware will expect it.
	const model = req.body.model \|\| req.url.split("/").pop()?.split(":").shift();
	if (!model) {
	throw new Error("You must specify a model with your request.");
	}
	req.body.model = model;

	// Only strip the 'models/' prefix if present
	if (model.startsWith("models/")) {
	req.body.model = model.slice("models/".length);
	req.log.info({ originalModel: model, updatedModel: req.body.model }, "Stripped 'models/' prefix from model ID");
	}

	// No longer redirecting non-Gemini models to gemini-1.5-pro-latest
	// This allows the original model to be passed through to the API
	// If it's an invalid model, the Google AI API will return the appropriate error
	}

	/**
	* Middleware to check for and block requests to experimental models.
	* This function is intended to be used as a RequestPreprocessor.
	* It throws an error if an experimental model is detected, which should be
	* caught by the proxy's onError handler.
	*/
	function checkAndBlockExperimentalModels(req: Request) { // Changed signature
	const modelId = req.body.model as string \| undefined;

	// Check if the model ID contains "exp" (case-insensitive)
	if (modelId && modelId.toLowerCase().includes("exp")) {
	req.log.warn({ modelId }, "Blocking request to experimental Google AI model.");
	const err: any = new Error("Experimental models are too unstable to be supported in proxy code. Please use preview models instead.");
	err.statusCode = 400;
	throw err;
	}
	// If no experimental model, do nothing, allowing request to proceed.
	}

	// Native Google AI chat completion endpoint
	googleAIRouter.post(
	"/:apiVersion(v1alpha\|v1beta)/models/:modelId:(generateContent\|streamGenerateContent)",
	ipLimiter,
	createPreprocessorMiddleware(
	{ inApi: "google-ai", outApi: "google-ai", service: "google-ai" },
	{
	beforeTransform: [maybeReassignModel],
	afterTransform: [checkAndBlockExperimentalModels, setStreamFlag, processThinkingBudget]
	}
	),
	googleAIProxy
	);

	// OpenAI-to-Google AI compatibility endpoint.
	googleAIRouter.post(
	"/v1/chat/completions",
	ipLimiter,
	createPreprocessorMiddleware(
	{ inApi: "openai", outApi: "google-ai", service: "google-ai" },
	{
	afterTransform: [maybeReassignModel, checkAndBlockExperimentalModels, processThinkingBudget]
	}
	),
	googleAIProxy
	);

	export const googleAI = googleAIRouter;