llm-proxy / src /index.ts
relfa's picture
feat: Generalize Gemini API routing to `/v1beta/*` for all methods and dynamically forward the original request method and body.
5a6f847
import Fastify, { type FastifyError } from 'fastify';
import rateLimit from '@fastify/rate-limit';
import { loadConfig } from './config.js';
import { createAuthHook } from './auth.js';
import { forwardRequest } from './proxy.js';
import {
ANTHROPIC_ROUTES,
ANTHROPIC_FORWARDED_HEADERS,
GEMINI_FORWARDED_HEADERS,
} from './types.js';
import type { ProviderConfig } from './types.js';
// Load the runtime configuration; every hook and route below reads from it.
const config = loadConfig();

/** Log paths whose values are redacted so credentials never reach the logs. */
const redactedLogPaths = [
  'req.headers.authorization',
  'req.headers["x-api-key"]',
  'req.headers["x-goog-api-key"]',
];

/** Fastify instance shared by all hooks and routes in this file. */
const app = Fastify({
  bodyLimit: config.bodyLimit,
  trustProxy: true,
  logger: {
    level: config.logLevel,
    redact: redactedLogPaths,
  },
});
/** Rate-limit headers emitted on every response, whether or not the limit was hit. */
const rateLimitInfoHeaders = {
  'x-ratelimit-limit': true,
  'x-ratelimit-remaining': true,
  'x-ratelimit-reset': true,
};

/** Register rate limiting; `retry-after` is only added once the limit is exceeded. */
await app.register(rateLimit, {
  max: config.rateLimitMax,
  timeWindow: config.rateLimitWindowMs,
  addHeadersOnExceeding: { ...rateLimitInfoHeaders },
  addHeaders: { ...rateLimitInfoHeaders, 'retry-after': true },
});
/**
 * Optional CORS support – @fastify/cors must be installed separately.
 *
 * Only attempted when `config.corsOrigin` is set. A failure here never stops
 * the server: a missing package produces the install hint, while any other
 * failure (for example the plugin rejecting its options) is logged together
 * with the underlying error instead of being reported as "not installed".
 */
if (config.corsOrigin) {
  try {
    // Dynamic import: @fastify/cors is an optional dependency. The `+ ''`
    // keeps the specifier from being resolved statically.
    // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
    const corsPlugin = await import(/* webpackIgnore: true */ '@fastify/cors' + '');
    // eslint-disable-next-line @typescript-eslint/no-unsafe-argument, @typescript-eslint/no-unsafe-member-access
    await app.register(corsPlugin.default ?? corsPlugin, { origin: config.corsOrigin });
  } catch (err: unknown) {
    // Node reports an unresolved specifier with one of these error codes.
    const code = err instanceof Error ? (err as { code?: unknown }).code : undefined;
    if (code === 'ERR_MODULE_NOT_FOUND' || code === 'MODULE_NOT_FOUND') {
      app.log.warn('CORS_ORIGIN is set but @fastify/cors is not installed. Run: npm install @fastify/cors');
    } else {
      app.log.error({ err }, 'CORS_ORIGIN is set but @fastify/cors could not be registered; CORS is disabled');
    }
  }
}
/** Defensive headers stamped onto every outgoing response. */
const defensiveHeaders: ReadonlyArray<readonly [string, string]> = [
  ['X-Content-Type-Options', 'nosniff'],
  ['X-Frame-Options', 'DENY'],
];

/** Apply the defensive headers just before each response is sent. */
app.addHook('onSend', async (_request, reply) => {
  for (const [headerName, headerValue] of defensiveHeaders) {
    reply.header(headerName, headerValue);
  }
});
/**
 * Auth hook instance, built once from the configured proxy token.
 * Attached as an `onRequest` hook on the relay routes; the health routes
 * (`/health` and `/`) are registered without it.
 */
const authHook = createAuthHook(config.proxyAuthToken);
/**
 * Content-Type validation hook for API routes.
 *
 * Replies with 415 when the `Content-Type` request header is missing or does
 * not contain `application/json`; otherwise it does nothing and the request
 * continues through the lifecycle.
 *
 * @param request - Incoming request; only `headers['content-type']` is read.
 * @param reply - Reply used to send the 415 rejection.
 * @returns The reply when the request was rejected, otherwise `undefined`.
 */
async function validateContentType(
  request: Parameters<typeof authHook>[0],
  reply: Parameters<typeof authHook>[1],
): Promise<unknown> {
  const ct = request.headers['content-type'];
  if (!ct || !ct.includes('application/json')) {
    reply.code(415).send({ error: 'Unsupported Media Type. Expected application/json.' });
    // An async hook that has already replied must return `reply`. Otherwise
    // Fastify treats the resolved hook as "continue" and may still run the
    // route handler (forwarding the rejected request upstream) while the 415
    // is being flushed through the async onSend hook.
    return reply;
  }
  return undefined;
}
// ── Routes ──────────────────────────────────────────────────────────────────
/**
 * Health check – no auth required. `/health` is the explicit probe path and
 * `/` answers identically for the HF Spaces probe.
 */
for (const probePath of ['/health', '/']) {
  app.get(probePath, async (_request, reply) => {
    reply.send({ status: 'ok' });
  });
}
// ── Anthropic Routes (conditional) ──────────────────────────────────────────
if (!config.anthropicApiKey) {
  app.log.info('ANTHROPIC_API_KEY not set – Anthropic relay disabled');
} else {
  /** Upstream description handed to `forwardRequest` for every Anthropic route. */
  const anthropicProvider: ProviderConfig = {
    name: 'anthropic',
    baseUrl: config.anthropicBaseUrl,
    apiKey: config.anthropicApiKey,
    apiKeyHeader: 'x-api-key',
    forwardedHeaders: ANTHROPIC_FORWARDED_HEADERS,
  };

  // One POST route per listed path; each runs the auth hook and then the
  // Content-Type check before the request is relayed to the same path upstream.
  for (const routePath of ANTHROPIC_ROUTES) {
    app.post(
      routePath,
      { onRequest: [authHook, validateContentType] },
      async (request, reply) => {
        await forwardRequest(
          request,
          reply,
          `${anthropicProvider.baseUrl}${routePath}`,
          anthropicProvider,
          config,
          app.log,
        );
      },
    );
  }

  app.log.info('Anthropic API relay routes registered');
}
// ── Gemini Routes (conditional) ─────────────────────────────────────────────
if (config.geminiApiKey) {
  /** Upstream description handed to `forwardRequest` for the Gemini route. */
  const geminiProvider: ProviderConfig = {
    name: 'gemini',
    baseUrl: config.geminiBaseUrl,
    apiKey: config.geminiApiKey,
    apiKeyHeader: 'x-goog-api-key',
    forwardedHeaders: GEMINI_FORWARDED_HEADERS,
  };

  /**
   * True when `candidate`, once parsed the way an HTTP client parses it, still
   * points below `<baseUrl>/v1beta/`. Parsing resolves `..` segments (including
   * percent-encoded and backslash variants), so a wildcard such as
   * `../v1/models` cannot be used to reach other upstream paths with the
   * proxy's API key. Unparsable URLs count as outside the prefix.
   */
  const staysUnderV1beta = (candidate: string): boolean => {
    try {
      const allowedPrefix = new URL(`${geminiProvider.baseUrl}/v1beta/`).href;
      return new URL(candidate).href.startsWith(allowedPrefix);
    } catch {
      return false;
    }
  };

  /**
   * Gemini catch-all: forward all requests under /v1beta/ to Google.
   * Registered for all methods (GET for model listing, POST for generation/counting).
   * Only the auth hook runs on this route; no Content-Type validation is attached.
   * Requests with an empty wildcard, or whose path would resolve outside
   * /v1beta/, are answered with 400 and never forwarded.
   */
  app.all('/v1beta/*', {
    onRequest: [authHook],
  }, async (request, reply) => {
    const wildcard = (request.params as Record<string, string>)['*'];
    if (!wildcard) {
      reply.code(400).send({ error: 'Invalid Gemini API path' });
      return;
    }
    // Preserve query string (e.g. ?alt=sse for streaming)
    const qsIndex = request.url.indexOf('?');
    const queryString = qsIndex !== -1 ? request.url.slice(qsIndex) : '';
    const upstreamUrl = `${geminiProvider.baseUrl}/v1beta/${wildcard}${queryString}`;
    if (!staysUnderV1beta(upstreamUrl)) {
      reply.code(400).send({ error: 'Invalid Gemini API path' });
      return;
    }
    await forwardRequest(request, reply, upstreamUrl, geminiProvider, config, app.log);
  });
  app.log.info('Gemini API relay routes registered');
} else {
  app.log.info('GEMINI_API_KEY not set – Gemini relay disabled');
}
// ── Error Handling ──────────────────────────────────────────────────────────
/** Fallback for requests that match no registered route: log it, answer 404. */
app.setNotFoundHandler((request, reply) => {
  const { method, url } = request;
  app.log.warn({ method, url }, 'Route not found');
  reply.code(404).send({ error: 'Not found' });
});
/** Fixed, non-revealing messages for client errors that have no dedicated branch. */
const clientErrorMessages: Readonly<Record<number, string>> = {
  400: 'Bad request',
  401: 'Unauthorized',
  403: 'Forbidden',
  404: 'Not found',
  413: 'Payload too large',
  415: 'Unsupported media type',
};

/**
 * Central error handler: turns any error that reaches Fastify into a JSON
 * `{ error }` body without echoing the error's own message.
 *
 * - The status is taken from `error.statusCode` when it is a 4xx/5xx number;
 *   anything else (missing, or a non-error status) is reported as 500.
 * - 405 and 429 keep their fixed messages and are not logged here.
 * - Other 4xx errors are logged at warn level and answered with a fixed
 *   client-error message rather than "Internal server error".
 * - 5xx errors are logged at error level and answered generically.
 */
app.setErrorHandler((error: FastifyError, _request, reply) => {
  const reported = error.statusCode;
  const statusCode =
    typeof reported === 'number' && reported >= 400 && reported <= 599 ? reported : 500;

  if (statusCode === 405) {
    reply.code(405).send({ error: 'Method not allowed' });
    return;
  }
  if (statusCode === 429) {
    // Rate limit exceeded – answer with a fixed message instead of the error's own payload
    reply.code(429).send({ error: 'Too many requests. Please retry later.' });
    return;
  }
  if (statusCode < 500) {
    // The request itself was at fault (e.g. malformed or oversized body); this is not a server failure.
    app.log.warn({ err: error }, 'Request rejected');
    reply.code(statusCode).send({ error: clientErrorMessages[statusCode] ?? 'Bad request' });
    return;
  }
  app.log.error({ err: error }, 'Unhandled error');
  reply.code(statusCode).send({ error: 'Internal server error' });
});
// ── Server Start ────────────────────────────────────────────────────────────
/**
 * Bind the server to the configured host and port.
 * A failure to listen is logged at fatal level and terminates the process
 * with exit code 1.
 */
async function start(): Promise<void> {
  const { host, port } = config;
  try {
    await app.listen({ port, host });
    app.log.info(`Proxy listening on ${host}:${port}`);
  } catch (err) {
    app.log.fatal({ err }, 'Failed to start server');
    process.exit(1);
  }
}
// ── Graceful Shutdown ───────────────────────────────────────────────────────
/**
 * Close the Fastify instance and exit the process.
 *
 * @param signal - Name of the signal that triggered the shutdown (used for logging only).
 * @returns Never resolves in practice: the process exits with 0 after a clean
 *          close, or with 1 when closing fails.
 */
const shutdown = async (signal: string): Promise<void> => {
  app.log.info(`Received ${signal}, shutting down gracefully…`);
  let exitCode = 0;
  try {
    await app.close();
    app.log.info('Server closed.');
  } catch (err) {
    app.log.error({ err }, 'Error during shutdown');
    exitCode = 1;
  }
  process.exit(exitCode);
};
// Both termination signals trigger the same graceful shutdown path.
for (const signal of ['SIGTERM', 'SIGINT'] as const) {
  process.on(signal, () => void shutdown(signal));
}

// Catch unhandled rejections to keep the server stable
const logUnhandledRejection = (reason: unknown): void => {
  app.log.error({ reason }, 'Unhandled promise rejection');
};
process.on('unhandledRejection', logUnhandledRejection);

await start();