import { Request, RequestHandler, Router } from "express";
import { config } from "../config";
import { AzureOpenAIKey, keyPool, OpenAIKey } from "../shared/key-management";
import { getOpenAIModelFamily } from "../shared/models";
import { ipLimiter } from "./rate-limit";
import {
  addKey,
  addKeyForEmbeddingsRequest,
  createEmbeddingsPreprocessorMiddleware,
  createPreprocessorMiddleware,
  finalizeBody,
  RequestPreprocessor,
} from "./middleware/request";
import { ProxyResHandlerWithBody } from "./middleware/response";
import { createQueuedProxyMiddleware } from "./middleware/request/proxy-middleware-factory";

// Cached response for GET /v1/models, refreshed at most once per minute.
let modelsCache: any = null;
let modelsCacheTime = 0;
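
/**
 * Builds an OpenAI-compatible model list from the currently active keys for
 * the given service, restricted to the model families allowed by the proxy
 * config. Fine-tuned and unrecognized model IDs are excluded.
 */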
export function generateModelList(service: "openai" | "azure") {
  const keys = keyPool
    .list()
    .filter((k) => k.service === service && !k.isDisabled) as
    | OpenAIKey[]
    | AzureOpenAIKey[];
  if (keys.length === 0) return [];

  const allowedModelFamilies = new Set(config.allowedModelFamilies);
  const modelFamilies = new Set(
    keys
      .flatMap((k) => k.modelFamilies)
      .filter((f) => allowedModelFamilies.has(f))
  );

  const modelIds = new Set(
    keys
      .flatMap((k) => k.modelIds)
      .filter((id) => {
        const allowed = modelFamilies.has(getOpenAIModelFamily(id));
        const known = ["gpt", "o", "dall-e", "chatgpt", "text-embedding", "codex"].some(
          (prefix) => id.startsWith(prefix)
        );
        const isFinetune = id.includes("ft");
        return allowed && known && !isFinetune;
      })
  );

  return Array.from(modelIds).map((id) => ({
    id,
    object: "model",
    created: new Date().getTime(),
    owned_by: service,
    permission: [
      {
        id: "modelperm-" + id,
        object: "model_permission",
        created: new Date().getTime(),
        organization: "*",
        group: null,
        is_blocking: false,
      },
    ],
    root: id,
    parent: null,
  }));
}

const handleModelRequest: RequestHandler = (_req, res) => {
  // Serve from cache if it is less than a minute old.
  if (new Date().getTime() - modelsCacheTime < 1000 * 60) {
    return res.status(200).json(modelsCache);
  }

  if (!config.openaiKey) {
    return res.status(200).json({ object: "list", data: [] });
  }

  const result = generateModelList("openai");

  modelsCache = { object: "list", data: result };
  modelsCacheTime = new Date().getTime();
  res.status(200).json(modelsCache);
};
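
/**
 * Repurposes requests for the completions-only gpt-3.5-turbo-instruct model:
 * infers which API the client is speaking from the request shape, then
 * rewrites the URL to the legacy completions endpoint.
 */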
const rewriteForTurboInstruct: RequestPreprocessor = (req) => {
  if (req.body.prompt && !req.body.messages) {
    // Already a text completions request; just flag the inbound API.
    req.inboundApi = "openai-text";
  } else if (req.body.messages && !req.body.prompt) {
    // Chat-format request; it will be transformed to a text prompt downstream.
    req.inboundApi = "openai";
    req.body.model = "gpt-3.5-turbo-instruct";
  } else {
    throw new Error("Exactly one of `prompt` or `messages` must be provided");
  }

  req.url = "/v1/completions";
};
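
/**
 * Blocking response handler: finalizes keepalive (chunked) responses and
 * converts Turbo-Instruct and Responses API bodies back into the chat format
 * the client originally requested.
 */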
const openaiResponseHandler: ProxyResHandlerWithBody = async (
  _proxyRes,
  req,
  res,
  body
) => {
  if (typeof body !== "object") {
    throw new Error("Expected body to be an object");
  }

  // If we were streaming whitespace keepalives, headers are already sent, so
  // finish the chunked response with the raw JSON body.
  const interval = (req as any)._keepAliveInterval;
  if (interval) {
    clearInterval(interval);
    res.write(JSON.stringify(body));
    res.end();
    return;
  }

  let newBody = body;
  if (req.outboundApi === "openai-text" && req.inboundApi === "openai") {
    req.log.info("Transforming Turbo-Instruct response to Chat format");
    newBody = transformTurboInstructResponse(body);
  } else if (req.outboundApi === "openai-responses" && req.inboundApi === "openai") {
    req.log.info("Transforming Responses API response to Chat format");
    newBody = transformResponsesApiResponse(body);
  }

  res.status(200).json({ ...newBody, proxy: body.proxy });
};
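
/** Converts a text completions response body into chat completions format. */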
function transformTurboInstructResponse(
  turboInstructBody: Record<string, any>
): Record<string, any> {
  const transformed = { ...turboInstructBody };
  transformed.choices = [
    {
      ...turboInstructBody.choices[0],
      message: {
        role: "assistant",
        content: turboInstructBody.choices[0].text.trim(),
      },
    },
  ];
  delete transformed.choices[0].text;
  return transformed;
}
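
/**
 * Converts a Responses API body into chat completions format. Prefers the
 * documented `output` array, then falls back to `response.content` or a
 * top-level `content` field; always returns at least one choice.
 */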
function transformResponsesApiResponse(
  responsesBody: Record<string, any>
): Record<string, any> {
  // Already in chat completions format; pass through unchanged.
  if (responsesBody.choices && responsesBody.choices[0]?.message) {
    return responsesBody;
  }

  const transformed: Record<string, any> = {
    id: responsesBody.id || `chatcmpl-${Date.now()}`,
    object: "chat.completion",
    created: responsesBody.created_at || Math.floor(Date.now() / 1000),
    model: responsesBody.model || "o1-pro",
    choices: [],
    usage: responsesBody.usage || {
      prompt_tokens: 0,
      completion_tokens: 0,
      total_tokens: 0,
    },
  };

  // Standard Responses API shape: an `output` array containing a message.
  if (responsesBody.output && Array.isArray(responsesBody.output)) {
    let messageOutput = null;
    for (const output of responsesBody.output) {
      if (output.type === "message") {
        messageOutput = output;
        break;
      }
    }

    if (messageOutput) {
      if (
        messageOutput.content &&
        Array.isArray(messageOutput.content) &&
        messageOutput.content.length > 0
      ) {
        // Concatenate text parts and collect any tool calls.
        let content = "";
        const toolCalls: any[] = [];

        for (const contentItem of messageOutput.content) {
          if (contentItem.type === "output_text") {
            content += contentItem.text;
          } else if (
            contentItem.type === "tool_calls" &&
            Array.isArray(contentItem.tool_calls)
          ) {
            toolCalls.push(...contentItem.tool_calls);
          }
        }

        const message: Record<string, any> = {
          role: messageOutput.role || "assistant",
          content,
        };

        if (toolCalls.length > 0) {
          message.tool_calls = toolCalls;
        }

        transformed.choices.push({
          index: 0,
          message,
          finish_reason: "stop",
        });
      } else if (typeof messageOutput.content === "string") {
        // Some responses return the message content as a plain string.
        transformed.choices.push({
          index: 0,
          message: {
            role: messageOutput.role || "assistant",
            content: messageOutput.content,
          },
          finish_reason: "stop",
        });
      }
    }
  }
  // Fallback: nested `response.content` shape.
  else if (responsesBody.response && responsesBody.response.content) {
    transformed.choices.push({
      index: 0,
      message: {
        role: "assistant",
        content:
          typeof responsesBody.response.content === "string"
            ? responsesBody.response.content
            : JSON.stringify(responsesBody.response.content),
      },
      finish_reason: responsesBody.response.finish_reason || "stop",
    });
  }
  // Fallback: top-level `content` field.
  else if (responsesBody.content) {
    transformed.choices.push({
      index: 0,
      message: {
        role: "assistant",
        content:
          typeof responsesBody.content === "string"
            ? responsesBody.content
            : JSON.stringify(responsesBody.content),
      },
      finish_reason: "stop",
    });
  }

  // Guarantee at least one choice so clients don't break on an empty array.
  if (transformed.choices.length === 0) {
    transformed.choices.push({
      index: 0,
      message: { role: "assistant", content: "" },
      finish_reason: "stop",
    });
  }

  // Map Responses API token accounting onto chat completions usage fields.
  if (responsesBody.usage) {
    transformed.usage = {
      prompt_tokens: responsesBody.usage.input_tokens || 0,
      completion_tokens: responsesBody.usage.output_tokens || 0,
      total_tokens: responsesBody.usage.total_tokens || 0,
    };
  }

  return transformed;
}
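
// Queued proxies for the upstream OpenAI API. Each applies its body mutations
// before forwarding and, where a blocking handler is set, post-processes the
// response.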
const openaiProxy = createQueuedProxyMiddleware({
  mutations: [addKey, finalizeBody],
  target: "https://api.openai.com",
  blockingResponseHandler: openaiResponseHandler,
});

const openaiEmbeddingsProxy = createQueuedProxyMiddleware({
  mutations: [addKeyForEmbeddingsRequest, finalizeBody],
  target: "https://api.openai.com",
});

const openaiResponsesProxy = createQueuedProxyMiddleware({
  mutations: [addKey, finalizeBody],
  target: "https://api.openai.com",
  blockingResponseHandler: openaiResponseHandler,
});

const openaiRouter = Router();
openaiRouter.get("/v1/models", handleModelRequest);

openaiRouter.post(
  "/v1/completions",
  ipLimiter,
  createPreprocessorMiddleware({
    inApi: "openai-text",
    outApi: "openai-text",
    service: "openai",
  }),
  openaiProxy
);

openaiRouter.post(
  /\/v1\/turbo-instruct\/(v1\/)?chat\/completions/,
  ipLimiter,
  createPreprocessorMiddleware(
    { inApi: "openai", outApi: "openai-text", service: "openai" },
    {
      beforeTransform: [rewriteForTurboInstruct],
      afterTransform: [forceModel("gpt-3.5-turbo-instruct")],
    }
  ),
  openaiProxy
);
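
/**
 * For slow o1 requests, responds immediately with chunked transfer encoding
 * and emits whitespace keepalives so intermediaries like Cloudflare don't
 * time out the connection while the upstream model is working. Also rejects
 * streaming requests for codex-mini models, which are non-streaming only.
 */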
const setupChunkedTransfer: RequestHandler = (req, res, next) => {
  if (req.body.model?.startsWith("codex-mini") && req.body.stream === true) {
    return res.status(400).json({
      error: {
        message: "The codex-mini models do not support streaming. Please set 'stream: false' in your request.",
        type: "invalid_request_error",
        param: "stream",
        code: "streaming_not_supported",
      },
    });
  }

  if (req.body.model === "o1" || req.body.model === "o1-2024-12-17") {
    req.log.info("Setting chunked transfer for o1 to prevent Cloudflare timeouts");
    req.isChunkedTransfer = true;
    res.writeHead(200, {
      "Content-Type": "application/json",
      "Transfer-Encoding": "chunked",
    });

    // Write a block of whitespace periodically to keep the connection alive
    // until the upstream response arrives; cleared in openaiResponseHandler.
    const keepAlive = setInterval(() => {
      res.write(" ".repeat(4096));
    }, 48_000);

    (req as any)._keepAliveInterval = keepAlive;
  }
  next();
};
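
/** Models that must be served via the Responses API rather than chat completions. */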
function shouldUseResponsesApi(model: string): boolean {
  return model === "o1-pro" || model.startsWith("o1-pro-") ||
    model === "o3-pro" || model.startsWith("o3-pro-") ||
    model === "codex-mini-latest" || model.startsWith("codex-mini-");
}
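
/** Redirects chat completions requests for Responses-only models to /v1/responses. */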
const routeToResponsesApi: RequestPreprocessor = (req) => {
  if (shouldUseResponsesApi(req.body.model)) {
    req.log.info(`Routing ${req.body.model} to OpenAI Responses API`);
    req.url = "/v1/responses";
    req.outboundApi = "openai-responses";
  }
};
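
// Chat completions. Requests for Responses-only models are rerouted to the
// Responses proxy by the inline middleware below.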
openaiRouter.post(
  "/v1/chat/completions",
  ipLimiter,
  createPreprocessorMiddleware(
    { inApi: "openai", outApi: "openai", service: "openai" },
    {
      afterTransform: [
        fixupMaxTokens,
        setO1ReasoningEffort,
        routeToResponsesApi,
      ],
    }
  ),
  setupChunkedTransfer,
  (req, _res, next) => {
    if (req.outboundApi === "openai-responses") {
      req.log.info("Final check for Responses API format in chat completions");
      // The Responses API takes `input` rather than `messages`.
      if (req.body.messages) {
        req.log.info("Moving 'messages' to 'input' for Responses API");
        req.body.input = req.body.messages;
        delete req.body.messages;
      } else if (req.body.input && req.body.input.messages) {
        req.log.info("Reformatting input.messages for Responses API");
        req.body.input = req.body.input.messages;
      }

      return openaiResponsesProxy(req, _res, next);
    }
    next();
  },
  openaiProxy
);

openaiRouter.post(
  "/v1/responses",
  ipLimiter,
  createPreprocessorMiddleware(
    { inApi: "openai", outApi: "openai-responses", service: "openai" },
    { afterTransform: [fixupMaxTokens, setO1ReasoningEffort] }
  ),
  (req, _res, next) => {
    req.log.info("Final check for Responses API format");
    // The Responses API takes `input` rather than `messages`.
    if (req.body.messages) {
      req.log.info("Moving 'messages' to 'input' for Responses API");
      req.body.input = req.body.messages;
      delete req.body.messages;
    } else if (req.body.input && req.body.input.messages) {
      req.log.info("Reformatting input.messages for Responses API");
      req.body.input = req.body.input.messages;
    }

    next();
  },
  openaiResponsesProxy
);

openaiRouter.post(
  "/v1/embeddings",
  ipLimiter,
  createEmbeddingsPreprocessorMiddleware(),
  openaiEmbeddingsProxy
);
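
/** Returns a preprocessor that overwrites the request's model with a fixed one. */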
function forceModel(model: string): RequestPreprocessor {
  return (req: Request) => void (req.body.model = model);
}
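
/**
 * Normalizes token limit fields for the target API: the Responses API takes
 * max_output_tokens, while chat completions now takes max_completion_tokens
 * in place of the deprecated max_tokens.
 */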
function fixupMaxTokens(req: Request) {
  if (req.outboundApi === "openai-responses") {
    if (!req.body.max_output_tokens) {
      req.body.max_output_tokens = req.body.max_tokens || req.body.max_completion_tokens;
    }
    delete req.body.max_tokens;
    delete req.body.max_completion_tokens;

    // Drop parameters the Responses API does not accept.
    const unsupportedParams = ["frequency_penalty", "presence_penalty"];
    for (const param of unsupportedParams) {
      if (req.body[param] !== undefined) {
        req.log.info(`Removing unsupported parameter for Responses API: ${param}`);
        delete req.body[param];
      }
    }
  } else {
    if (!req.body.max_completion_tokens) {
      req.body.max_completion_tokens = req.body.max_tokens;
    }
    delete req.body.max_tokens;
  }
}
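
/**
 * Matches o-series reasoning models (o1, o3-mini, o4-mini-2025-04-16, ...)
 * and codex-mini variants; used to decide whether to apply a default
 * reasoning effort.
 */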
function isO1Model(model: string): boolean {
  return /^o\d+(-mini|-pro|-preview)?(-\d{4}-\d{2}-\d{2})?$/.test(model) ||
    /^codex-mini(-latest|-\d{4}-\d{2}-\d{2})?$/.test(model);
}
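
/**
 * Applies a default reasoning_effort from the O1_REASONING_EFFORT environment
 * variable when the request targets a reasoning model and the client did not
 * set one explicitly. Only 'low', 'medium', and 'high' are accepted.
 */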
function setO1ReasoningEffort(req: Request) {
  const effort = process.env.O1_REASONING_EFFORT?.toLowerCase();
  if (!effort || !isO1Model(req.body.model) || req.body.reasoning_effort) return;

  if (["low", "medium", "high"].includes(effort)) {
    req.body.reasoning_effort = effort;
  }
}

export const openai = openaiRouter;