| import { Request, RequestHandler, Router } from "express"; |
| import { createPreprocessorMiddleware } from "./middleware/request"; |
| import { ipLimiter } from "./rate-limit"; |
| import { createQueuedProxyMiddleware } from "./middleware/request/proxy-middleware-factory"; |
| import { addKey, finalizeBody } from "./middleware/request"; |
| import { ProxyResHandlerWithBody } from "./middleware/response"; |
| import axios from "axios"; |
| import { QwenKey, keyPool } from "../shared/key-management"; |
| import { |
| isQwenModel, |
| isQwenThinkingModel, |
| normalizeMessages, |
| isQwen3Model, |
| isThinkingVariant, |
| isNonThinkingVariant, |
| getBaseModelName |
| } from "../shared/api-schemas/qwen"; |
| import { logger } from "../logger"; |
|
|
// Child logger tagging all entries from this proxy route.
const log = logger.child({ module: "proxy", service: "qwen" });
// Cached /v1/models response payload and the epoch-ms timestamp at which it
// was stored; getModelsResponse() treats entries younger than 60s as fresh.
// NOTE(review): typed `any` because the shape comes from the upstream API —
// consider introducing a ModelsList interface.
let modelsCache: any = null;
let modelsCacheTime = 0;
|
|
| const qwenResponseHandler: ProxyResHandlerWithBody = async ( |
| _proxyRes, |
| req, |
| res, |
| body |
| ) => { |
| if (typeof body !== "object") { |
| throw new Error("Expected body to be an object"); |
| } |
|
|
| res.status(200).json({ ...body, proxy: body.proxy }); |
| }; |
|
|
| const getModelsResponse = async () => { |
| |
| if (new Date().getTime() - modelsCacheTime < 1000 * 60) { |
| return modelsCache; |
| } |
|
|
| try { |
| |
| const modelToUse = "qwen-plus"; |
| const qwenKey = keyPool.get(modelToUse, "qwen") as QwenKey; |
| |
| if (!qwenKey || !qwenKey.key) { |
| log.warn("No valid Qwen key available for model listing"); |
| throw new Error("No valid Qwen API key available"); |
| } |
|
|
| |
| const response = await axios.get("https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", { |
| headers: { |
| "Content-Type": "application/json", |
| "Authorization": `Bearer ${qwenKey.key}` |
| }, |
| }); |
|
|
| if (!response.data || !response.data.data) { |
| throw new Error("Unexpected response format from Qwen API"); |
| } |
|
|
| |
| const models = response.data; |
| |
| |
| const knownQwenModels = [ |
| "qwen-max", |
| "qwen-max-latest", |
| "qwen-max-2025-01-25", |
| "qwen-plus", |
| "qwen-plus-latest", |
| "qwen-plus-2025-01-25", |
| "qwen-turbo", |
| "qwen-turbo-latest", |
| "qwen-turbo-2024-11-01", |
| "qwen3-235b-a22b", |
| "qwen3-32b", |
| "qwen3-30b-a3b" |
| ]; |
| |
| |
| if (models.data && Array.isArray(models.data)) { |
| |
| const existingModelIds = new Set(models.data.map((model: any) => model.id)); |
| |
| |
| models.data = models.data.filter((model: any) => { |
| return !isQwen3Model(model.id) || isThinkingVariant(model.id) || isNonThinkingVariant(model.id); |
| }); |
| |
| |
| knownQwenModels.forEach(modelId => { |
| if (!existingModelIds.has(modelId)) { |
| models.data.push({ |
| id: modelId, |
| object: "model", |
| created: Date.now(), |
| owned_by: "qwen", |
| capabilities: isQwenThinkingModel(modelId) ? { thinking: true } : {} |
| }); |
| } |
| }); |
| |
| |
| const processedModelIds = new Set(); |
| const originalModelsData = [...models.data]; |
| |
| models.data = originalModelsData.flatMap((model: any) => { |
| const modelId = model.id; |
| processedModelIds.add(modelId); |
| |
| |
| if (isQwenThinkingModel(modelId)) { |
| model.capabilities = model.capabilities || {}; |
| model.capabilities.thinking = true; |
| } |
| |
| |
| if (isQwen3Model(modelId) && |
| !isThinkingVariant(modelId) && |
| !isNonThinkingVariant(modelId)) { |
| |
| |
| const thinkingModel = { |
| id: `${modelId}-thinking`, |
| object: "model", |
| created: model.created || Date.now(), |
| owned_by: model.owned_by || "qwen", |
| capabilities: { thinking: true }, |
| proxy_managed: true, |
| display_name: `${model.display_name || modelId} (Thinking Mode)` |
| }; |
| |
| |
| const nonThinkingModel = { |
| id: `${modelId}-nonthinking`, |
| object: "model", |
| created: model.created || Date.now(), |
| owned_by: model.owned_by || "qwen", |
| capabilities: { thinking: true }, |
| proxy_managed: true, |
| display_name: `${model.display_name || modelId} (Standard Mode)` |
| }; |
| |
| |
| return [thinkingModel, nonThinkingModel]; |
| } |
| |
| return [model]; |
| }); |
| } else { |
| |
| models.data = knownQwenModels.flatMap(modelId => { |
| |
| if (isQwen3Model(modelId) && |
| !isThinkingVariant(modelId) && |
| !isNonThinkingVariant(modelId)) { |
| |
| return [ |
| { |
| id: `${modelId}-thinking`, |
| object: "model", |
| created: Date.now(), |
| owned_by: "qwen", |
| capabilities: { thinking: true }, |
| proxy_managed: true, |
| display_name: `${modelId} (Thinking Mode)` |
| }, |
| { |
| id: `${modelId}-nonthinking`, |
| object: "model", |
| created: Date.now(), |
| owned_by: "qwen", |
| capabilities: { thinking: true }, |
| proxy_managed: true, |
| display_name: `${modelId} (Standard Mode)` |
| } |
| ]; |
| } |
| |
| |
| const baseModel = { |
| id: modelId, |
| object: "model", |
| created: Date.now(), |
| owned_by: "qwen", |
| capabilities: isQwenThinkingModel(modelId) ? { thinking: true } : {} |
| }; |
| |
| return [baseModel]; |
| }); |
| } |
|
|
| log.debug({ modelCount: models.data?.length }, "Retrieved models from Qwen API"); |
|
|
| |
| modelsCache = models; |
| modelsCacheTime = new Date().getTime(); |
| return models; |
| } catch (error) { |
| |
| if (error instanceof Error) { |
| log.error( |
| { errorMessage: error.message, stack: error.stack }, |
| "Error fetching Qwen models" |
| ); |
| } else { |
| log.error({ error }, "Unknown error fetching Qwen models"); |
| } |
| |
| |
| return { |
| object: "list", |
| data: [], |
| }; |
| } |
| }; |
|
|
| const handleModelRequest: RequestHandler = async (_req, res) => { |
| try { |
| const models = await getModelsResponse(); |
| res.status(200).json(models); |
| } catch (error) { |
| if (error instanceof Error) { |
| log.error( |
| { errorMessage: error.message, stack: error.stack }, |
| "Error handling model request" |
| ); |
| } else { |
| log.error({ error }, "Unknown error handling model request"); |
| } |
| res.status(500).json({ error: "Failed to fetch models" }); |
| } |
| }; |
|
|
| |
| function prepareMessages(req: Request) { |
| if (req.body.messages && Array.isArray(req.body.messages)) { |
| req.body.messages = normalizeMessages(req.body.messages); |
| } |
| } |
|
|
| |
| function handleThinkingCapability(req: Request) { |
| const model = req.body.model; |
| |
| |
| if (isThinkingVariant(model)) { |
| |
| req.body.model = getBaseModelName(model); |
| |
| req.body.enable_thinking = true; |
| |
| |
| log.debug( |
| { originalModel: model, transformedModel: req.body.model, enableThinking: true }, |
| "Transformed request for thinking variant" |
| ); |
| return; |
| } |
| |
| if (isNonThinkingVariant(model)) { |
| |
| req.body.model = getBaseModelName(model); |
| |
| req.body.enable_thinking = false; |
| |
| |
| log.debug( |
| { originalModel: model, transformedModel: req.body.model, enableThinking: false }, |
| "Transformed request for non-thinking variant" |
| ); |
| return; |
| } |
| |
| |
| if (isQwenThinkingModel(model) && req.body.stream === true) { |
| |
| if (req.body.enable_thinking === undefined) { |
| req.body.enable_thinking = false; |
| } |
| |
| |
| if (req.body.thinking_budget !== undefined && req.body.enable_thinking === false) { |
| req.body.enable_thinking = true; |
| } |
| } else if (isQwenThinkingModel(model) && req.body.stream !== true) { |
| |
| req.body.enable_thinking = false; |
| } |
| } |
|
|
| |
| function removeUnsupportedParameters(req: Request) { |
| |
| if (req.body.logit_bias !== undefined) { |
| delete req.body.logit_bias; |
| } |
| |
| if (req.body.top_logprobs !== undefined) { |
| delete req.body.top_logprobs; |
| } |
| |
| |
| if (process.env.NODE_ENV !== 'production') { |
| log.debug({ body: req.body }, "Request after parameter cleanup"); |
| } |
| } |
|
|
| |
| function countQwenTokens(req: Request) { |
| const model = req.body.model; |
| |
| if (isQwenModel(model)) { |
| |
| if (req.promptTokens) { |
| req.log.debug( |
| { tokens: req.promptTokens }, |
| "Estimated token count for Qwen prompt" |
| ); |
| } |
| } |
| } |
|
|
// Queued proxy middleware targeting the DashScope OpenAI-compatible base
// URL. `addKey` and `finalizeBody` mutate the outbound request (presumably
// key attachment and body serialization — see middleware/request); responses
// are relayed through qwenResponseHandler.
const qwenProxy = createQueuedProxyMiddleware({
  mutations: [
    addKey,
    finalizeBody
  ],
  target: "https://dashscope-intl.aliyuncs.com/compatible-mode",
  blockingResponseHandler: qwenResponseHandler,
});
|
|
const qwenRouter = Router();


// Chat completions: OpenAI-compatible in/out, with Qwen-specific request
// preprocessing (message normalization, thinking-mode handling, unsupported
// parameter removal, token-count logging) applied after transformation.
qwenRouter.post(
  "/v1/chat/completions",
  ipLimiter,
  createPreprocessorMiddleware(
    { inApi: "openai", outApi: "openai", service: "qwen" },
    { afterTransform: [ prepareMessages, handleThinkingCapability, removeUnsupportedParameters, countQwenTokens ] }
  ),
  qwenProxy
);


// Embeddings: forwarded with no extra post-transform steps.
qwenRouter.post(
  "/v1/embeddings",
  ipLimiter,
  createPreprocessorMiddleware(
    { inApi: "openai", outApi: "openai", service: "qwen" },
    { afterTransform: [] }
  ),
  qwenProxy
);


// Model listing is answered locally from the cached upstream fetch.
qwenRouter.get("/v1/models", handleModelRequest);


export const qwen = qwenRouter;
|
|