Spaces:
Running
Running
| import express from 'express'; | |
| import cors from 'cors'; | |
| import dotenv from 'dotenv'; | |
| import OpenAI from "openai"; | |
| import { BedrockRuntimeClient, ConverseCommand, ConverseStreamCommand } from "@aws-sdk/client-bedrock-runtime"; | |
| import { NodeHttpHandler } from "@smithy/node-http-handler"; | |
// Load variables from a local .env file (if present) into process.env
// before any configuration below is read.
dotenv.config();

const app = express();
const PORT = process.env.PORT || 7860;

app.use(cors());
// Generous JSON body limit: the endpoints in this file accept base64-encoded
// images inside the request body.
app.use(express.json({ limit: '50mb' }));

// --- SYSTEM PROMPT DEFINITIONS ---
// Fallbacks used when a request does not carry its own `system_prompt`.
const CLAUDE_SYSTEM_PROMPT = "You are a pro. Provide elite, high-level technical responses.";
const GPT_SYSTEM_PROMPT = "You are a worker. Be concise, efficient, and get the job done.";

// --- AWS BEDROCK CLIENT ---
// The region is fixed because the inference-profile ARNs used by this file
// are all in us-east-1.
const bedrockClient = new BedrockRuntimeClient({
  region: "us-east-1",
  requestHandler: new NodeHttpHandler({
    // NOTE(review): explicitly undefined in the original; presumably meant to
    // keep the client on HTTP/1.1 — confirm this option has any effect.
    http2Handler: undefined,
  }),
});

// --- AZURE OPENAI CLIENT ---
// Credentials and endpoint come from the environment instead of being
// hard-coded in source:
//   AZURE_OPENAI_API_KEY  - resource key, sent both as the SDK key and as the
//                           `api-key` header.
//   AZURE_OPENAI_BASE_URL - full deployment URL, e.g.
//                           https://<resource>.cognitiveservices.azure.com/openai/deployments/<deployment>
// Both fall back to "" (the previous hard-coded value) when unset.
const azureApiKey = process.env.AZURE_OPENAI_API_KEY ?? "";
const azureOpenAI = new OpenAI({
  apiKey: azureApiKey,
  baseURL: process.env.AZURE_OPENAI_BASE_URL ?? "",
  defaultQuery: { "api-version": "2024-05-01-preview" },
  defaultHeaders: { "api-key": azureApiKey },
});
// --- DYNAMIC MODEL ROUTER ---
/**
 * Translate a short model alias into the Bedrock inference-profile ARN to call.
 *
 * @param {string} modelName - Alias from the request ("haiku", "maverick", "claude").
 * @returns {string} The matching ARN. Any value other than "haiku" or
 *   "maverick" (including "claude" and unknown names) resolves to the
 *   Claude Sonnet profile.
 */
function getBedrockModelId(modelName) {
  if (modelName === "haiku") {
    return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/global.anthropic.claude-haiku-4-5-20251001-v1:0";
  }

  if (modelName === "maverick") {
    return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/us.meta.llama4-maverick-17b-instruct-v1:0";
  }

  // "claude" and every unrecognised alias share the Sonnet profile.
  return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/global.anthropic.claude-sonnet-4-6";
}
// --- NON-STREAMING ENDPOINT (UPDATED FOR VISION) ---
/**
 * POST /api/generate — run one prompt to completion and return the full answer as JSON.
 *
 * Request body:
 *   model         - "gpt" / "gpt-5-mini" are sent to Azure OpenAI; every other
 *                   value is resolved by getBedrockModelId() and sent to Bedrock.
 *   prompt        - user text; must be a string.
 *   system_prompt - optional override for the default system prompt.
 *   images        - optional array of image strings. Azure receives them
 *                   unchanged as `image_url`; for Bedrock a leading
 *                   `data:image/<type>;base64,` prefix is stripped and the rest
 *                   is decoded as base64.
 *
 * Responses:
 *   200 { success: true, data, usage: { totalTokenCount } }
 *   400 { success: false, error } when `prompt` or `images` is malformed
 *   500 { success: false, error } when the upstream call fails
 */
app.post('/api/generate', async (req, res) => {
  // `?? {}` keeps destructuring from throwing when no body was parsed.
  const { model, prompt, system_prompt, images } = req.body ?? {};
  console.log(`[TRAFFIC] Request for ${model} ${images?.length ? 'with images' : ''}`);

  // `model` is client-controlled and may be missing or not a string. All
  // string operations below go through this view so they cannot throw —
  // in particular inside the catch handler, where a second throw would leave
  // the request without any response.
  const modelKey = typeof model === "string" ? model : "";

  try {
    if (typeof prompt !== "string") {
      return res.status(400).json({ success: false, error: "ValidationError: prompt must be a string" });
    }
    const imagesAreValid =
      images == null || (Array.isArray(images) && images.every((img) => typeof img === "string"));
    if (!imagesAreValid) {
      return res.status(400).json({ success: false, error: "ValidationError: images must be an array of strings" });
    }
    const imageList = images ?? [];

    if (model === "gpt" || model === "gpt-5-mini") {
      // VISION SUPPORT FOR AZURE OPENAI
      // With images the user message becomes a multi-part content array;
      // otherwise it stays a plain string.
      const userContent = imageList.length > 0
        ? [
            { type: "text", text: prompt },
            ...imageList.map((imgStr) => ({ type: "image_url", image_url: { url: imgStr } })),
          ]
        : prompt;

      const response = await azureOpenAI.chat.completions.create({
        model: "gpt-5-mini",
        messages: [
          { role: "system", content: system_prompt || GPT_SYSTEM_PROMPT },
          { role: "user", content: userContent },
        ],
        reasoning_effort: "high",
      });

      const totalTokens = response.usage?.total_tokens ?? 0;
      res.json({
        success: true,
        data: response.choices[0].message.content,
        usage: { totalTokenCount: totalTokens },
      });
    } else {
      // Handles Claude Sonnet, Claude Haiku, and Llama Maverick
      const bedrockModelId = getBedrockModelId(model);

      // VISION SUPPORT FOR AWS BEDROCK
      // Use the image type declared in the data URL when it is one Bedrock
      // accepts; otherwise keep the historical assumption that the frontend
      // normalised the image to PNG.
      const bedrockImageFormats = new Set(["png", "jpeg", "gif", "webp"]);
      const toImageBlock = (imgStr) => {
        const prefix = /^data:image\/(\w+);base64,/.exec(imgStr);
        const declared = prefix ? prefix[1].toLowerCase() : "png";
        const normalized = declared === "jpg" ? "jpeg" : declared;
        const base64Data = prefix ? imgStr.slice(prefix[0].length) : imgStr;
        return {
          image: {
            format: bedrockImageFormats.has(normalized) ? normalized : "png",
            source: { bytes: Buffer.from(base64Data, 'base64') },
          },
        };
      };
      // Images are placed before the text block.
      const contentBlock = [...imageList.map(toImageBlock), { text: prompt }];

      // Model-specific request extensions.
      let additionalModelRequestFields;
      if (modelKey.includes("haiku")) {
        additionalModelRequestFields = {
          reasoning_config: {
            type: "enabled",
            budget_tokens: 2048,
          },
        };
      } else if (modelKey.includes("claude")) {
        // `thinking: { type: "adaptive" }` is deliberately left off in this
        // endpoint, as it was in the original code.
        additionalModelRequestFields = {
          output_config: { effort: "high" },
        };
      }

      const command = new ConverseCommand({
        modelId: bedrockModelId,
        system: [{ text: system_prompt || CLAUDE_SYSTEM_PROMPT }],
        messages: [{ role: "user", content: contentBlock }],
        // Ensure maxTokens is large enough for reasoning + response
        inferenceConfig: {
          maxTokens: modelKey.includes("haiku") ? 32000 : 4000,
          temperature: 1,
        },
        performanceConfig: modelKey.includes("maverick") ? { latency: "standard" } : undefined,
        additionalModelRequestFields,
      });

      const response = await bedrockClient.send(command);
      // The first content block that carries text is the answer; blocks
      // without a `text` field are skipped.
      const text = response.output.message.content.find((block) => block.text)?.text;
      // Default each counter to 0 so a missing field cannot produce NaN.
      const tokenUsage = (response.usage?.inputTokens ?? 0) + (response.usage?.outputTokens ?? 0);
      res.json({ success: true, data: text, usage: { totalTokenCount: tokenUsage } });
    }
  } catch (err) {
    console.error(`❌[${modelKey.toUpperCase() || 'UNKNOWN'} ERROR]:`, err.name, err.message);
    res.status(500).json({ success: false, error: `${err.name}: ${err.message}` });
  }
});
// --- STREAMING ENDPOINT ---
/**
 * POST /api/stream — stream a model answer as chunked plain text.
 *
 * Request body: same fields as /api/generate (model, prompt, system_prompt, images).
 *
 * Wire format (all written to one text/plain body):
 *   - answer text is written as-is;
 *   - reasoning text is written prefixed with `__THINK__`;
 *   - after the last chunk, `__USAGE__{"totalTokenCount":N}` is appended;
 *   - on failure, `ERROR: <message>` is written and the response ends.
 *
 * Headers are flushed before any work starts, so errors (including invalid
 * input) are reported in-band rather than through the HTTP status.
 */
app.post('/api/stream', async (req, res) => {
  // `?? {}` keeps destructuring from throwing when no body was parsed.
  const { model, prompt, system_prompt, images } = req.body ?? {};
  console.log(`[STREAM] Request for ${model} ${images?.length ? 'with images' : ''}`);

  res.setHeader('Content-Type', 'text/plain; charset=utf-8');
  res.setHeader('Transfer-Encoding', 'chunked');
  res.setHeader('X-Accel-Buffering', 'no');
  res.flushHeaders();

  // `model` is client-controlled; use a string view for substring checks so
  // a missing or non-string value cannot throw.
  const modelKey = typeof model === "string" ? model : "";

  // Cancel the upstream model call when the client goes away before the
  // response was finished, instead of generating output nobody will read.
  const upstreamAbort = new AbortController();
  res.on('close', () => {
    if (!res.writableEnded) upstreamAbort.abort();
  });

  let totalTokenCount = 0;

  try {
    if (typeof prompt !== "string") {
      throw new TypeError("prompt must be a string");
    }
    const imagesAreValid =
      images == null || (Array.isArray(images) && images.every((img) => typeof img === "string"));
    if (!imagesAreValid) {
      throw new TypeError("images must be an array of strings");
    }
    const imageList = images ?? [];

    if (model === "gpt" || model === "gpt-5-mini") {
      // With images the user message becomes a multi-part content array;
      // otherwise it stays a plain string.
      const userContent = imageList.length > 0
        ? [
            { type: "text", text: prompt },
            ...imageList.map((imgStr) => ({ type: "image_url", image_url: { url: imgStr } })),
          ]
        : prompt;

      const stream = await azureOpenAI.chat.completions.create(
        {
          model: "gpt-5-mini",
          messages: [
            { role: "system", content: system_prompt || GPT_SYSTEM_PROMPT },
            { role: "user", content: userContent },
          ],
          reasoning_effort: "high",
          stream: true,
          stream_options: { include_usage: true },
        },
        { signal: upstreamAbort.signal }
      );

      for await (const chunk of stream) {
        // Some chunks (e.g. the usage chunk) carry no choice entry.
        const delta = chunk.choices?.[0]?.delta;
        if (delta?.reasoning_content) res.write(`__THINK__${delta.reasoning_content}`);
        else if (delta?.content) res.write(delta.content);
        if (chunk.usage) totalTokenCount = chunk.usage.total_tokens;
      }
    } else {
      const bedrockModelId = getBedrockModelId(model);

      // Use the image type declared in the data URL when it is one Bedrock
      // accepts; otherwise keep the historical assumption that the frontend
      // normalised the image to PNG.
      const bedrockImageFormats = new Set(["png", "jpeg", "gif", "webp"]);
      const toImageBlock = (imgStr) => {
        const prefix = /^data:image\/(\w+);base64,/.exec(imgStr);
        const declared = prefix ? prefix[1].toLowerCase() : "png";
        const normalized = declared === "jpg" ? "jpeg" : declared;
        const base64Data = prefix ? imgStr.slice(prefix[0].length) : imgStr;
        return {
          image: {
            format: bedrockImageFormats.has(normalized) ? normalized : "png",
            source: { bytes: Buffer.from(base64Data, 'base64') },
          },
        };
      };
      // Images are placed before the text block.
      const contentBlock = [...imageList.map(toImageBlock), { text: prompt }];

      const command = new ConverseStreamCommand({
        modelId: bedrockModelId,
        system: [{ text: system_prompt || CLAUDE_SYSTEM_PROMPT }],
        messages: [{ role: "user", content: contentBlock }],
        inferenceConfig: { maxTokens: 48000, temperature: 1 },
        additionalModelRequestFields: modelKey.includes("claude")
          ? {
              thinking: { type: "adaptive" },
              output_config: { effort: "high" },
            }
          : undefined,
      });

      const response = await bedrockClient.send(command, { abortSignal: upstreamAbort.signal });

      for await (const chunk of response.stream) {
        const delta = chunk.contentBlockDelta?.delta;
        if (delta) {
          if (delta.reasoningContent && delta.reasoningContent.text) {
            res.write(`__THINK__${delta.reasoningContent.text}`);
          } else if (delta.text) {
            res.write(delta.text);
          }
        }
        if (chunk.metadata && chunk.metadata.usage) {
          totalTokenCount = (chunk.metadata.usage.inputTokens || 0) + (chunk.metadata.usage.outputTokens || 0);
        }
      }
    }

    // The client may have disconnected while the last chunk was in flight.
    if (upstreamAbort.signal.aborted) return;

    res.write(`__USAGE__${JSON.stringify({ totalTokenCount })}`);
    res.end();
  } catch (err) {
    if (upstreamAbort.signal.aborted) {
      // The connection is already closed; there is nobody left to write to.
      console.log(`[STREAM] Client disconnected; upstream request cancelled`);
      return;
    }
    console.error(`❌ [STREAM ERROR]:`, err.message);
    res.write(`ERROR: ${err.message}`);
    res.end();
  }
});
/** GET / — liveness probe; always answers `{ success: true }`. */
async function handleRoot(req, res) {
  res.json({ success: true });
}

/** Logs the bound port once the server is accepting connections. */
function announceListening() {
  console.log(`Main AI Agent live on port ${PORT}`);
}

app.get('/', handleRoot);
// Bind on all interfaces.
app.listen(PORT, '0.0.0.0', announceListening);
| /* import express from 'express'; | |
| import cors from 'cors'; | |
| import dotenv from 'dotenv'; | |
| import OpenAI from "openai"; | |
| import { BedrockRuntimeClient, ConverseCommand, ConverseStreamCommand } from "@aws-sdk/client-bedrock-runtime"; | |
| import { NodeHttpHandler } from "@smithy/node-http-handler"; | |
| dotenv.config(); | |
| const app = express(); | |
| const PORT = process.env.PORT || 7860; | |
| app.use(cors()); | |
| app.use(express.json({ limit: '50mb' })); | |
| // --- SYSTEM PROMPT DEFINITIONS --- | |
| const CLAUDE_SYSTEM_PROMPT = "You are a pro. Provide elite, high-level technical responses."; | |
| const GPT_SYSTEM_PROMPT = "You are a worker. Be concise, efficient, and get the job done."; | |
| const bedrockClient = new BedrockRuntimeClient({ | |
| region: "us-east-1", | |
| requestHandler: new NodeHttpHandler({ | |
| http2Handler: undefined, | |
| }) | |
| }); | |
| const azureOpenAI = new OpenAI({ | |
| apiKey: process.env.AZURE_OPENAI_API_KEY, // secret removed from source; the previously embedded key should be rotated | |
| baseURL: `https://hollowpad-resource.cognitiveservices.azure.com/openai/deployments/gpt-5-mini`, | |
| defaultQuery: { "api-version": "2024-05-01-preview" }, | |
| defaultHeaders: { "api-key": process.env.AZURE_OPENAI_API_KEY } | |
| }); | |
| // --- DYNAMIC MODEL ROUTER --- | |
| function getBedrockModelId(modelName) { | |
| switch(modelName) { | |
| case "haiku": | |
| return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/global.anthropic.claude-haiku-4-5-20251001-v1:0" | |
| // return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/global.anthropic.claude-haiku-4-5"; | |
| case "maverick": | |
| // Standard Bedrock cross-region inference mapping for Llama | |
| // return "arn:aws:bedrock:us-east-1::foundation-model/meta.llama4-maverick-17b-instruct-v1:0"; | |
| return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/us.meta.llama4-maverick-17b-instruct-v1:0"; | |
| case "claude": | |
| default: | |
| return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/global.anthropic.claude-sonnet-4-6"; | |
| } | |
| } | |
| // --- NON-STREAMING ENDPOINT --- | |
| app.post('/api/generate', async (req, res) => { | |
| const { model, prompt, system_prompt } = req.body; | |
| console.log(`[TRAFFIC] Request for ${model}`); | |
| try { | |
| if (model === "gpt" || model === "gpt-5-mini") { | |
| const response = await azureOpenAI.chat.completions.create({ | |
| model: "gpt-5-mini", | |
| messages:[ | |
| { role: "system", content: system_prompt || GPT_SYSTEM_PROMPT }, | |
| { role: "user", content: prompt } | |
| ], | |
| reasoning_effort: "high" | |
| }); | |
| const totalTokens = response.usage ? response.usage.total_tokens : 0; | |
| res.json({ success: true, data: response.choices[0].message.content, usage: { totalTokenCount: totalTokens } }); | |
| } else { | |
| // Handles Claude Sonnet, Claude Haiku, and Llama Maverick | |
| const bedrockModelId = getBedrockModelId(model); | |
| const command = new ConverseCommand({ | |
| modelId: bedrockModelId, | |
| system: [{ text: system_prompt || CLAUDE_SYSTEM_PROMPT }], | |
| messages: [{ role: "user", content: [{ text: prompt }] }], | |
| // Ensure maxTokens is large enough for reasoning + response | |
| inferenceConfig: { | |
| maxTokens: model.includes("haiku") ? 32000 : 4000, | |
| temperature: 1 | |
| }, | |
| performanceConfig: model.includes("maverick") ? { latency: "standard" } : undefined, | |
| additionalModelRequestFields: (function() { | |
| if (model.includes("haiku")) { | |
| return { | |
| reasoning_config: { | |
| type: "enabled", | |
| budget_tokens: 2048 // As seen in your screenshot | |
| } | |
| }; | |
| } else if (model.includes("claude")) { | |
| return { | |
| thinking: { type: "adaptive" }, | |
| output_config: { effort: "high" } | |
| }; | |
| } | |
| return undefined; | |
| })() | |
| }); | |
| const response = await bedrockClient.send(command); | |
| const text = response.output.message.content.find(b => b.text)?.text; | |
| const tokenUsage = response.usage ? (response.usage.inputTokens + response.usage.outputTokens) : 0; | |
| res.json({ success: true, data: text, usage: { totalTokenCount: tokenUsage } }); | |
| } | |
| } catch (err) { | |
| console.error(`❌[${model?.toUpperCase() || 'UNKNOWN'} ERROR]:`, err.name, err.message); | |
| res.status(500).json({ success: false, error: `${err.name}: ${err.message}` }); | |
| } | |
| }); | |
| // --- STREAMING ENDPOINT --- | |
| app.post('/api/stream', async (req, res) => { | |
| const { model, prompt, system_prompt, images } = req.body; | |
| console.log(`[STREAM] Request for ${model} ${images?.length ? 'with images' : ''}`); | |
| res.setHeader('Content-Type', 'text/plain; charset=utf-8'); | |
| res.setHeader('Transfer-Encoding', 'chunked'); | |
| res.setHeader('X-Accel-Buffering', 'no'); | |
| res.flushHeaders(); | |
| let totalTokenCount = 0; | |
| try { | |
| if (model === "gpt" || model === "gpt-5-mini") { | |
| let messagesPayload =[ | |
| { role: "system", content: system_prompt || GPT_SYSTEM_PROMPT } | |
| ]; | |
| let userContent =[]; | |
| if (images && images.length > 0) { | |
| userContent.push({ type: "text", text: prompt }); | |
| images.forEach(imgStr => { | |
| userContent.push({ type: "image_url", image_url: { url: imgStr } }); | |
| }); | |
| messagesPayload.push({ role: "user", content: userContent }); | |
| } else { | |
| messagesPayload.push({ role: "user", content: prompt }); | |
| } | |
| const stream = await azureOpenAI.chat.completions.create({ | |
| model: "gpt-5-mini", | |
| messages: messagesPayload, | |
| reasoning_effort: "high", | |
| stream: true, | |
| stream_options: { include_usage: true } | |
| }); | |
| for await (const chunk of stream) { | |
| const delta = chunk.choices[0]?.delta; | |
| if (delta?.reasoning_content) res.write(`__THINK__${delta.reasoning_content}`); | |
| else if (delta?.content) res.write(delta.content); | |
| if (chunk.usage) totalTokenCount = chunk.usage.total_tokens; | |
| } | |
| res.write(`__USAGE__${JSON.stringify({ totalTokenCount })}`); | |
| res.end(); | |
| } else { | |
| const bedrockModelId = getBedrockModelId(model); | |
| let contentBlock = [{ text: prompt }]; | |
| if (images && images.length > 0) { | |
| const imageBlocks = images.map(imgStr => { | |
| const base64Data = imgStr.replace(/^data:image\/\w+;base64,/, ""); | |
| return { | |
| image: { | |
| format: 'png', // Assuming normalized to PNG by frontend | |
| source: { bytes: Buffer.from(base64Data, 'base64') } | |
| } | |
| }; | |
| }); | |
| contentBlock = [...imageBlocks, ...contentBlock]; | |
| } | |
| const command = new ConverseStreamCommand({ | |
| modelId: bedrockModelId, | |
| system:[{ text: system_prompt || CLAUDE_SYSTEM_PROMPT }], | |
| messages: [{ role: "user", content: contentBlock }], | |
| inferenceConfig: { maxTokens: 48000, temperature: 1 }, | |
| additionalModelRequestFields: model.includes("claude") ? { | |
| thinking: { type: "adaptive" }, | |
| output_config: { effort: "high" } | |
| } : undefined | |
| }); | |
| const response = await bedrockClient.send(command); | |
| for await (const chunk of response.stream) { | |
| if (chunk.contentBlockDelta) { | |
| const delta = chunk.contentBlockDelta.delta; | |
| if (delta.reasoningContent && delta.reasoningContent.text) { | |
| res.write(`__THINK__${delta.reasoningContent.text}`); | |
| } else if (delta.text) { | |
| res.write(delta.text); | |
| } | |
| } | |
| if (chunk.metadata && chunk.metadata.usage) { | |
| totalTokenCount = (chunk.metadata.usage.inputTokens || 0) + (chunk.metadata.usage.outputTokens || 0); | |
| } | |
| } | |
| res.write(`__USAGE__${JSON.stringify({ totalTokenCount })}`); | |
| res.end(); | |
| } | |
| } catch (err) { | |
| console.error(`❌ [STREAM ERROR]:`, err.message); | |
| res.write(`ERROR: ${err.message}`); | |
| res.end(); | |
| } | |
| }); | |
| app.get('/', async (req, res) => { res.json({ success: true }); }); | |
| app.listen(PORT, '0.0.0.0', () => console.log(`Main AI Agent live on port ${PORT}`)); | |
| */ |