Pepguy committed
Commit e5846be · verified · 1 Parent(s): e07a0e5

Update app.js

Files changed (1)
  1. app.js +173 -149
app.js CHANGED
@@ -34,31 +34,41 @@ const azureOpenAI = new OpenAI({
 function getBedrockModelId(modelName) {
   switch(modelName) {
     case "haiku":
-      return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/global.anthropic.claude-haiku-4-5-20251001-v1:0"
-      // return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/global.anthropic.claude-haiku-4-5";
+      return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/global.anthropic.claude-haiku-4-5-20251001-v1:0";
     case "maverick":
-      // Standard Bedrock cross-region inference mapping for Llama
-      // return "arn:aws:bedrock:us-east-1::foundation-model/meta.llama4-maverick-17b-instruct-v1:0";
-      return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/us.meta.llama4-maverick-17b-instruct-v1:0";
-    case "claude":
+      return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/us.meta.llama4-maverick-17b-instruct-v1:0";
+    case "claude":
     default:
       return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/global.anthropic.claude-sonnet-4-6";
   }
 }

-// --- NON-STREAMING ENDPOINT ---
+// --- NON-STREAMING ENDPOINT (UPDATED FOR VISION) ---
 app.post('/api/generate', async (req, res) => {
-  const { model, prompt, system_prompt } = req.body;
-  console.log(`[TRAFFIC] Request for ${model}`);
+  // EXTRACT IMAGES HERE
+  const { model, prompt, system_prompt, images } = req.body;
+  console.log(`[TRAFFIC] Request for ${model} ${images?.length ? 'with images' : ''}`);

   try {
     if (model === "gpt" || model === "gpt-5-mini") {
+      let messagesPayload =[
+        { role: "system", content: system_prompt || GPT_SYSTEM_PROMPT }
+      ];
+
+      // VISION SUPPORT FOR AZURE OPENAI
+      if (images && images.length > 0) {
+        let userContent = [{ type: "text", text: prompt }];
+        images.forEach(imgStr => {
+          userContent.push({ type: "image_url", image_url: { url: imgStr } });
+        });
+        messagesPayload.push({ role: "user", content: userContent });
+      } else {
+        messagesPayload.push({ role: "user", content: prompt });
+      }
+
       const response = await azureOpenAI.chat.completions.create({
         model: "gpt-5-mini",
-        messages:[
-          { role: "system", content: system_prompt || GPT_SYSTEM_PROMPT },
-          { role: "user", content: prompt }
-        ],
+        messages: messagesPayload,
         reasoning_effort: "high"
       });

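For reference, the updated endpoint now accepts an optional images array alongside model, prompt, and system_prompt. A minimal caller sketch (hypothetical host and values; image entries are base64 data URLs, which is what both the Azure and Bedrock branches expect):

    // Hypothetical client for POST /api/generate (host/port assumed).
    // images entries come from e.g. FileReader.readAsDataURL().
    const res = await fetch("http://localhost:3000/api/generate", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: "haiku",                      // "gpt", "claude", "haiku", or "maverick"
        prompt: "Describe this screenshot.",
        system_prompt: null,                 // server falls back to its default prompt
        images: ["data:image/png;base64,iVBORw0KGgo..."]
      })
    });
    const { success, data, usage } = await res.json();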
@@ -68,56 +78,52 @@ app.post('/api/generate', async (req, res) => {
     } else {
       // Handles Claude Sonnet, Claude Haiku, and Llama Maverick
       const bedrockModelId = getBedrockModelId(model);
-
-      /* const command = new ConverseCommand({
+
+      // VISION SUPPORT FOR AWS BEDROCK
+      let contentBlock = [{ text: prompt }];
+      if (images && images.length > 0) {
+        const imageBlocks = images.map(imgStr => {
+          const base64Data = imgStr.replace(/^data:image\/\w+;base64,/, "");
+          return {
+            image: {
+              format: 'png', // Assuming normalized to PNG by frontend
+              source: { bytes: Buffer.from(base64Data, 'base64') }
+            }
+          };
+        });
+        contentBlock = [...imageBlocks, ...contentBlock];
+      }
+
+      const command = new ConverseCommand({
         modelId: bedrockModelId,
-        system: [{ text: system_prompt || CLAUDE_SYSTEM_PROMPT }],
-        messages: [{ role: "user", content: [{ text: prompt }] }],
-
-        inferenceConfig: model.includes("claude") ? { maxTokens: 48000, temperature: 1 }:
-          (model.includes("haiku")? {maxTokens: 30000, temperature: 1 }
-          : {maxTokens: 3800, temperature: 1 }),
-
-        performanceConfig: model.includes("maverick") ? {latency: "standard"} : undefined,
+        system:[{ text: system_prompt || CLAUDE_SYSTEM_PROMPT }],
+        messages: [{ role: "user", content: contentBlock }],

-        additionalModelRequestFields: model.includes("claude") ? {
-          thinking: { type: "adaptive" },
-          output_config: { effort: "high" }
-        } : undefined // Llama does not support Claude's specific thinking fields
+        // Ensure maxTokens is large enough for reasoning + response
+        inferenceConfig: {
+          maxTokens: model.includes("haiku") ? 32000 : 4000,
+          temperature: 1
+        },
+
+        performanceConfig: model.includes("maverick") ? { latency: "standard" } : undefined,
+
+        additionalModelRequestFields: (function() {
+          if (model.includes("haiku")) {
+            return {
+              reasoning_config: {
+                type: "enabled",
+                budget_tokens: 2048
+              }
+            };
+          } else if (model.includes("claude")) {
+            return {
+              thinking: { type: "adaptive" },
+              output_config: { effort: "high" }
+            };
+          }
+          return undefined;
+        })()
       });
-
-      */
-
-      const command = new ConverseCommand({
-        modelId: bedrockModelId,
-        system: [{ text: system_prompt || CLAUDE_SYSTEM_PROMPT }],
-        messages: [{ role: "user", content: [{ text: prompt }] }],
-
-        // Ensure maxTokens is large enough for reasoning + response
-        inferenceConfig: {
-          maxTokens: model.includes("haiku") ? 32000 : 4000,
-          temperature: 1
-        },
-
-        performanceConfig: model.includes("maverick") ? { latency: "standard" } : undefined,
-
-        additionalModelRequestFields: (function() {
-          if (model.includes("haiku")) {
-            return {
-              reasoning_config: {
-                type: "enabled",
-                budget_tokens: 2048 // As seen in your screenshot
-              }
-            };
-          } else if (model.includes("claude")) {
-            return {
-              thinking: { type: "adaptive" },
-              output_config: { effort: "high" }
-            };
-          }
-          return undefined;
-        })()
-      });

       const response = await bedrockClient.send(command);
       const text = response.output.message.content.find(b => b.text)?.text;
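Restated for easier review, the IIFE above resolves additionalModelRequestFields to one of three outcomes (no new behavior, just the cases spelled out):

    // haiku    -> { reasoning_config: { type: "enabled", budget_tokens: 2048 } }
    // claude   -> { thinking: { type: "adaptive" }, output_config: { effort: "high" } }
    // maverick -> undefined (Llama takes no Claude-specific thinking fields)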
@@ -198,7 +204,7 @@ app.post('/api/stream', async (req, res) => {
       const command = new ConverseStreamCommand({
         modelId: bedrockModelId,
         system:[{ text: system_prompt || CLAUDE_SYSTEM_PROMPT }],
-        messages: [{ role: "user", content: contentBlock }],
+        messages:[{ role: "user", content: contentBlock }],
         inferenceConfig: { maxTokens: 48000, temperature: 1 },
         additionalModelRequestFields: model.includes("claude") ? {
           thinking: { type: "adaptive" },
@@ -255,8 +261,8 @@ const CLAUDE_SYSTEM_PROMPT = "You are a pro. Provide elite, high-level technical
 const GPT_SYSTEM_PROMPT = "You are a worker. Be concise, efficient, and get the job done.";

 const bedrockClient = new BedrockRuntimeClient({
-  region: "us-east-1" ,
-  requestHandler: new NodeHttpHandler({
+  region: "us-east-1",
+  requestHandler: new NodeHttpHandler({
     http2Handler: undefined,
   })
 });
@@ -268,38 +274,32 @@ const azureOpenAI = new OpenAI({
   defaultHeaders: { "api-key": "7U3m9NRkE38ThSWTr92hMgQ4hDCUFI9MAnFNrCgRL7MhdvckfTXwJQQJ99CBACHYHv6XJ3w3AAAAACOGV22P" }
 });

+// --- DYNAMIC MODEL ROUTER ---
+function getBedrockModelId(modelName) {
+  switch(modelName) {
+    case "haiku":
+      return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/global.anthropic.claude-haiku-4-5-20251001-v1:0"
+      // return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/global.anthropic.claude-haiku-4-5";
+    case "maverick":
+      // Standard Bedrock cross-region inference mapping for Llama
+      // return "arn:aws:bedrock:us-east-1::foundation-model/meta.llama4-maverick-17b-instruct-v1:0";
+      return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/us.meta.llama4-maverick-17b-instruct-v1:0";
+    case "claude":
+    default:
+      return "arn:aws:bedrock:us-east-1:106774395747:inference-profile/global.anthropic.claude-sonnet-4-6";
+  }
+}
+
 // --- NON-STREAMING ENDPOINT ---
 app.post('/api/generate', async (req, res) => {
-  const { model, prompt, system_prompt} = req.body;
+  const { model, prompt, system_prompt } = req.body;
   console.log(`[TRAFFIC] Request for ${model}`);

   try {
-    if (model === "claude") {
-      const command = new ConverseCommand({
-        modelId: "arn:aws:bedrock:us-east-1:106774395747:inference-profile/global.anthropic.claude-sonnet-4-6",
-        system: [{ text: system_prompt || CLAUDE_SYSTEM_PROMPT }],
-        messages: [{ role: "user", content: [{ text: prompt }] }],
-        inferenceConfig: { maxTokens: 48000, temperature: 1 },
-        additionalModelRequestFields: {
-          thinking: { type: "adaptive" },
-          output_config: { effort: "high" }
-        }
-      });
-
-      const response = await bedrockClient.send(command);
-      const text = response.output.message.content.find(b => b.text)?.text;
-      const usage = response.metrics ?
-        (response.metrics.latencyMs || 0) : 0; // Bedrock metrics vary, usually usage is in metadata
-
-      // Extract token usage from Bedrock response
-      const tokenUsage = response.usage ? (response.usage.inputTokens + response.usage.outputTokens) : 0;
-
-      res.json({ success: true, data: text, usage: { totalTokenCount: tokenUsage } });
-
-    } else {
+    if (model === "gpt" || model === "gpt-5-mini") {
       const response = await azureOpenAI.chat.completions.create({
         model: "gpt-5-mini",
-        messages: [
+        messages:[
           { role: "system", content: system_prompt || GPT_SYSTEM_PROMPT },
           { role: "user", content: prompt }
         ],
@@ -308,9 +308,51 @@ app.post('/api/generate', async (req, res) => {

       const totalTokens = response.usage ? response.usage.total_tokens : 0;
       res.json({ success: true, data: response.choices[0].message.content, usage: { totalTokenCount: totalTokens } });
+
+    } else {
+      // Handles Claude Sonnet, Claude Haiku, and Llama Maverick
+      const bedrockModelId = getBedrockModelId(model);
+
+
+      const command = new ConverseCommand({
+        modelId: bedrockModelId,
+        system: [{ text: system_prompt || CLAUDE_SYSTEM_PROMPT }],
+        messages: [{ role: "user", content: [{ text: prompt }] }],
+
+        // Ensure maxTokens is large enough for reasoning + response
+        inferenceConfig: {
+          maxTokens: model.includes("haiku") ? 32000 : 4000,
+          temperature: 1
+        },
+
+        performanceConfig: model.includes("maverick") ? { latency: "standard" } : undefined,
+
+        additionalModelRequestFields: (function() {
+          if (model.includes("haiku")) {
+            return {
+              reasoning_config: {
+                type: "enabled",
+                budget_tokens: 2048 // As seen in your screenshot
+              }
+            };
+          } else if (model.includes("claude")) {
+            return {
+              thinking: { type: "adaptive" },
+              output_config: { effort: "high" }
+            };
+          }
+          return undefined;
+        })()
+      });
+
+      const response = await bedrockClient.send(command);
+      const text = response.output.message.content.find(b => b.text)?.text;
+      const tokenUsage = response.usage ? (response.usage.inputTokens + response.usage.outputTokens) : 0;
+
+      res.json({ success: true, data: text, usage: { totalTokenCount: tokenUsage } });
     }
   } catch (err) {
-    console.error(`❌ [${model.toUpperCase()} ERROR]:`, err.name, err.message);
+    console.error(`❌[${model?.toUpperCase() || 'UNKNOWN'} ERROR]:`, err.name, err.message);
     res.status(500).json({ success: false, error: `${err.name}: ${err.message}` });
   }
 });
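Both branches of this handler reply with the same envelope; a successful response looks like the sketch below (illustrative values), while errors return HTTP 500 with { success: false, error: "<name>: <message>" }:

    // Illustrative /api/generate response body:
    // { "success": true, "data": "<model output text>", "usage": { "totalTokenCount": 1234 } }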
@@ -328,8 +370,42 @@ app.post('/api/stream', async (req, res) => {
   let totalTokenCount = 0;

   try {
-    if (model === "claude") {
-      // Construct Content Block for Claude
+    if (model === "gpt" || model === "gpt-5-mini") {
+      let messagesPayload =[
+        { role: "system", content: system_prompt || GPT_SYSTEM_PROMPT }
+      ];
+
+      let userContent =[];
+      if (images && images.length > 0) {
+        userContent.push({ type: "text", text: prompt });
+        images.forEach(imgStr => {
+          userContent.push({ type: "image_url", image_url: { url: imgStr } });
+        });
+        messagesPayload.push({ role: "user", content: userContent });
+      } else {
+        messagesPayload.push({ role: "user", content: prompt });
+      }
+
+      const stream = await azureOpenAI.chat.completions.create({
+        model: "gpt-5-mini",
+        messages: messagesPayload,
+        reasoning_effort: "high",
+        stream: true,
+        stream_options: { include_usage: true }
+      });
+
+      for await (const chunk of stream) {
+        const delta = chunk.choices[0]?.delta;
+        if (delta?.reasoning_content) res.write(`__THINK__${delta.reasoning_content}`);
+        else if (delta?.content) res.write(delta.content);
+        if (chunk.usage) totalTokenCount = chunk.usage.total_tokens;
+      }
+
+      res.write(`__USAGE__${JSON.stringify({ totalTokenCount })}`);
+      res.end();
+
+    } else {
+      const bedrockModelId = getBedrockModelId(model);
       let contentBlock = [{ text: prompt }];

       if (images && images.length > 0) {
@@ -337,7 +413,7 @@ app.post('/api/stream', async (req, res) => {
           const base64Data = imgStr.replace(/^data:image\/\w+;base64,/, "");
           return {
             image: {
-              format: 'png',
+              format: 'png', // Assuming normalized to PNG by frontend
               source: { bytes: Buffer.from(base64Data, 'base64') }
             }
           };
@@ -346,14 +422,14 @@ app.post('/api/stream', async (req, res) => {
       }

       const command = new ConverseStreamCommand({
-        modelId: "arn:aws:bedrock:us-east-1:106774395747:inference-profile/global.anthropic.claude-sonnet-4-6",
-        system: [{ text: system_prompt || CLAUDE_SYSTEM_PROMPT }],
+        modelId: bedrockModelId,
+        system:[{ text: system_prompt || CLAUDE_SYSTEM_PROMPT }],
         messages: [{ role: "user", content: contentBlock }],
         inferenceConfig: { maxTokens: 48000, temperature: 1 },
-        additionalModelRequestFields: {
+        additionalModelRequestFields: model.includes("claude") ? {
           thinking: { type: "adaptive" },
           output_config: { effort: "high" }
-        }
+        } : undefined
       });

       const response = await bedrockClient.send(command);
@@ -367,60 +443,11 @@ app.post('/api/stream', async (req, res) => {
           res.write(delta.text);
         }
       }
-      // Capture Usage from Bedrock Stream
       if (chunk.metadata && chunk.metadata.usage) {
         totalTokenCount = (chunk.metadata.usage.inputTokens || 0) + (chunk.metadata.usage.outputTokens || 0);
       }
     }

-      // Send Usage Footer
-      res.write(`__USAGE__${JSON.stringify({ totalTokenCount })}`);
-      res.end();
-
-    } else {
-      // Construct Content Block for OpenAI
-      let messagesPayload = [
-        { role: "system", content: system_prompt || GPT_SYSTEM_PROMPT }
-      ];
-
-      let userContent = [];
-      if (images && images.length > 0) {
-        userContent.push({ type: "text", text: prompt });
-        images.forEach(imgStr => {
-          userContent.push({
-            type: "image_url",
-            image_url: { url: imgStr }
-          });
-        });
-        messagesPayload.push({ role: "user", content: userContent });
-      } else {
-        messagesPayload.push({ role: "user", content: prompt });
-      }
-
-      const stream = await azureOpenAI.chat.completions.create({
-        model: "gpt-5-mini",
-        messages: messagesPayload,
-        reasoning_effort: "high",
-        stream: true,
-        stream_options: { include_usage: true } // Request usage stats in stream
-      });
-
-      for await (const chunk of stream) {
-        const delta = chunk.choices[0]?.delta;
-
-        if (delta?.reasoning_content) {
-          res.write(`__THINK__${delta.reasoning_content}`);
-        } else if (delta?.content) {
-          res.write(delta.content);
-        }
-
-        // Capture Usage from OpenAI Stream (usually in the last chunk)
-        if (chunk.usage) {
-          totalTokenCount = chunk.usage.total_tokens;
-        }
-      }
-
-      // Send Usage Footer
       res.write(`__USAGE__${JSON.stringify({ totalTokenCount })}`);
       res.end();
     }
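The stream endpoint multiplexes reasoning text, answer text, and a usage footer over one response body via the __THINK__ and __USAGE__ markers. A minimal Node 18+ consumer sketch (endpoint URL assumed; the naive startsWith parsing presumes each marker arrives at a chunk boundary, so a production client should buffer):

    const res = await fetch("http://localhost:3000/api/stream", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ model: "claude", prompt: "Hello" })
    });
    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      const text = decoder.decode(value, { stream: true });
      if (text.startsWith("__USAGE__")) {
        const { totalTokenCount } = JSON.parse(text.slice(9)); // usage footer
        console.log("tokens:", totalTokenCount);
      } else if (text.startsWith("__THINK__")) {
        console.log("[thinking]", text.slice(9));
      } else {
        process.stdout.write(text); // answer tokens
      }
    }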
@@ -431,9 +458,6 @@ app.post('/api/stream', async (req, res) => {
   }
 });

-app.get('/', async (req, res) => {
-  res.json({ success: true });
-});
-
+app.get('/', async (req, res) => { res.json({ success: true }); });
 app.listen(PORT, '0.0.0.0', () => console.log(`Main AI Agent live on port ${PORT}`));
 */
 