|
|
import grpc from '@grpc/grpc-js';
|
|
|
import protoLoader from '@grpc/proto-loader';
|
|
|
import {AutoRouter, cors, error, json} from 'itty-router';
|
|
|
import dotenv from 'dotenv';
|
|
|
import path,{ dirname } from 'path';
|
|
|
import { fileURLToPath } from 'url';
|
|
|
import {createServerAdapter} from '@whatwg-node/server';
|
|
|
import {createServer} from 'http';
|
|
|
|
|
|
|
|
|
// Load environment variables from a local .env file into process.env.
dotenv.config();

// Recreate CommonJS-style __dirname for this ES module (used below to
// locate the .proto files relative to this file).
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
|
|
|
|
/**
 * Runtime configuration, read once from environment variables with
 * defaults. Numeric settings are coerced with Number() so string env
 * values never leak into loop bounds or setTimeout delays.
 */
class Config {
    constructor() {
        // URL prefix prepended to the OpenAI-compatible routes.
        this.API_PREFIX = process.env.API_PREFIX || '/';
        // Optional bearer token; when empty, authentication is disabled.
        this.API_KEY = process.env.API_KEY || '';
        // Retry policy for upstream gRPC calls. Env values arrive as
        // strings, so coerce; fall back on NaN/0 to the defaults.
        this.MAX_RETRY_COUNT = Number(process.env.MAX_RETRY_COUNT) || 3;
        this.RETRY_DELAY = Number(process.env.RETRY_DELAY) || 5000;
        // Upstream gRPC endpoints and their proto definitions.
        this.COMMON_GRPC = 'runtime-native-io-vertex-inference-grpc-service-lmuw6mcn3q-ul.a.run.app';
        this.COMMON_PROTO = path.join(__dirname, '..', 'protos', 'VertexInferenceService.proto');
        this.GPT_GRPC = 'runtime-native-io-gpt-inference-grpc-service-lmuw6mcn3q-ul.a.run.app';
        this.GPT_PROTO = path.join(__dirname, '..', 'protos', 'GPTInferenceService.proto');
        // Local HTTP port.
        this.PORT = Number(process.env.PORT) || 8787;
    }
}
|
|
|
/**
 * Thin wrapper around @grpc/proto-loader: parses a .proto file once and
 * exposes the resulting package definition for grpc.loadPackageDefinition.
 */
class GRPCHandler {
    constructor(protoFilePath) {
        // Options mirror the proto-loader defaults recommended for
        // grpc-js interop (string longs/enums, populated defaults).
        const loaderOptions = {
            keepCase: true,
            longs: String,
            enums: String,
            defaults: true,
            oneofs: true,
        };
        this.packageDefinition = protoLoader.loadSync(protoFilePath, loaderOptions);
    }
}
|
|
|
// Shared configuration instance used by the router and gRPC helpers.
const config = new Config();

// Fully permissive CORS: `preflight` answers OPTIONS requests up front,
// `corsify` stamps the CORS headers onto every outgoing response.
const { preflight, corsify } = cors({
    origin: '*',
    allowMethods: '*',
    exposeHeaders: '*',
});
|
|
|
|
|
|
|
|
|
/**
 * Router "before" hook enforcing bearer-token auth.
 * A no-op when config.API_KEY is unset. Otherwise the request must carry
 * "Authorization: Bearer <API_KEY>"; returning an error response here
 * short-circuits the router, returning undefined lets the request through.
 */
const withAuth = (request) => {
    if (!config.API_KEY) return;

    const header = request.headers.get('Authorization');
    if (!header?.startsWith('Bearer ')) {
        return error(401, 'Unauthorized: Missing or invalid Authorization header');
    }

    const presentedKey = header.slice(7);
    if (presentedKey !== config.API_KEY) {
        return error(403, 'Forbidden: Invalid API key');
    }
};
|
|
|
|
|
|
/**
 * Router "finally" hook: logs method, response status, URL and elapsed
 * milliseconds. `req.start` is only present when a before-hook stamped
 * it; without the guard `Date.now() - undefined` would log NaN.
 */
const logger = (res, req) => {
    const elapsed = typeof req.start === 'number' ? Date.now() - req.start : '-';
    console.log(req.method, res.status, req.url, elapsed, 'ms');
};
|
|
|
// Stamp the arrival time on each request so the `logger` finally-hook
// can report latency (nothing else in the file sets `request.start`,
// which previously made the log print NaN).
const withStartTime = (request) => {
    request.start = Date.now();
};

// Router wiring: time-stamp, answer CORS preflights, then enforce auth;
// every response is CORS-decorated and logged on the way out.
const router = AutoRouter({
    before: [withStartTime, preflight, withAuth],
    missing: () => error(404, '404 not found.'),
    finally: [corsify, logger],
});
|
|
|
|
|
|
// Health-check endpoints.
router.get('/', () => json({ message: 'API 服务运行中~' }));
router.get('/ping', () => json({ message: 'pong' }));

// Model ids advertised by this proxy; all are served via Pieces OS.
const MODEL_IDS = [
    'gpt-4o-mini',
    'gpt-4o',
    'gpt-4-turbo',
    'gpt-4',
    'gpt-3.5-turbo',
    'claude-3-sonnet@20240229',
    'claude-3-opus@20240229',
    'claude-3-haiku@20240307',
    'claude-3-5-sonnet@20240620',
    'gemini-1.5-flash',
    'gemini-1.5-pro',
    'chat-bison',
    'codechat-bison',
];

// OpenAI-compatible model listing.
router.get(config.API_PREFIX + '/v1/models', () =>
    json({
        object: 'list',
        data: MODEL_IDS.map((id) => ({ id, object: 'model', owned_by: 'pieces-os' })),
    })
);

// OpenAI-compatible chat completion endpoint.
router.post(config.API_PREFIX + '/v1/chat/completions', (req) => handleCompletion(req));
|
|
|
|
|
|
/**
 * Dispatch a chat request to the appropriate Pieces OS gRPC backend.
 *
 * GPT-family models use the dedicated GPTInferenceService proto/endpoint;
 * everything else (Claude / Gemini / bison) goes through
 * VertexInferenceService, whose request shape nests the transcript and
 * rules under `args` instead.
 *
 * @param {string} models - target model id (Claude ids already @-renamed).
 * @param {string} message - flattened user/assistant transcript.
 * @param {string} rules - flattened system prompts.
 * @param {boolean} stream - whether the caller requested SSE streaming.
 * @param {number|undefined} temperature - GPT-only sampling temperature.
 * @param {number|undefined} top_p - GPT-only nucleus sampling value.
 * @returns {Promise<Response>} OpenAI-compatible HTTP response.
 */
async function GrpcToPieces(models, message, rules, stream, temperature, top_p) {
    // All upstream endpoints are TLS-terminated Cloud Run services.
    const credentials = grpc.credentials.createSsl();
    let client, request;
    if (models.includes('gpt')) {
        const packageDefinition = new GRPCHandler(config.GPT_PROTO).packageDefinition;
        request = {
            models: models,
            messages: [
                { role: 0, message: rules },   // role 0: system rules
                { role: 1, message: message }, // role 1: conversation
            ],
            // ?? (not ||) so an explicit temperature of 0 is preserved.
            temperature: temperature ?? 0.1,
            top_p: top_p ?? 1,
        };
        const GRPCobjects = grpc.loadPackageDefinition(packageDefinition).runtime.aot.machine_learning.parents.gpt;
        client = new GRPCobjects.GPTInferenceService(config.GPT_GRPC, credentials);
    } else {
        const packageDefinition = new GRPCHandler(config.COMMON_PROTO).packageDefinition;
        request = {
            models: models,
            args: {
                messages: {
                    unknown: 1,
                    message: message,
                },
                rules: rules,
            },
        };
        const GRPCobjects = grpc.loadPackageDefinition(packageDefinition).runtime.aot.machine_learning.parents.vertex;
        client = new GRPCobjects.VertexInferenceService(config.COMMON_GRPC, credentials);
    }
    return await ConvertOpenai(client, request, models, stream);
}
|
|
|
|
|
|
/**
 * Flatten an OpenAI-style messages array into two strings:
 * `rules` collects system prompts, `message` collects user/assistant
 * turns, each rendered as "role:content;\r\n". Array-valued content
 * (multimodal parts) is reduced to its concatenated text parts;
 * any other role is ignored.
 */
async function messagesProcess(messages) {
    // Normalize one message's content to a plain string.
    const toText = (content) => {
        if (!Array.isArray(content)) return content;
        const joined = content
            .filter((part) => part.text)
            .map((part) => part.text)
            .join('');
        return joined || '';
    };

    let rules = '';
    let message = '';

    for (const { role, content } of messages) {
        const text = toText(content);
        if (role === 'system') {
            rules += `system:${text};\r\n`;
        } else if (role === 'user' || role === 'assistant') {
            message += `${role}:${text};\r\n`;
        }
    }

    return { rules, message };
}
|
|
|
|
|
|
/**
 * Call the gRPC client and adapt the reply into an OpenAI-style Response.
 * Retries up to config.MAX_RETRY_COUNT times, sleeping config.RETRY_DELAY
 * ms after a thrown failure. GPT and Vertex replies carry the completion
 * text at different paths, selected by the model id.
 *
 * @param {object} client - GPTInferenceService or VertexInferenceService stub.
 * @param {object} request - backend-specific request payload.
 * @param {string} model - model id (used to pick the response path).
 * @param {boolean} stream - SSE streaming vs. single JSON body.
 * @returns {Promise<Response>} HTTP response, or a 500 once retries are exhausted.
 */
async function ConvertOpenai(client, request, model, stream) {
    // Pull the completion text out of a gRPC payload; the nesting differs
    // between the GPT and Vertex protos.
    const extractMessage = (payload) =>
        model.includes('gpt')
            ? payload.body.message_warpper.message.message
            : payload.args.args.args.message;

    let lastError; // remembered so the final 500 can report the real failure
    for (let i = 0; i < config.MAX_RETRY_COUNT; i++) {
        try {
            if (stream) {
                const call = client.PredictWithStream(request);
                const encoder = new TextEncoder();
                const ReturnStream = new ReadableStream({
                    start(controller) {
                        call.on('data', (response) => {
                            const response_code = Number(response.response_code);
                            if (response_code === 204) {
                                // 204 marks end-of-stream.
                                controller.close();
                                call.destroy();
                            } else if (response_code === 200) {
                                const response_message = extractMessage(response);
                                controller.enqueue(encoder.encode(`data: ${JSON.stringify(ChatCompletionStreamWithModel(response_message, model))}\n\n`));
                            } else {
                                // error() already puts the stream in an errored
                                // state; the original also called close() here,
                                // which throws on an errored controller.
                                controller.error(new Error(`Error: stream chunk is not success`));
                                call.destroy();
                            }
                        });
                    },
                });
                return new Response(ReturnStream, {
                    headers: {
                        'Content-Type': 'text/event-stream',
                    },
                });
            } else {
                // Promisify the unary callback API.
                const call = await new Promise((resolve, reject) => {
                    client.Predict(request, (err, response) => {
                        if (err) reject(err);
                        else resolve(response);
                    });
                });
                const response_code = Number(call.response_code);
                if (response_code === 200) {
                    const response_message = extractMessage(call);
                    return new Response(JSON.stringify(ChatCompletionWithModel(response_message, model)), {
                        headers: {
                            'Content-Type': 'application/json',
                        },
                    });
                }
                // Non-200: fall through and retry (no delay), as before.
            }
        } catch (err) {
            console.error(err);
            lastError = err;
            await new Promise((resolve) => setTimeout(resolve, config.RETRY_DELAY));
        }
    }
    // BUG FIX: the original referenced the catch-scoped `err` here, which
    // is a ReferenceError once all retries are exhausted.
    return error(500, lastError ? lastError.message : 'Request failed after all retries');
}
|
|
|
|
|
|
/**
 * Normalize Claude model ids: callers may send "claude-3-5-sonnet-20240620"
 * while the backend expects "claude-3-5-sonnet@20240620" — swap the last
 * dash before the 8-digit date for an "@". Non-Claude ids and
 * already-normalized ids pass through unchanged.
 */
function renameIfNeeded(input) {
    const claudeDated = /^(claude-3-(5-sonnet|haiku|sonnet|opus))-(\d{8})$/;
    const parts = claudeDated.exec(input);
    return parts ? `${parts[1]}@${parts[3]}` : input;
}
|
|
|
|
|
|
/**
 * Build a non-streaming OpenAI chat.completion response body.
 * The backend does not report token usage, so usage is zeroed.
 *
 * @param {string} message - assistant reply text.
 * @param {string} model - model id to echo back.
 * @returns {object} OpenAI-schema chat.completion object.
 */
function ChatCompletionWithModel(message, model) {
    return {
        id: 'Chat-Nekohy',
        object: 'chat.completion',
        // OpenAI's `created` is a Unix timestamp in SECONDS; the original
        // used Date.now() (milliseconds).
        created: Math.floor(Date.now() / 1000),
        model,
        usage: {
            prompt_tokens: 0,
            completion_tokens: 0,
            total_tokens: 0,
        },
        choices: [
            {
                message: {
                    content: message,
                    role: 'assistant',
                },
                // Non-streamed replies are complete, so report a normal stop.
                finish_reason: 'stop',
                index: 0,
            },
        ],
    };
}
|
|
|
|
|
|
/**
 * Build one OpenAI chat.completion.chunk object for SSE streaming,
 * carrying `text` as the delta content of a single open-ended choice.
 */
function ChatCompletionStreamWithModel(text, model) {
    const choice = {
        index: 0,
        delta: { content: text },
        finish_reason: null,
    };
    return {
        id: 'chatcmpl-Nekohy',
        object: 'chat.completion.chunk',
        created: 0,
        model,
        choices: [choice],
    };
}
|
|
|
|
|
|
/**
 * Handle POST /v1/chat/completions: parse the OpenAI-style JSON body,
 * normalize the model id, flatten the messages into rules/transcript,
 * then forward to the gRPC backend. Any thrown failure becomes a 500
 * carrying the error message.
 */
async function handleCompletion(request) {
    try {
        const body = await request.json();
        const { model: inputModel, messages, stream, temperature, top_p } = body;
        console.log(inputModel, messages, stream);

        const model = renameIfNeeded(inputModel);
        const { rules, message: content } = await messagesProcess(messages);
        console.log(rules, content);

        return await GrpcToPieces(model, content, rules, stream, temperature, top_p);
    } catch (err) {
        return error(500, err.message);
    }
}
|
|
|
|
|
|
// Bootstrap a local Node HTTP server — but only outside service-worker-style
// runtimes (where a global `addEventListener` exists and the platform drives
// `router.fetch` directly).
(async () => {
    if (typeof addEventListener === 'function') return;
    const ittyServer = createServerAdapter(router.fetch);
    const httpServer = createServer(ittyServer);
    // Log only once the port is actually bound; the original logged
    // "Listening" before listen() was even called.
    httpServer.listen(config.PORT, () => {
        console.log(`Listening on http://localhost:${config.PORT}`);
    });
})();