import grpc from '@grpc/grpc-js';
import protoLoader from '@grpc/proto-loader';
import { AutoRouter, cors, error, json } from 'itty-router';
import dotenv from 'dotenv';
import path, { dirname } from 'path';
import { fileURLToPath } from 'url';
import { createServerAdapter } from '@whatwg-node/server';
import { createServer } from 'http';

dotenv.config();

const __dirname = dirname(fileURLToPath(import.meta.url));
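
// Service configuration, sourced from environment variables with defaults.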
class Config {
  constructor() {
    this.API_PREFIX = process.env.API_PREFIX || ''; // '' keeps routes like '/v1/models' from becoming '//v1/models'
    this.API_KEY = process.env.API_KEY || '';
    this.MAX_RETRY_COUNT = Number(process.env.MAX_RETRY_COUNT) || 3;
    this.RETRY_DELAY = Number(process.env.RETRY_DELAY) || 5000;
    this.COMMON_GRPC = 'runtime-native-io-vertex-inference-grpc-service-lmuw6mcn3q-ul.a.run.app';
    this.COMMON_PROTO = path.join(__dirname, '..', 'protos', 'VertexInferenceService.proto');
    this.GPT_GRPC = 'runtime-native-io-gpt-inference-grpc-service-lmuw6mcn3q-ul.a.run.app';
    this.GPT_PROTO = path.join(__dirname, '..', 'protos', 'GPTInferenceService.proto');
    this.PORT = Number(process.env.PORT) || 8787;
  }
}
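
// Synchronously loads a .proto file into a package definition for grpc.loadPackageDefinition.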
class GRPCHandler {
  constructor(protoFilePath) {
    this.packageDefinition = protoLoader.loadSync(protoFilePath, {
      keepCase: true,
      longs: String,
      enums: String,
      defaults: true,
      oneofs: true,
    });
  }
}
const config = new Config();

const { preflight, corsify } = cors({
  origin: '*',
  allowMethods: '*',
  exposeHeaders: '*',
});
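
// Bearer-token authentication, enforced only when API_KEY is configured.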
const withAuth = (request) => {
  if (config.API_KEY) {
    const authHeader = request.headers.get('Authorization');
    if (!authHeader || !authHeader.startsWith('Bearer ')) {
      return error(401, 'Unauthorized: Missing or invalid Authorization header');
    }
    const token = authHeader.substring(7);
    if (token !== config.API_KEY) {
      return error(403, 'Forbidden: Invalid API key');
    }
  }
};
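
// Request timing and access logging.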
const withTiming = (request) => {
  // Stamp the request so the logger below can report latency.
  request.start = Date.now();
};

const logger = (res, req) => {
  console.log(req.method, res.status, req.url, Date.now() - req.start, 'ms');
};

const router = AutoRouter({
  before: [withTiming, preflight, withAuth],
  missing: () => error(404, '404 not found.'),
  finally: [corsify, logger],
});

router.get('/', () => json({ message: 'API service is running~' }));
router.get('/ping', () => json({ message: 'pong' }));
router.get(config.API_PREFIX + '/v1/models', () =>
  json({
    object: 'list',
    data: [
      { id: 'gpt-4o-mini', object: 'model', owned_by: 'pieces-os' },
      { id: 'gpt-4o', object: 'model', owned_by: 'pieces-os' },
      { id: 'gpt-4-turbo', object: 'model', owned_by: 'pieces-os' },
      { id: 'gpt-4', object: 'model', owned_by: 'pieces-os' },
      { id: 'gpt-3.5-turbo', object: 'model', owned_by: 'pieces-os' },
      { id: 'claude-3-sonnet@20240229', object: 'model', owned_by: 'pieces-os' },
      { id: 'claude-3-opus@20240229', object: 'model', owned_by: 'pieces-os' },
      { id: 'claude-3-haiku@20240307', object: 'model', owned_by: 'pieces-os' },
      { id: 'claude-3-5-sonnet@20240620', object: 'model', owned_by: 'pieces-os' },
      { id: 'gemini-1.5-flash', object: 'model', owned_by: 'pieces-os' },
      { id: 'gemini-1.5-pro', object: 'model', owned_by: 'pieces-os' },
      { id: 'chat-bison', object: 'model', owned_by: 'pieces-os' },
      { id: 'codechat-bison', object: 'model', owned_by: 'pieces-os' },
    ],
  })
);
router.post(config.API_PREFIX + '/v1/chat/completions', (req) => handleCompletion(req));
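
// Builds the gRPC request for the selected backend (GPT vs. Vertex) and dispatches it.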
async function GrpcToPieces(models, message, rules, stream, temperature, top_p) {
  const credentials = grpc.credentials.createSsl();
  let client, request;
  if (models.includes('gpt')) {
    // GPT models go through the dedicated GPT inference service.
    const packageDefinition = new GRPCHandler(config.GPT_PROTO).packageDefinition;
    request = {
      models: models,
      messages: [
        { role: 0, message: rules },   // role 0: system rules
        { role: 1, message: message }, // role 1: conversation history
      ],
      temperature: temperature ?? 0.1,
      top_p: top_p ?? 1,
    };
    const GRPCobjects = grpc.loadPackageDefinition(packageDefinition).runtime.aot.machine_learning.parents.gpt;
    client = new GRPCobjects.GPTInferenceService(config.GPT_GRPC, credentials);
  } else {
    // All other models go through the common Vertex inference service.
    const packageDefinition = new GRPCHandler(config.COMMON_PROTO).packageDefinition;
    request = {
      models: models,
      args: {
        messages: {
          unknown: 1,
          message: message,
        },
        rules: rules,
      },
    };
    const GRPCobjects = grpc.loadPackageDefinition(packageDefinition).runtime.aot.machine_learning.parents.vertex;
    client = new GRPCobjects.VertexInferenceService(config.COMMON_GRPC, credentials);
  }
  return await ConvertOpenai(client, request, models, stream);
}
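
// Flattens OpenAI-style messages into the plain-text "rules" (system) and
// "message" (dialog) strings the gRPC services expect.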
async function messagesProcess(messages) {
  let rules = '';
  let message = '';

  for (const msg of messages) {
    const role = msg.role;
    // Content may be a plain string or an array of content parts; keep only text parts.
    const contentStr = Array.isArray(msg.content)
      ? msg.content
          .filter((item) => item.text)
          .map((item) => item.text)
          .join('')
      : msg.content;

    if (role === 'system') {
      rules += `system:${contentStr};\r\n`;
    } else if (['user', 'assistant'].includes(role)) {
      message += `${role}:${contentStr};\r\n`;
    }
  }

  return { rules, message };
}
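
// Calls the gRPC backend (streaming or unary) and adapts the reply to the
// OpenAI chat-completions wire format, retrying up to MAX_RETRY_COUNT times.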
async function ConvertOpenai(client, request, model, stream) {
  let lastError;
  for (let i = 0; i < config.MAX_RETRY_COUNT; i++) {
    try {
      if (stream) {
        const call = client.PredictWithStream(request);
        const encoder = new TextEncoder();
        const ReturnStream = new ReadableStream({
          start(controller) {
            call.on('data', (response) => {
              const response_code = Number(response.response_code);
              if (response_code === 204) {
                // 204 marks the end of the stream.
                controller.close();
                call.destroy();
              } else if (response_code === 200) {
                let response_message;
                if (model.includes('gpt')) {
                  // Field names follow the upstream proto, including its spelling.
                  response_message = response.body.message_warpper.message.message;
                } else {
                  response_message = response.args.args.args.message;
                }
                controller.enqueue(encoder.encode(`data: ${JSON.stringify(ChatCompletionStreamWithModel(response_message, model))}\n\n`));
              } else {
                controller.error(new Error(`Stream chunk failed with response_code ${response_code}`));
                call.destroy();
              }
            });
            call.on('error', (err) => controller.error(err));
          },
        });
        return new Response(ReturnStream, {
          headers: {
            'Content-Type': 'text/event-stream',
          },
        });
      } else {
        const call = await new Promise((resolve, reject) => {
          client.Predict(request, (err, response) => {
            if (err) reject(err);
            else resolve(response);
          });
        });
        const response_code = Number(call.response_code);
        if (response_code !== 200) {
          throw new Error(`Predict failed with response_code ${response_code}`);
        }
        let response_message;
        if (model.includes('gpt')) {
          response_message = call.body.message_warpper.message.message;
        } else {
          response_message = call.args.args.args.message;
        }
        return new Response(JSON.stringify(ChatCompletionWithModel(response_message, model)), {
          headers: {
            'Content-Type': 'application/json',
          },
        });
      }
    } catch (err) {
      lastError = err;
      console.error(err);
      await new Promise((resolve) => setTimeout(resolve, config.RETRY_DELAY));
    }
  }
  return error(500, lastError ? lastError.message : 'Max retry count exceeded');
}
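
// Non-streaming OpenAI chat.completion envelope (token usage is not reported
// by the backend, so it is zeroed out).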
function ChatCompletionWithModel(message, model) {
  return {
    id: 'Chat-Nekohy',
    object: 'chat.completion',
    created: Math.floor(Date.now() / 1000), // OpenAI uses a Unix timestamp in seconds
    model,
    usage: {
      prompt_tokens: 0,
      completion_tokens: 0,
      total_tokens: 0,
    },
    choices: [
      {
        message: {
          content: message,
          role: 'assistant',
        },
        index: 0,
      },
    ],
  };
}
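
// Streaming chat.completion.chunk envelope for a single SSE delta.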
function ChatCompletionStreamWithModel(text, model) {
  return {
    id: 'chatcmpl-Nekohy',
    object: 'chat.completion.chunk',
    created: Math.floor(Date.now() / 1000),
    model,
    choices: [
      {
        index: 0,
        delta: {
          content: text,
        },
        finish_reason: null,
      },
    ],
  };
}
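
// Route handler: parses the OpenAI-style request body and forwards it to the gRPC bridge.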
async function handleCompletion(request) {
  try {
    const { model: inputModel, messages, stream, temperature, top_p } = await request.json();
    console.log(inputModel, messages, stream);
    const { rules, message: content } = await messagesProcess(messages);
    console.log(rules, content);
    return await GrpcToPieces(inputModel, content, rules, stream, temperature, top_p);
  } catch (err) {
    return error(500, err.message);
  }
}
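
// Node bootstrap: skipped in worker-style runtimes that expose addEventListener
// (e.g. Cloudflare Workers), where the platform drives the router instead.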
(async () => {
  if (typeof addEventListener === 'function') return;

  const ittyServer = createServerAdapter(router.fetch);
  const httpServer = createServer(ittyServer);

  httpServer.on('error', (err) => {
    console.error('Server error:', err);
  });

  httpServer.listen(config.PORT, '0.0.0.0', () => {
    console.log(`Server is running on http://0.0.0.0:${config.PORT}`);
  });
})();
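
/*
 * Example request (a sketch, assuming the defaults: no API_PREFIX, no API_KEY, port 8787):
 *
 *   curl http://localhost:8787/v1/chat/completions \
 *     -H 'Content-Type: application/json' \
 *     -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "Hello"}], "stream": false}'
 */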