Rfym21 commited on
Commit
b030890
·
verified ·
1 Parent(s): 969fdd6

Upload 6 files

Browse files
Dockerfile CHANGED
@@ -1,27 +1,27 @@
1
- # 使用 node:20.16.0-alpine 作为基础镜像
2
- FROM node:20.16.0-alpine
3
-
4
- # 设置工作目录
5
- WORKDIR /app
6
-
7
- # 复制 package.json 和 package-lock.json 到工作目录
8
- COPY package*.json ./
9
-
10
- # 设置环境变量
11
- ENV API_PREFIX=/api \
12
- API_KEY=123456 \
13
- MAX_RETRY_COUNT=3 \
14
- RETRY_DELAY=10000 \
15
- PORT=8787
16
-
17
- # 安装项目依赖
18
- RUN npm install
19
-
20
- # 复制项目的源代码到工作目录
21
- COPY . .
22
-
23
- # 暴露应用运行的端口(假设应用运行在 3000 端口)
24
- EXPOSE 8787
25
-
26
- # 启动应用
27
- CMD ["node", "index.js"]
 
1
+ # 使用 node:20.16.0-alpine 作为基础镜像
2
+ FROM node:20.16.0-alpine
3
+
4
+ # 设置工作目录
5
+ WORKDIR /app
6
+
7
+ # 复制 package.json 和 package-lock.json 到工作目录
8
+ COPY package*.json ./
9
+
10
+ # 设置环境变量
11
+ ENV API_PREFIX=/api \
12
+ API_KEY=123456 \
13
+ MAX_RETRY_COUNT=3 \
14
+ RETRY_DELAY=10000 \
15
+ PORT=8787
16
+
17
+ # 安装项目依赖
18
+ RUN npm install
19
+
20
+ # 复制项目的源代码到工作目录
21
+ COPY . .
22
+
23
+ # 暴露应用运行的端口 8787
24
+ EXPOSE 8787
25
+
26
+ # 启动应用
27
+ CMD ["node", "api/index.js"]
api/index.js ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import grpc from '@grpc/grpc-js';
2
+ import protoLoader from '@grpc/proto-loader';
3
+ import {AutoRouter, cors, error, json} from 'itty-router';
4
+ import dotenv from 'dotenv';
5
+ import path,{ dirname } from 'path';
6
+ import { fileURLToPath } from 'url';
7
+ import {createServerAdapter} from '@whatwg-node/server';
8
+ import {createServer} from 'http';
9
+
10
+ // 加载环境变量
11
+ dotenv.config();
12
+ // 获取当前文件的目录路径(ESM 方式)
13
+ const __dirname = dirname(fileURLToPath(import.meta.url));
14
+ // 初始化配置
15
+ class Config {
16
+ constructor() {
17
+ this.API_PREFIX = process.env.API_PREFIX || '/';
18
+ this.API_KEY = process.env.API_KEY || '';
19
+ this.MAX_RETRY_COUNT = process.env.MAX_RETRY_COUNT || 3;
20
+ this.RETRY_DELAY = process.env.RETRY_DELAY || 5000;
21
+ this.COMMON_GRPC = 'runtime-native-io-vertex-inference-grpc-service-lmuw6mcn3q-ul.a.run.app';
22
+ this.COMMON_PROTO = path.join(__dirname,'..', 'protos', 'VertexInferenceService.proto')
23
+ this.GPT_GRPC = 'runtime-native-io-gpt-inference-grpc-service-lmuw6mcn3q-ul.a.run.app';
24
+ this.GPT_PROTO = path.join(__dirname,'..', 'protos', 'GPTInferenceService.proto')
25
+ this.PORT = process.env.PORT || 8787;
26
+ }
27
+ }
28
+ class GRPCHandler {
29
+ constructor(protoFilePath) {
30
+ // 动态加载传入的 .proto 文件路径
31
+ this.packageDefinition = protoLoader.loadSync(protoFilePath, {
32
+ keepCase: true,
33
+ longs: String,
34
+ enums: String,
35
+ defaults: true,
36
+ oneofs: true
37
+ });
38
+ }
39
+ }
40
+ const config = new Config();
41
+ // 中间件
42
+ // 添加运行回源
43
+ const { preflight, corsify } = cors({
44
+ origin: '*',
45
+ allowMethods: '*',
46
+ exposeHeaders: '*',
47
+ });
48
+
49
+ // 添加认证
50
+ const withAuth = (request) => {
51
+ if (config.API_KEY) {
52
+ const authHeader = request.headers.get('Authorization');
53
+ if (!authHeader || !authHeader.startsWith('Bearer ')) {
54
+ return error(401, 'Unauthorized: Missing or invalid Authorization header');
55
+ }
56
+ const token = authHeader.substring(7);
57
+ if (token !== config.API_KEY) {
58
+ return error(403, 'Forbidden: Invalid API key');
59
+ }
60
+ }
61
+ };
62
+ // 返回运行信息
63
+ const logger = (res, req) => {
64
+ console.log(req.method, res.status, req.url, Date.now() - req.start, 'ms');
65
+ };
66
+ const router = AutoRouter({
67
+ before: [preflight, withAuth],
68
+ missing: () => error(404, '404 not found.'),
69
+ finally: [corsify, logger],
70
+ });
71
+ // Router路径
72
+ router.get('/', () => json({ message: 'API 服务运行中~' }));
73
+ router.get('/ping', () => json({ message: 'pong' }));
74
+ router.get(config.API_PREFIX + '/v1/models', () =>
75
+ json({
76
+ object: 'list',
77
+ data: [
78
+ { id: "gpt-4o-mini", object: "model", owned_by: "pieces-os" },
79
+ { id: "gpt-4o", object: "model", owned_by: "pieces-os" },
80
+ { id: "gpt-4-turbo", object: "model", owned_by: "pieces-os" },
81
+ { id: "gpt-4", object: "model", owned_by: "pieces-os" },
82
+ { id: "gpt-3.5-turbo", object: "model", owned_by: "pieces-os" },
83
+ { id: "claude-3-sonnet@20240229", object: "model", owned_by: "pieces-os" },
84
+ { id: "claude-3-opus@20240229", object: "model", owned_by: "pieces-os" },
85
+ { id: "claude-3-haiku@20240307", object: "model", owned_by: "pieces-os" },
86
+ { id: "claude-3-5-sonnet@20240620", object: "model", owned_by: "pieces-os" },
87
+ { id: "gemini-1.5-flash", object: "model", owned_by: "pieces-os" },
88
+ { id: "gemini-1.5-pro", object: "model", owned_by: "pieces-os" },
89
+ { id: "chat-bison", object: "model", owned_by: "pieces-os" },
90
+ { id: "codechat-bison", object: "model", owned_by: "pieces-os" },
91
+ ],
92
+ })
93
+ );
94
+ router.post(config.API_PREFIX + '/v1/chat/completions', (req) => handleCompletion(req));
95
+
96
+ async function GrpcToPieces(models, message, rules, stream, temperature, top_p) {
97
+ // 在非GPT类型的模型中,temperature和top_p是无效的
98
+ // 使用系统的根证书
99
+ const credentials = grpc.credentials.createSsl();
100
+ let client,request;
101
+ if (models.includes('gpt')){
102
+ // 加载proto文件
103
+ const packageDefinition = new GRPCHandler(config.GPT_PROTO).packageDefinition;
104
+ // 构建请求消息
105
+ request = {
106
+ models: models,
107
+ messages: [
108
+ {role: 0, message: rules}, // system
109
+ {role: 1, message: message} // user
110
+ ],
111
+ temperature:temperature || 0.1,
112
+ top_p:top_p ?? 1,
113
+ }
114
+ // 获取gRPC对象
115
+ const GRPCobjects = grpc.loadPackageDefinition(packageDefinition).runtime.aot.machine_learning.parents.gpt;
116
+ client = new GRPCobjects.GPTInferenceService(config.GPT_GRPC, credentials);
117
+ } else {
118
+ // 加载proto文件
119
+ const packageDefinition = new GRPCHandler(config.COMMON_PROTO).packageDefinition;
120
+ // 构建请求消息
121
+ request = {
122
+ models: models,
123
+ args: {
124
+ messages: {
125
+ unknown: 1,
126
+ message: message
127
+ },
128
+ rules: rules
129
+ }
130
+ };
131
+ // 获取gRPC对象
132
+ const GRPCobjects = grpc.loadPackageDefinition(packageDefinition).runtime.aot.machine_learning.parents.vertex;
133
+ client = new GRPCobjects.VertexInferenceService(config.COMMON_GRPC, credentials);
134
+ }
135
+ return await ConvertOpenai(client,request,models,stream);
136
+ }
137
+
138
+ async function messagesProcess(messages) {
139
+ let rules = '';
140
+ let message = '';
141
+
142
+ for (const msg of messages) {
143
+ let role = msg.role;
144
+ // 格式化为字符串
145
+ const contentStr = Array.isArray(msg.content)
146
+ ? msg.content
147
+ .filter((item) => item.text)
148
+ .map((item) => item.text)
149
+ .join('') || ''
150
+ : msg.content;
151
+ // 判断身份
152
+ if (role === 'system') {
153
+ rules += `system:${contentStr};\r\n`;
154
+ } else if (['user', 'assistant'].includes(role)) {
155
+ message += `${role}:${contentStr};\r\n`;
156
+ }
157
+ }
158
+
159
+ return { rules, message };
160
+ }
161
+
162
+ async function ConvertOpenai(client,request,model,stream) {
163
+ for (let i = 0; i < config.MAX_RETRY_COUNT; i++) {
164
+ try {
165
+ if (stream) {
166
+ const call = client.PredictWithStream(request);
167
+ const encoder = new TextEncoder();
168
+ const ReturnStream = new ReadableStream({
169
+ start(controller) {
170
+ call.on('data', (response) => {
171
+ let response_code = Number(response.response_code);
172
+ if (response_code === 204) {
173
+ // 如果 response_code 是 204,关闭流
174
+ controller.close()
175
+ call.destroy()
176
+ } else if (response_code === 200) {
177
+ let response_message
178
+ if (model.includes('gpt')) {
179
+ response_message = response.body.message_warpper.message.message;
180
+ } else {
181
+ response_message = response.args.args.args.message;
182
+ }
183
+ // 否则,将数据块加入流中
184
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(ChatCompletionStreamWithModel(response_message, model))}\n\n`));
185
+ } else {
186
+ controller.error(new Error(`Error: stream chunk is not success`));
187
+ controller.close()
188
+ }
189
+ })
190
+ }
191
+ });
192
+ return new Response(ReturnStream, {
193
+ headers: {
194
+ 'Content-Type': 'text/event-stream',
195
+ },
196
+ })
197
+ } else {
198
+ const call = await new Promise((resolve, reject) => {
199
+ client.Predict(request, (err, response) => {
200
+ if (err) reject(err);
201
+ else resolve(response);
202
+ });
203
+ });
204
+ let response_code = Number(call.response_code);
205
+ if (response_code === 200) {
206
+ let response_message
207
+ if (model.includes('gpt')) {
208
+ response_message = call.body.message_warpper.message.message;
209
+ } else {
210
+ response_message = call.args.args.args.message;
211
+ }
212
+ return new Response(JSON.stringify(ChatCompletionWithModel(response_message, model)), {
213
+ headers: {
214
+ 'Content-Type': 'application/json',
215
+ },
216
+ });
217
+ }
218
+ }
219
+ } catch (err) {
220
+ console.error(err);
221
+ await new Promise((resolve) => setTimeout(resolve, config.RETRY_DELAY));
222
+ }
223
+ }
224
+ return error(500, err.message);
225
+ }
226
+
227
+ function renameIfNeeded(input) {
228
+ // 替换的正则表达式
229
+ const regex = /^(claude-3-(5-sonnet|haiku|sonnet|opus))-(\d{8})$/;
230
+ const match = input.match(regex);
231
+ if (match) {
232
+ return `${match[1]}@${match[3]}`;
233
+ }
234
+ return input;
235
+ }
236
+
237
+ function ChatCompletionWithModel(message, model) {
238
+ return {
239
+ id: 'Chat-Nekohy',
240
+ object: 'chat.completion',
241
+ created: Date.now(),
242
+ model,
243
+ usage: {
244
+ prompt_tokens: 0,
245
+ completion_tokens: 0,
246
+ total_tokens: 0,
247
+ },
248
+ choices: [
249
+ {
250
+ message: {
251
+ content: message,
252
+ role: 'assistant',
253
+ },
254
+ index: 0,
255
+ },
256
+ ],
257
+ };
258
+ }
259
+
260
+ function ChatCompletionStreamWithModel(text, model) {
261
+ return {
262
+ id: 'chatcmpl-Nekohy',
263
+ object: 'chat.completion.chunk',
264
+ created: 0,
265
+ model,
266
+ choices: [
267
+ {
268
+ index: 0,
269
+ delta: {
270
+ content: text,
271
+ },
272
+ finish_reason: null,
273
+ },
274
+ ],
275
+ };
276
+ }
277
+
278
+ async function handleCompletion(request) {
279
+ try {
280
+ // todo stream逆向接口
281
+ // 解析openai格式API请求
282
+ const { model: inputModel, messages, stream,temperature,top_p} = await request.json();
283
+ console.log(inputModel,messages,stream)
284
+ const model = renameIfNeeded(inputModel);
285
+ // 解析system和user/assistant消息
286
+ const { rules, message:content } = await messagesProcess(messages);
287
+ console.log(rules,content)
288
+ // 响应码,回复的消息
289
+ return await GrpcToPieces(model, content, rules, stream, temperature, top_p);
290
+ } catch (err) {
291
+ return error(500, err.message);
292
+ }
293
+ }
294
+
295
+ (async () => {
296
+ //For Cloudflare Workers
297
+ if (typeof addEventListener === 'function') return;
298
+ // For Nodejs
299
+ const ittyServer = createServerAdapter(router.fetch);
300
+ console.log(`Listening on http://localhost:${config.PORT}`);
301
+ const httpServer = createServer(ittyServer);
302
+ httpServer.listen(config.PORT);
303
+ })();
cloud_model.json ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "iterable": [
3
+ {
4
+ "version": "t16-v01-i01",
5
+ "created": {
6
+ "value": "2023-09-15T16:00:00.000Z",
7
+ "readable": "about a year ago"
8
+ },
9
+ "name": "(PaLM2) Chat Model",
10
+ "unique": "chat-bison",
11
+ "provider": "GOOGLE",
12
+ "maxTokens": {
13
+ "total": 9000,
14
+ "input": 8000,
15
+ "output": 1000
16
+ }
17
+ },
18
+ {
19
+ "version": "t31-v01-i01",
20
+ "created": {
21
+ "value": "2024-07-17T16:00:00.000Z",
22
+ "readable": "4 months ago"
23
+ },
24
+ "name": "GPT-4o Mini Chat Model",
25
+ "unique": "gpt-4o-mini",
26
+ "provider": "OPENAI",
27
+ "maxTokens": {
28
+ "total": 80384,
29
+ "input": 64000,
30
+ "output": 16384
31
+ }
32
+ },
33
+ {
34
+ "version": "t25-v01-i01",
35
+ "created": {
36
+ "value": "2024-05-27T16:00:00.000Z",
37
+ "readable": "5 months ago"
38
+ },
39
+ "name": "Gemini-1.5 Pro Chat Model",
40
+ "unique": "gemini-1.5-pro",
41
+ "provider": "GOOGLE",
42
+ "maxTokens": {
43
+ "total": 128000,
44
+ "input": 119808,
45
+ "output": 8192
46
+ }
47
+ },
48
+ {
49
+ "version": "t24-v01-i01",
50
+ "created": {
51
+ "value": "2024-05-14T16:00:00.000Z",
52
+ "readable": "6 months ago"
53
+ },
54
+ "name": "GPT-4o Chat Model",
55
+ "unique": "gpt-4o",
56
+ "provider": "OPENAI",
57
+ "maxTokens": {
58
+ "total": 68096,
59
+ "input": 64000,
60
+ "output": 4096
61
+ }
62
+ },
63
+ {
64
+ "version": "t15-v01-i01",
65
+ "created": {
66
+ "value": "2023-09-15T16:00:00.000Z",
67
+ "readable": "about a year ago"
68
+ },
69
+ "name": "Codey (PaLM2) Chat Model",
70
+ "unique": "codechat-bison",
71
+ "provider": "GOOGLE",
72
+ "maxTokens": {
73
+ "total": 7000,
74
+ "input": 6000,
75
+ "output": 1000
76
+ }
77
+ },
78
+ {
79
+ "version": "t28-v01-i01",
80
+ "created": {
81
+ "value": "2024-02-28T16:00:00.000Z",
82
+ "readable": "8 months ago"
83
+ },
84
+ "name": "Claude 3 Sonnet Chat Model",
85
+ "unique": "claude-3-sonnet@20240229",
86
+ "provider": "ANTHROPIC",
87
+ "maxTokens": {
88
+ "total": 40000,
89
+ "input": 35000,
90
+ "output": 4096
91
+ }
92
+ },
93
+ {
94
+ "version": "t22-v01-i01",
95
+ "created": {
96
+ "value": "2024-01-07T16:00:00.000Z",
97
+ "readable": "10 months ago"
98
+ },
99
+ "name": "(Gemini) Chat Model",
100
+ "unique": "gemini-pro",
101
+ "provider": "GOOGLE",
102
+ "maxTokens": {
103
+ "total": 32000,
104
+ "input": 20000,
105
+ "output": 8000
106
+ }
107
+ },
108
+ {
109
+ "version": "t29-v01-i01",
110
+ "created": {
111
+ "value": "2024-02-28T16:00:00.000Z",
112
+ "readable": "8 months ago"
113
+ },
114
+ "name": "Claude 3 Opus Chat Model",
115
+ "unique": "claude-3-opus@20240229",
116
+ "provider": "ANTHROPIC",
117
+ "maxTokens": {
118
+ "total": 40000,
119
+ "input": 35000,
120
+ "output": 4096
121
+ }
122
+ },
123
+ {
124
+ "version": "t23-v01-i01",
125
+ "created": {
126
+ "value": "2024-02-08T16:00:00.000Z",
127
+ "readable": "9 months ago"
128
+ },
129
+ "name": "GPT-4 Turbo Chat Model",
130
+ "unique": "gpt-4-turbo",
131
+ "provider": "OPENAI",
132
+ "maxTokens": {
133
+ "total": 68096,
134
+ "input": 64000,
135
+ "output": 4096
136
+ }
137
+ },
138
+ {
139
+ "version": "t26-v01-i01",
140
+ "created": {
141
+ "value": "2024-05-27T16:00:00.000Z",
142
+ "readable": "5 months ago"
143
+ },
144
+ "name": "Gemini-1.5 Flash Chat Model",
145
+ "unique": "gemini-1.5-flash",
146
+ "provider": "GOOGLE",
147
+ "maxTokens": {
148
+ "total": 128000,
149
+ "input": 119808,
150
+ "output": 8192
151
+ }
152
+ },
153
+ {
154
+ "version": "t27-v01-i01",
155
+ "created": {
156
+ "value": "2024-06-19T16:00:00.000Z",
157
+ "readable": "5 months ago"
158
+ },
159
+ "name": "Claude 3.5 Sonnet Chat Model",
160
+ "unique": "claude-3-5-sonnet@20240620",
161
+ "provider": "ANTHROPIC",
162
+ "maxTokens": {
163
+ "total": 40000,
164
+ "input": 35000,
165
+ "output": 4096
166
+ }
167
+ },
168
+ {
169
+ "version": "t30-v01-i01",
170
+ "created": {
171
+ "value": "2024-03-26T16:00:00.000Z",
172
+ "readable": "7 months ago"
173
+ },
174
+ "name": "Claude 3 Haiku Chat Model",
175
+ "unique": "claude-3-haiku@20240307",
176
+ "provider": "ANTHROPIC",
177
+ "maxTokens": {
178
+ "total": 40000,
179
+ "input": 35000,
180
+ "output": 4096
181
+ }
182
+ },
183
+ {
184
+ "version": "t13-v01-i01",
185
+ "created": {
186
+ "value": "2023-09-15T16:00:00.000Z",
187
+ "readable": "about a year ago"
188
+ },
189
+ "name": "GPT-3.5-turbo Chat Model",
190
+ "unique": "gpt-3.5-turbo",
191
+ "provider": "OPENAI",
192
+ "maxTokens": {
193
+ "total": 16000,
194
+ "input": 12000,
195
+ "output": 4000
196
+ }
197
+ },
198
+ {
199
+ "version": "t14-v01-i01",
200
+ "created": {
201
+ "value": "2023-09-15T16:00:00.000Z",
202
+ "readable": "about a year ago"
203
+ },
204
+ "name": "GPT-4 Chat Model",
205
+ "unique": "gpt-4",
206
+ "provider": "OPENAI",
207
+ "maxTokens": {
208
+ "total": 8000,
209
+ "input": 4100,
210
+ "output": 3900
211
+ }
212
+ }
213
+ ]
214
+ }
package.json CHANGED
@@ -3,10 +3,11 @@
3
  "version": "1.0.0",
4
  "type": "module",
5
  "description": "",
6
- "main": "index.js",
7
  "scripts": {
8
- "dev": "node index.js"
9
  },
 
10
  "private": true,
11
  "dependencies": {
12
  "@grpc/grpc-js": "^1.12.2",
 
3
  "version": "1.0.0",
4
  "type": "module",
5
  "description": "",
6
+ "main": "api/index.js",
7
  "scripts": {
8
+ "start": "node api/index.js"
9
  },
10
+ "author": "Nekohy",
11
  "private": true,
12
  "dependencies": {
13
  "@grpc/grpc-js": "^1.12.2",
protos/GPTInferenceService.proto ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ syntax = "proto3"; // 使用Protobuf版本3
2
+
3
+ package runtime.aot.machine_learning.parents.gpt;
4
+
5
+ service GPTInferenceService {
6
+ rpc Predict (Request) returns (Response);
7
+ rpc PredictWithStream (Request) returns (stream Response);
8
+ }
9
+
10
+ // Global
11
+ message Message {
12
+ uint64 role = 1; // 值 0 为 system,1 为普通,回复中只有 1
13
+ string message = 2;
14
+ }
15
+
16
+ // Requests
17
+ message Request {
18
+ string models = 1; // 模型名称
19
+ repeated Message messages = 2; // 消息列表
20
+ double temperature = 3; // 采样温度
21
+ double top_p = 4; // 核心采样
22
+ }
23
+
24
+ // Response
25
+ message Response {
26
+ uint64 response_code = 2; // 返回状态码,200,439 等,204 为终止
27
+ optional Body body = 4;
28
+ }
29
+
30
+ message Body{
31
+ string id = 1; // eg.chatcmpl-ANcM3OsoLf6AXJpO76pDoW7Ry68oc
32
+ string object = 2; // eg. chat.completion
33
+ uint64 time = 3; // UNIX时间戳
34
+ MessageWarpper message_warpper = 4;
35
+ Unknown unknown = 5;
36
+ }
37
+
38
+ message MessageWarpper {
39
+ int64 arg1 = 1; // 也许是常量1
40
+ Message message = 2; // 回应的消息 wt u need
41
+ }
42
+
43
+ message Unknown {
44
+ // 都不知道
45
+ int64 arg1 = 1;
46
+ int64 arg2 = 2;
47
+ int64 arg3 = 3;
48
+ }
protos/VertexInferenceService.proto ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ syntax = "proto3"; // 使用Protobuf版本3
2
+
3
+ package runtime.aot.machine_learning.parents.vertex;
4
+
5
+ service VertexInferenceService {
6
+ rpc Predict (Requests) returns (Response);
7
+ rpc PredictWithStream (Requests) returns (stream Response);
8
+ }
9
+ //Global
10
+ message Messages{
11
+ int64 unknown = 1; // 也许只是常量1(请求)/3(回应)
12
+ string message = 2; // 消息
13
+ }
14
+
15
+ // Requests
16
+ message Requests {
17
+ string models = 1; // 模型名称
18
+ Args args = 2;
19
+ }
20
+
21
+ message Args {
22
+ reserved 1;
23
+ Messages messages = 2; // 消息,可以视作user
24
+ string rules = 3; // 规则,可以视作system
25
+ }
26
+
27
+ // Response
28
+ message Response {
29
+ int64 response_code = 2; // 返回状态码,200,439等
30
+ Args1 args = 4;
31
+ }
32
+
33
+ message Args1{
34
+ Args2 args = 1; // 不知道什么意义的套一层
35
+ }
36
+
37
+ message Args2{
38
+ Messages args = 2; // 不知道什么意义的套一层
39
+ }