gordonchan commited on
Commit
170f370
·
verified ·
1 Parent(s): 7b052bb

Upload 32 files

Browse files
request_llms/README.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ P.S. 如果您按照以下步骤成功接入了新的大模型,欢迎发Pull Requests(如果您在自己接入新模型的过程中遇到困难,欢迎加README底部QQ群联系群主)
2
+
3
+
4
+ # 如何接入其他本地大语言模型
5
+
6
+ 1. 复制`request_llms/bridge_llama2.py`,重命名为你喜欢的名字
7
+
8
+ 2. 修改`load_model_and_tokenizer`方法,加载你的模型和分词器(去该模型官网找demo,复制粘贴即可)
9
+
10
+ 3. 修改`llm_stream_generator`方法,定义推理模型(去该模型官网找demo,复制粘贴即可)
11
+
12
+ 4. 命令行测试
13
+ - 修改`tests/test_llms.py`(聪慧如您,只需要看一眼该文件就明白怎么修改了)
14
+ - 运行`python tests/test_llms.py`
15
+
16
+ 5. 测试通过后,在`request_llms/bridge_all.py`中做最后的修改,把你的模型完全接入到框架中(聪慧如您,只需要看一眼该文件就明白怎么修改了)
17
+
18
+ 6. 修改`LLM_MODEL`配置,然后运行`python main.py`,测试最后的效果
19
+
20
+
21
+ # 如何接入其他在线大语言模型
22
+
23
+ 1. 复制`request_llms/bridge_zhipu.py`,重命名为你喜欢的名字
24
+
25
+ 2. 修改`predict_no_ui_long_connection`
26
+
27
+ 3. 修改`predict`
28
+
29
+ 4. 命令行测试
30
+ - 修改`tests/test_llms.py`(聪慧如您,只需要看一眼该文件就明白怎么修改了)
31
+ - 运行`python tests/test_llms.py`
32
+
33
+ 5. 测试通过后,在`request_llms/bridge_all.py`中做最后的修改,把你的模型完全接入到框架中(聪慧如您,只需要看一眼该文件就明白怎么修改了)
34
+
35
+ 6. 修改`LLM_MODEL`配置,然后运行`python main.py`,测试最后的效果
request_llms/bridge_all.py ADDED
@@ -0,0 +1,729 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """
3
+ 该文件中主要包含2个函数,是所有LLM的通用接口,它们会继续向下调用更底层的LLM模型,处理多模型并行等细节
4
+
5
+ 不具备多线程能力的函数:正常对话时使用,具备完备的交互功能,不可多线程
6
+ 1. predict(...)
7
+
8
+ 具备多线程调用能力的函数:在函数插件中被调用,灵活而简洁
9
+ 2. predict_no_ui_long_connection(...)
10
+ """
11
+ import tiktoken, copy
12
+ from functools import lru_cache
13
+ from concurrent.futures import ThreadPoolExecutor
14
+ from toolbox import get_conf, trimmed_format_exc
15
+
16
+ from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui
17
+ from .bridge_chatgpt import predict as chatgpt_ui
18
+
19
+ from .bridge_chatgpt_vision import predict_no_ui_long_connection as chatgpt_vision_noui
20
+ from .bridge_chatgpt_vision import predict as chatgpt_vision_ui
21
+
22
+ from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
23
+ from .bridge_chatglm import predict as chatglm_ui
24
+
25
+ from .bridge_chatglm3 import predict_no_ui_long_connection as chatglm3_noui
26
+ from .bridge_chatglm3 import predict as chatglm3_ui
27
+
28
+ from .bridge_qianfan import predict_no_ui_long_connection as qianfan_noui
29
+ from .bridge_qianfan import predict as qianfan_ui
30
+
31
+ from .bridge_google_gemini import predict as genai_ui
32
+ from .bridge_google_gemini import predict_no_ui_long_connection as genai_noui
33
+
34
+ colors = ['#FF00FF', '#00FFFF', '#FF0000', '#990099', '#009999', '#990044']
35
+
36
class LazyloadTiktoken(object):
    """
    Thin wrapper around tiktoken that defers the (potentially slow, network-bound)
    encoder construction until the first encode/decode call. The encoder is cached
    per model name, so repeated wrappers for the same model share one encoder.
    """

    def __init__(self, model):
        # Only remember the model name; nothing is downloaded yet.
        self.model = model

    @staticmethod
    @lru_cache(maxsize=128)
    def get_encoder(model):
        # First call per model triggers the tiktoken download/construction.
        print('正在加载tokenizer,如果是第一次运行,可能需要一点时间下载参数')
        tmp = tiktoken.encoding_for_model(model)
        print('加载tokenizer完毕')
        return tmp

    def encode(self, *args, **kwargs):
        # Resolve the cached encoder lazily, then delegate.
        return self.get_encoder(self.model).encode(*args, **kwargs)

    def decode(self, *args, **kwargs):
        return self.get_encoder(self.model).decode(*args, **kwargs)
55
+
56
# Endpoint redirection
API_URL_REDIRECT, AZURE_ENDPOINT, AZURE_ENGINE = get_conf("API_URL_REDIRECT", "AZURE_ENDPOINT", "AZURE_ENGINE")
openai_endpoint = "https://api.openai.com/v1/chat/completions"
api2d_endpoint = "https://openai.api2d.net/v1/chat/completions"
newbing_endpoint = "wss://sydney.bing.com/sydney/ChatHub"
# Normalize the Azure endpoint so the deployment path below concatenates correctly.
if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/'
azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'
# Backward compatibility with the deprecated API_URL option
try:
    API_URL = get_conf("API_URL")
    if API_URL != "https://api.openai.com/v1/chat/completions":
        openai_endpoint = API_URL
        print("警告!API_URL配置选项将被弃用,请更换为API_URL_REDIRECT配置")
except Exception:
    # API_URL is simply absent in new-style configs; that is expected.
    # (Fixed: was a bare `except:` which would also swallow KeyboardInterrupt/SystemExit.)
    pass
# New-style redirect configuration: allow each known endpoint to be remapped.
if openai_endpoint in API_URL_REDIRECT: openai_endpoint = API_URL_REDIRECT[openai_endpoint]
if api2d_endpoint in API_URL_REDIRECT: api2d_endpoint = API_URL_REDIRECT[api2d_endpoint]
if newbing_endpoint in API_URL_REDIRECT: newbing_endpoint = API_URL_REDIRECT[newbing_endpoint]


# Tokenizers (lazily constructed on first use)
tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
tokenizer_gpt4 = LazyloadTiktoken("gpt-4")
get_token_num_gpt35 = lambda txt: len(tokenizer_gpt35.encode(txt, disallowed_special=()))
get_token_num_gpt4 = lambda txt: len(tokenizer_gpt4.encode(txt, disallowed_special=()))
82
+
83
+
84
# Begin model initialisation
AVAIL_LLM_MODELS, LLM_MODEL = get_conf("AVAIL_LLM_MODELS", "LLM_MODEL")
AVAIL_LLM_MODELS = AVAIL_LLM_MODELS + [LLM_MODEL]
# -=-=-=-=-=-=- the earliest and most stable models -=-=-=-=-=-=-
# Schema of each model_info entry:
#   fn_with_ui:    streaming predict function used by the chat UI
#   fn_without_ui: blocking predict function used by function plugins
#   endpoint:      HTTP(S)/WS endpoint, or None for local/SDK-driven models
#   max_token:     context window size used for truncation decisions
#   tokenizer:     lazy tiktoken wrapper (for non-OpenAI models this is only a
#                  rough token estimator, not the model's real tokenizer)
#   token_cnt:     callable mapping text -> estimated token count
model_info = {
    # openai
    "gpt-3.5-turbo": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 4096,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },

    "gpt-3.5-turbo-16k": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 16385,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },

    "gpt-3.5-turbo-0613": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 4096,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },

    "gpt-3.5-turbo-16k-0613": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 16385,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },

    "gpt-3.5-turbo-1106": {  # 16k context
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 16385,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },

    "gpt-4": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 8192,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
    },

    "gpt-4-32k": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 32768,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
    },

    "gpt-4-1106-preview": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 128000,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
    },

    "gpt-3.5-random": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 4096,
        # NOTE(review): uses the gpt-4 tokenizer despite the 3.5 name — confirm intentional
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
    },

    "gpt-4-vision-preview": {
        "fn_with_ui": chatgpt_vision_ui,
        "fn_without_ui": chatgpt_vision_noui,
        "endpoint": openai_endpoint,
        "max_token": 4096,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
    },


    # azure openai
    "azure-gpt-3.5":{
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": azure_endpoint,
        "max_token": 4096,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },

    "azure-gpt-4":{
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": azure_endpoint,
        "max_token": 8192,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
    },

    # api_2d (further api2d entries need not be added here — the alias loop below adds them automatically)
    "api2d-gpt-3.5-turbo": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": api2d_endpoint,
        "max_token": 4096,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },

    "api2d-gpt-4": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": api2d_endpoint,
        "max_token": 8192,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
    },

    # "chatglm" is aliased directly to chatglm2
    "chatglm": {
        "fn_with_ui": chatglm_ui,
        "fn_without_ui": chatglm_noui,
        "endpoint": None,
        "max_token": 1024,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "chatglm2": {
        "fn_with_ui": chatglm_ui,
        "fn_without_ui": chatglm_noui,
        "endpoint": None,
        "max_token": 1024,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "chatglm3": {
        "fn_with_ui": chatglm3_ui,
        "fn_without_ui": chatglm3_noui,
        "endpoint": None,
        "max_token": 8192,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "qianfan": {
        "fn_with_ui": qianfan_ui,
        "fn_without_ui": qianfan_noui,
        "endpoint": None,
        "max_token": 2000,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "gemini-pro": {
        "fn_with_ui": genai_ui,
        "fn_without_ui": genai_noui,
        "endpoint": None,
        "max_token": 1024 * 32,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "gemini-pro-vision": {
        "fn_with_ui": genai_ui,
        "fn_without_ui": genai_noui,
        "endpoint": None,
        "max_token": 1024 * 32,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
}
269
+
270
# -=-=-=-=-=-=- api2d aliasing support -=-=-=-=-=-=-
# Any "api2d-<model>" in AVAIL_LLM_MODELS reuses <model>'s config, retargeted at the api2d endpoint.
for model in AVAIL_LLM_MODELS:
    if model.startswith('api2d-') and (model.replace('api2d-','') in model_info.keys()):
        mi = copy.deepcopy(model_info[model.replace('api2d-','')])
        mi.update({"endpoint": api2d_endpoint})
        model_info.update({model: mi})

# -=-=-=-=-=-=- azure aliasing support -=-=-=-=-=-=-
for model in AVAIL_LLM_MODELS:
    if model.startswith('azure-') and (model.replace('azure-','') in model_info.keys()):
        mi = copy.deepcopy(model_info[model.replace('azure-','')])
        mi.update({"endpoint": azure_endpoint})
        model_info.update({model: mi})

# -=-=-=-=-=-=- models added later, possibly carrying extra dependencies -=-=-=-=-=-=-
# Each optional model is imported/registered best-effort: a failed import is
# reported but does not prevent the rest of the framework from starting.
# Fixed: these blocks previously used bare `except:` clauses, which would also
# swallow SystemExit/KeyboardInterrupt; narrowed to `except Exception:`.
if "claude-1-100k" in AVAIL_LLM_MODELS or "claude-2" in AVAIL_LLM_MODELS:
    from .bridge_claude import predict_no_ui_long_connection as claude_noui
    from .bridge_claude import predict as claude_ui
    model_info.update({
        "claude-1-100k": {
            "fn_with_ui": claude_ui,
            "fn_without_ui": claude_noui,
            "endpoint": None,
            "max_token": 8196,  # NOTE(review): 8196 looks like a typo for 8192 — confirm before changing
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        },
    })
    model_info.update({
        "claude-2": {
            "fn_with_ui": claude_ui,
            "fn_without_ui": claude_noui,
            "endpoint": None,
            "max_token": 8196,
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        },
    })
if "jittorllms_rwkv" in AVAIL_LLM_MODELS:
    from .bridge_jittorllms_rwkv import predict_no_ui_long_connection as rwkv_noui
    from .bridge_jittorllms_rwkv import predict as rwkv_ui
    model_info.update({
        "jittorllms_rwkv": {
            "fn_with_ui": rwkv_ui,
            "fn_without_ui": rwkv_noui,
            "endpoint": None,
            "max_token": 1024,
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        },
    })
if "jittorllms_llama" in AVAIL_LLM_MODELS:
    from .bridge_jittorllms_llama import predict_no_ui_long_connection as llama_noui
    from .bridge_jittorllms_llama import predict as llama_ui
    model_info.update({
        "jittorllms_llama": {
            "fn_with_ui": llama_ui,
            "fn_without_ui": llama_noui,
            "endpoint": None,
            "max_token": 1024,
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        },
    })
if "jittorllms_pangualpha" in AVAIL_LLM_MODELS:
    from .bridge_jittorllms_pangualpha import predict_no_ui_long_connection as pangualpha_noui
    from .bridge_jittorllms_pangualpha import predict as pangualpha_ui
    model_info.update({
        "jittorllms_pangualpha": {
            "fn_with_ui": pangualpha_ui,
            "fn_without_ui": pangualpha_noui,
            "endpoint": None,
            "max_token": 1024,
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        },
    })
if "moss" in AVAIL_LLM_MODELS:
    from .bridge_moss import predict_no_ui_long_connection as moss_noui
    from .bridge_moss import predict as moss_ui
    model_info.update({
        "moss": {
            "fn_with_ui": moss_ui,
            "fn_without_ui": moss_noui,
            "endpoint": None,
            "max_token": 1024,
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        },
    })
if "stack-claude" in AVAIL_LLM_MODELS:
    from .bridge_stackclaude import predict_no_ui_long_connection as claude_noui
    from .bridge_stackclaude import predict as claude_ui
    model_info.update({
        "stack-claude": {
            "fn_with_ui": claude_ui,
            "fn_without_ui": claude_noui,
            "endpoint": None,
            "max_token": 8192,
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        }
    })
if "newbing-free" in AVAIL_LLM_MODELS:
    try:
        from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
        from .bridge_newbingfree import predict as newbingfree_ui
        model_info.update({
            "newbing-free": {
                "fn_with_ui": newbingfree_ui,
                "fn_without_ui": newbingfree_noui,
                "endpoint": newbing_endpoint,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "newbing" in AVAIL_LLM_MODELS:  # same bridge as newbing-free
    try:
        from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
        from .bridge_newbingfree import predict as newbingfree_ui
        model_info.update({
            "newbing": {
                "fn_with_ui": newbingfree_ui,
                "fn_without_ui": newbingfree_noui,
                "endpoint": newbing_endpoint,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "chatglmft" in AVAIL_LLM_MODELS:  # fine-tuned local ChatGLM (comment fixed: was a copy-paste of "same with newbing-free")
    try:
        from .bridge_chatglmft import predict_no_ui_long_connection as chatglmft_noui
        from .bridge_chatglmft import predict as chatglmft_ui
        model_info.update({
            "chatglmft": {
                "fn_with_ui": chatglmft_ui,
                "fn_without_ui": chatglmft_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "internlm" in AVAIL_LLM_MODELS:
    try:
        from .bridge_internlm import predict_no_ui_long_connection as internlm_noui
        from .bridge_internlm import predict as internlm_ui
        model_info.update({
            "internlm": {
                "fn_with_ui": internlm_ui,
                "fn_without_ui": internlm_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "chatglm_onnx" in AVAIL_LLM_MODELS:
    try:
        from .bridge_chatglmonnx import predict_no_ui_long_connection as chatglm_onnx_noui
        from .bridge_chatglmonnx import predict as chatglm_onnx_ui
        model_info.update({
            "chatglm_onnx": {
                "fn_with_ui": chatglm_onnx_ui,
                "fn_without_ui": chatglm_onnx_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "qwen-local" in AVAIL_LLM_MODELS:
    try:
        from .bridge_qwen_local import predict_no_ui_long_connection as qwen_local_noui
        from .bridge_qwen_local import predict as qwen_local_ui
        model_info.update({
            "qwen-local": {
                "fn_with_ui": qwen_local_ui,
                "fn_without_ui": qwen_local_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS:  # Alibaba Qwen (comment fixed: was a copy-paste of "zhipuai")
    try:
        from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
        from .bridge_qwen import predict as qwen_ui
        model_info.update({
            "qwen-turbo": {
                "fn_with_ui": qwen_ui,
                "fn_without_ui": qwen_noui,
                "endpoint": None,
                "max_token": 6144,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            },
            "qwen-plus": {
                "fn_with_ui": qwen_ui,
                "fn_without_ui": qwen_noui,
                "endpoint": None,
                "max_token": 30720,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            },
            "qwen-max": {
                "fn_with_ui": qwen_ui,
                "fn_without_ui": qwen_noui,
                "endpoint": None,
                "max_token": 28672,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "spark" in AVAIL_LLM_MODELS:  # iFlyTek Spark LLM
    try:
        from .bridge_spark import predict_no_ui_long_connection as spark_noui
        from .bridge_spark import predict as spark_ui
        model_info.update({
            "spark": {
                "fn_with_ui": spark_ui,
                "fn_without_ui": spark_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "sparkv2" in AVAIL_LLM_MODELS:  # iFlyTek Spark LLM v2
    try:
        from .bridge_spark import predict_no_ui_long_connection as spark_noui
        from .bridge_spark import predict as spark_ui
        model_info.update({
            "sparkv2": {
                "fn_with_ui": spark_ui,
                "fn_without_ui": spark_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "sparkv3" in AVAIL_LLM_MODELS:  # iFlyTek Spark LLM v3
    try:
        from .bridge_spark import predict_no_ui_long_connection as spark_noui
        from .bridge_spark import predict as spark_ui
        model_info.update({
            "sparkv3": {
                "fn_with_ui": spark_ui,
                "fn_without_ui": spark_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "llama2" in AVAIL_LLM_MODELS:  # llama2
    try:
        from .bridge_llama2 import predict_no_ui_long_connection as llama2_noui
        from .bridge_llama2 import predict as llama2_ui
        model_info.update({
            "llama2": {
                "fn_with_ui": llama2_ui,
                "fn_without_ui": llama2_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "zhipuai" in AVAIL_LLM_MODELS:  # zhipuai
    try:
        from .bridge_zhipu import predict_no_ui_long_connection as zhipu_noui
        from .bridge_zhipu import predict as zhipu_ui
        model_info.update({
            "zhipuai": {
                "fn_with_ui": zhipu_ui,
                "fn_without_ui": zhipu_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "deepseekcoder" in AVAIL_LLM_MODELS:  # deepseekcoder
    try:
        from .bridge_deepseekcoder import predict_no_ui_long_connection as deepseekcoder_noui
        from .bridge_deepseekcoder import predict as deepseekcoder_ui
        model_info.update({
            "deepseekcoder": {
                "fn_with_ui": deepseekcoder_ui,
                "fn_without_ui": deepseekcoder_noui,
                "endpoint": None,
                "max_token": 2048,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
597
+
598
# <-- defines and switches between multiple Azure model deployments -->
AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY")
if len(AZURE_CFG_ARRAY) > 0:
    for azure_model_name, azure_cfg_dict in AZURE_CFG_ARRAY.items():
        # May overwrite an earlier entry with the same name; this is intentional.
        if not azure_model_name.startswith('azure'):
            raise ValueError("AZURE_CFG_ARRAY中配置的模型必须以azure开头")
        # NOTE(review): unlike the global AZURE_ENDPOINT handling earlier in this
        # file, no trailing-'/' normalization is applied here — confirm configs
        # always end the endpoint with '/'.
        endpoint_ = azure_cfg_dict["AZURE_ENDPOINT"] + \
            f'openai/deployments/{azure_cfg_dict["AZURE_ENGINE"]}/chat/completions?api-version=2023-05-15'
        model_info.update({
            azure_model_name: {
                "fn_with_ui": chatgpt_ui,
                "fn_without_ui": chatgpt_noui,
                "endpoint": endpoint_,
                "azure_api_key": azure_cfg_dict["AZURE_API_KEY"],
                "max_token": azure_cfg_dict["AZURE_MODEL_MAX_TOKEN"],
                "tokenizer": tokenizer_gpt35,  # tokenizer is only used for a rough token estimate
                "token_cnt": get_token_num_gpt35,
            }
        })
        if azure_model_name not in AVAIL_LLM_MODELS:
            AVAIL_LLM_MODELS += [azure_model_name]
620
+
621
+
622
+
623
+
624
def LLM_CATCH_EXCEPTION(f):
    """
    Decorator for the plugin-facing LLM call: if the wrapped function raises,
    format the traceback as a fenced code block, publish it to the observation
    window, and return it as the result instead of propagating the exception.
    """
    def decorated(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience):
        try:
            result = f(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience)
        except Exception:
            # Surface the error through the same channel a normal reply would use.
            formatted = '\n```\n' + trimmed_format_exc() + '\n```\n'
            observe_window[0] = formatted
            result = formatted
        return result
    return decorated
636
+
637
+
638
def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window=[], console_slience=False):
    """
    Send a request to the LLM and wait for the complete reply in one call, without
    showing intermediate progress. Internally it still streams, to avoid the
    connection being dropped mid-way.

    inputs:
        the query of this request
    sys_prompt:
        the silent system prompt
    llm_kwargs:
        internal tuning parameters for the LLM
    history:
        the list of previous conversation turns
    observe_window:
        used to pass partially generated output across threads; mostly for visual
        effect and may be left empty. observe_window[0]: observation-window text.
        observe_window[1]: watchdog timestamp.
        NOTE(review): the default is a shared mutable list ([]) — safe only as long
        as no caller relies on the default being fresh per call; confirm callers.
    """
    import threading, time, copy

    model = llm_kwargs['llm_model']
    n_model = 1
    if '&' not in model:
        # No '&' in the model name: querying a single LLM.
        assert not model.startswith("tgui"), "TGUI不支持函数插件的实现"

        # Dispatch to the model's non-UI (plugin-safe) predict function.
        method = model_info[model]["fn_without_ui"]
        return method(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience)
    else:

        # Querying several LLMs at once ('a&b&c'). Same idea as above, just fanned
        # out over a thread pool with a mutex-window aggregator.
        executor = ThreadPoolExecutor(max_workers=4)
        models = model.split('&')
        n_model = len(models)

        # The multi-model path requires a fully formed 3-slot observation window.
        window_len = len(observe_window)
        assert window_len==3
        # One [text, watchdog, spare] sub-window per model, plus a trailing
        # boolean flag that keeps the aggregator thread alive.
        window_mutex = [["", time.time(), ""] for _ in range(n_model)] + [True]

        futures = []
        for i in range(n_model):
            model = models[i]
            method = model_info[model]["fn_without_ui"]
            # Deep-copy kwargs so each worker sees its own llm_model value.
            llm_kwargs_feedin = copy.deepcopy(llm_kwargs)
            llm_kwargs_feedin['llm_model'] = model
            future = executor.submit(LLM_CATCH_EXCEPTION(method), inputs, llm_kwargs_feedin, history, sys_prompt, window_mutex[i], console_slience)
            futures.append(future)

        def mutex_manager(window_mutex, observe_window):
            # Aggregator loop: forwards the caller's watchdog timestamp to every
            # worker and merges the workers' partial outputs into one display string.
            while True:
                time.sleep(0.25)
                if not window_mutex[-1]: break
                # watchdog: propagate the caller's keep-alive timestamp
                for i in range(n_model):
                    window_mutex[i][1] = observe_window[1]
                # observation window: merge per-model partial outputs
                chat_string = []
                for i in range(n_model):
                    chat_string.append( f"【{str(models[i])} 说】: <font color=\"{colors[i]}\"> {window_mutex[i][0]} </font>" )
                res = '<br/><br/>\n\n---\n\n'.join(chat_string)
                # # # # # # # # # # #
                observe_window[0] = res

        t_model = threading.Thread(target=mutex_manager, args=(window_mutex, observe_window), daemon=True)
        t_model.start()

        # Poll until every worker future has completed.
        return_string_collect = []
        while True:
            worker_done = [h.done() for h in futures]
            if all(worker_done):
                executor.shutdown()
                break
            time.sleep(1)

        for i, future in enumerate(futures): # wait and get
            return_string_collect.append( f"【{str(models[i])} 说】: <font color=\"{colors[i]}\"> {future.result()} </font>" )

        window_mutex[-1] = False # stop mutex thread
        res = '<br/><br/>\n\n---\n\n'.join(return_string_collect)
        return res
714
+
715
+
716
def predict(inputs, llm_kwargs, *args, **kwargs):
    """
    Send a request to the LLM and stream the output back; used for the basic chat
    function of the web UI.

    inputs: the query of this request
    llm_kwargs: internal tuning parameters (top_p, temperature, llm_model, ...)
    Remaining positional/keyword arguments (history, chatbot, additional_fn, ...)
    are forwarded untouched to the selected model's UI predict function.
    """
    selected_model = llm_kwargs['llm_model']
    # If this lookup raises a KeyError, check the AVAIL_LLM_MODELS option in the config.
    ui_handler = model_info[selected_model]["fn_with_ui"]
    yield from ui_handler(inputs, llm_kwargs, *args, **kwargs)
729
+
request_llms/bridge_chatglm.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Display name and install hint surfaced in error messages when the model is unavailable.
model_name = "ChatGLM"
cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`"
3
+
4
+
5
+ from toolbox import get_conf, ProxyNetworkActivate
6
+ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
7
+
8
+
9
+
10
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
class GetGLM2Handle(LocalLLMHandle):
    """Subprocess handle that loads a local THUDM/chatglm2-6b checkpoint and streams replies."""

    def load_model_info(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        # (Fixed: removed duplicated `import os` and unused `glob`/`platform` imports.)
        from transformers import AutoModel, AutoTokenizer
        LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE')

        # Pick the checkpoint matching the configured quantization level.
        if LOCAL_MODEL_QUANT == "INT4": # INT4
            _model_name_ = "THUDM/chatglm2-6b-int4"
        elif LOCAL_MODEL_QUANT == "INT8": # INT8
            _model_name_ = "THUDM/chatglm2-6b-int8"
        else:
            _model_name_ = "THUDM/chatglm2-6b" # FP16

        with ProxyNetworkActivate('Download_LLM'):
            chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True)
            if device=='cpu':
                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).float()
            else:
                # GPU path: half precision on CUDA.
                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).half().cuda()
            chatglm_model = chatglm_model.eval()

        self._model = chatglm_model
        self._tokenizer = chatglm_tokenizer
        return self._model, self._tokenizer

    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        def adaptor(kwargs):
            # Unpack the generation request forwarded from the parent process.
            query = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            return query, max_length, top_p, temperature, history

        query, max_length, top_p, temperature, history = adaptor(kwargs)

        # Relay the model's incremental responses; each yield is the full text so far.
        for response, history in self._model.stream_chat(self._tokenizer,
                                                         query,
                                                         history,
                                                         max_length=max_length,
                                                         top_p=top_p,
                                                         temperature=temperature,
                                                         ):
            yield response

    def try_to_import_special_deps(self, **kwargs):
        # import something that will raise error if the user does not install requirement_*.txt
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the main process
        import importlib
        # importlib.import_module('modelscope')
73
+
74
+
75
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
# Build the two standard predict entry points (plugin/non-UI and streaming UI) from the handle class.
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetGLM2Handle, model_name)
request_llms/bridge_chatglm3.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Display name and install hint surfaced in error messages when the model is unavailable.
model_name = "ChatGLM3"
cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`"
3
+
4
+
5
+ from toolbox import get_conf, ProxyNetworkActivate
6
+ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
7
+
8
+
9
+
10
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
class GetGLM3Handle(LocalLLMHandle):
    """Subprocess handle that loads a local THUDM/chatglm3-6b checkpoint and streams replies."""

    def load_model_info(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        # (Fixed: removed duplicated `import os` and unused `glob`/`platform` imports.)
        from transformers import AutoModel, AutoTokenizer
        LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE')

        # Pick the checkpoint matching the configured quantization level.
        if LOCAL_MODEL_QUANT == "INT4": # INT4
            _model_name_ = "THUDM/chatglm3-6b-int4"
        elif LOCAL_MODEL_QUANT == "INT8": # INT8
            _model_name_ = "THUDM/chatglm3-6b-int8"
        else:
            _model_name_ = "THUDM/chatglm3-6b" # FP16
        with ProxyNetworkActivate('Download_LLM'):
            chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True)
            if device=='cpu':
                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True, device='cpu').float()
            else:
                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True, device='cuda')
            chatglm_model = chatglm_model.eval()

        self._model = chatglm_model
        self._tokenizer = chatglm_tokenizer
        return self._model, self._tokenizer

    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        def adaptor(kwargs):
            # Unpack the generation request forwarded from the parent process.
            query = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            return query, max_length, top_p, temperature, history

        query, max_length, top_p, temperature, history = adaptor(kwargs)

        # Relay the model's incremental responses; each yield is the full text so far.
        for response, history in self._model.stream_chat(self._tokenizer,
                                                         query,
                                                         history,
                                                         max_length=max_length,
                                                         top_p=top_p,
                                                         temperature=temperature,
                                                         ):
            yield response

    def try_to_import_special_deps(self, **kwargs):
        # import something that will raise error if the user does not install requirement_*.txt
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the main process
        import importlib
        # importlib.import_module('modelscope')
72
+
73
+
74
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
# Build the two standard predict entry points; chatglm3 uses its own history message format.
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetGLM3Handle, model_name, history_format='chatglm3')
request_llms/bridge_chatglmft.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from transformers import AutoModel, AutoTokenizer
3
+ import time
4
+ import os
5
+ import json
6
+ import threading
7
+ import importlib
8
+ from toolbox import update_ui, get_conf
9
+ from multiprocessing import Process, Pipe
10
+
11
+ load_message = "ChatGLMFT尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,ChatGLMFT消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……"
12
+
13
def string_to_options(arguments):
    """Parse a shell-style option string into an argparse Namespace.

    Args:
        arguments: a single string of command-line style options,
            e.g. ``"--llm_to_learn gpt-4 --batch 10"``.

    Returns:
        argparse.Namespace with fields llm_to_learn, prompt_prefix,
        system_prompt and batch (all with defaults when omitted).
    """
    import argparse
    import shlex
    # Create an argparse.ArgumentParser instance
    parser = argparse.ArgumentParser()
    # Add command-line arguments
    parser.add_argument("--llm_to_learn", type=str, help="LLM model to learn", default="gpt-3.5-turbo")
    parser.add_argument("--prompt_prefix", type=str, help="Prompt prefix", default='')
    parser.add_argument("--system_prompt", type=str, help="System prompt", default='')
    # FIX: help text previously said "System prompt" (copy-paste error)
    parser.add_argument("--batch", type=int, help="Batch size", default=50)
    # Parse the arguments (shlex honors quoting like a POSIX shell)
    args = parser.parse_args(shlex.split(arguments))
    return args
26
+
27
+
28
+ #################################################################################
29
class GetGLMFTHandle(Process):
    """Daemon child process hosting a P-Tuning fine-tuned ChatGLM model.

    The parent process never touches the model directly: requests are sent
    through a ``multiprocessing.Pipe`` by ``stream_chat`` and the child
    (``run``) streams partial replies back, terminated by a '[Finish]'
    sentinel string.
    """
    def __init__(self):
        # daemon=True: the child process dies together with the main process
        super().__init__(daemon=True)
        self.parent, self.child = Pipe()
        self.chatglmft_model = None       # loaded lazily in run() (child side)
        self.chatglmft_tokenizer = None
        self.info = ""                    # human-readable status shown in the UI
        self.success = True
        self.check_dependency()
        self.start()
        # serializes concurrent stream_chat callers (one request on the pipe at a time)
        self.threadLock = threading.Lock()

    def check_dependency(self):
        # Probe the optional dependency; records status flags instead of raising.
        try:
            import sentencepiece
            self.info = "依赖检测通过"
            self.success = True
        except:
            self.info = "缺少ChatGLMFT的依赖,如果要使用ChatGLMFT,除了基础的pip依赖以外,您还需要运行`pip install -r request_llms/requirements_chatglm.txt`安装ChatGLM的依赖。"
            self.success = False

    def ready(self):
        # True once the child has finished loading the model.
        return self.chatglmft_model is not None

    def run(self):
        # Executed in the child process.
        # First pass: load the fine-tuned checkpoint (retried up to 3 times).
        retry = 0
        while True:
            try:
                if self.chatglmft_model is None:
                    from transformers import AutoConfig
                    import torch
                    # conf = 'request_llms/current_ptune_model.json'
                    # if not os.path.exists(conf): raise RuntimeError('找不到微调模型信息')
                    # with open(conf, 'r', encoding='utf8') as f:
                    #     model_args = json.loads(f.read())
                    CHATGLM_PTUNING_CHECKPOINT = get_conf('CHATGLM_PTUNING_CHECKPOINT')
                    assert os.path.exists(CHATGLM_PTUNING_CHECKPOINT), "找不到微调模型检查点"
                    conf = os.path.join(CHATGLM_PTUNING_CHECKPOINT, "config.json")
                    with open(conf, 'r', encoding='utf8') as f:
                        model_args = json.loads(f.read())
                    # older checkpoints store the base model path under '_name_or_path'
                    if 'model_name_or_path' not in model_args:
                        model_args['model_name_or_path'] = model_args['_name_or_path']
                    self.chatglmft_tokenizer = AutoTokenizer.from_pretrained(
                        model_args['model_name_or_path'], trust_remote_code=True)
                    config = AutoConfig.from_pretrained(
                        model_args['model_name_or_path'], trust_remote_code=True)

                    config.pre_seq_len = model_args['pre_seq_len']
                    config.prefix_projection = model_args['prefix_projection']

                    print(f"Loading prefix_encoder weight from {CHATGLM_PTUNING_CHECKPOINT}")
                    model = AutoModel.from_pretrained(model_args['model_name_or_path'], config=config, trust_remote_code=True)
                    prefix_state_dict = torch.load(os.path.join(CHATGLM_PTUNING_CHECKPOINT, "pytorch_model.bin"))
                    # keep only the prefix-encoder weights, stripping their module prefix
                    new_prefix_state_dict = {}
                    for k, v in prefix_state_dict.items():
                        if k.startswith("transformer.prefix_encoder."):
                            new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
                    model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)

                    if model_args['quantization_bit'] is not None and model_args['quantization_bit'] != 0:
                        print(f"Quantized to {model_args['quantization_bit']} bit")
                        model = model.quantize(model_args['quantization_bit'])
                    model = model.cuda()
                    if model_args['pre_seq_len'] is not None:
                        # P-tuning v2: the prefix encoder stays in fp32
                        model.transformer.prefix_encoder.float()
                    self.chatglmft_model = model.eval()

                    break
                else:
                    break
            except Exception as e:
                retry += 1
                if retry > 3:
                    self.child.send('[Local Message] Call ChatGLMFT fail 不能正常加载ChatGLMFT的参数。')
                    raise RuntimeError("不能正常加载ChatGLMFT的参数!")

        while True:
            # wait for the next request from the parent
            kwargs = self.child.recv()
            # got one — run streaming inference and forward each partial reply
            try:
                for response, history in self.chatglmft_model.stream_chat(self.chatglmft_tokenizer, **kwargs):
                    self.child.send(response)
                    # # receive a possible mid-stream terminate command (if any)
                    # if self.child.poll():
                    #     command = self.child.recv()
                    #     if command == '[Terminate]': break
            except:
                from toolbox import trimmed_format_exc
                self.child.send('[Local Message] Call ChatGLMFT fail.' + '\n```\n' + trimmed_format_exc() + '\n```\n')
            # request finished; signal completion and loop for the next one
            self.child.send('[Finish]')

    def stream_chat(self, **kwargs):
        # Executed in the parent process; yields partial replies until '[Finish]'.
        self.threadLock.acquire()
        self.parent.send(kwargs)
        while True:
            res = self.parent.recv()
            if res != '[Finish]':
                yield res
            else:
                break
        self.threadLock.release()
136
+
137
global glmft_handle  # NOTE(review): `global` at module scope is a no-op — kept for parity with sibling bridges
glmft_handle = None  # lazily-created singleton GetGLMFTHandle, shared by the predict_* entry points
#################################################################################
140
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
    """
    Multi-threaded entry point: run one ChatGLMFT query to completion and
    return the final reply as a string (see request_llms/bridge_all.py for
    the full interface contract).

    observe_window[0] (if present) receives the partial output for display;
    observe_window[1] (if present) is a watchdog timestamp — when it is not
    refreshed within 5 seconds the call aborts with RuntimeError.
    """
    global glmft_handle
    if glmft_handle is None:
        # first use: spawn the child process that hosts the model
        glmft_handle = GetGLMFTHandle()
        if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + glmft_handle.info
        if not glmft_handle.success:
            error = glmft_handle.info
            glmft_handle = None
            raise RuntimeError(error)

    # ChatGLMFT has no sys_prompt parameter, so fold the prompt into the history
    history_feedin = []
    history_feedin.append(["What can I do?", sys_prompt])
    for i in range(len(history)//2):
        history_feedin.append([history[2*i], history[2*i+1]] )

    watch_dog_patience = 5 # watchdog patience: 5 seconds is enough
    response = ""
    for response in glmft_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        if len(observe_window) >= 1: observe_window[0] = response
        if len(observe_window) >= 2:
            # watchdog: abort when the caller stopped feeding the timestamp
            if (time.time()-observe_window[1]) > watch_dog_patience:
                raise RuntimeError("程序终止。")
    return response
168
+
169
+
170
+
171
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    Single-threaded UI entry point: stream the ChatGLMFT reply into *chatbot*
    via update_ui (see request_llms/bridge_all.py for the interface contract).
    """
    chatbot.append((inputs, ""))

    # lazily create the singleton model handle on first use
    global glmft_handle
    if glmft_handle is None:
        glmft_handle = GetGLMFTHandle()
        chatbot[-1] = (inputs, load_message + "\n\n" + glmft_handle.info)
        yield from update_ui(chatbot=chatbot, history=[])
        if not glmft_handle.success:
            glmft_handle = None
            return

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    # fold the system prompt and prior turns into the model's history format
    history_feedin = []
    history_feedin.append(["What can I do?", system_prompt] )
    for i in range(len(history)//2):
        history_feedin.append([history[2*i], history[2*i+1]] )

    # stream the ChatGLMFT reply into the UI
    response = "[Local Message] 等待ChatGLMFT响应中 ..."
    for response in glmft_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        chatbot[-1] = (inputs, response)
        yield from update_ui(chatbot=chatbot, history=history)

    # finalize: an unchanged placeholder means the model never answered
    if response == "[Local Message] 等待ChatGLMFT响应中 ...":
        response = "[Local Message] ChatGLMFT响应异常 ..."
    history.extend([inputs, response])
    yield from update_ui(chatbot=chatbot, history=history)
request_llms/bridge_chatgpt.py ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 借鉴了 https://github.com/GaiZhenbiao/ChuanhuChatGPT 项目
2
+
3
+ """
4
+ 该文件中主要包含三个函数
5
+
6
+ 不具备多线程能力的函数:
7
+ 1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
8
+
9
+ 具备多线程调用能力的函数
10
+ 2. predict_no_ui_long_connection:支持多线程
11
+ """
12
+
13
+ import json
14
+ import time
15
+ import gradio as gr
16
+ import logging
17
+ import traceback
18
+ import requests
19
+ import importlib
20
+ import random
21
+
22
+ # config_private.py放自己的秘密如API和代理网址
23
+ # 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件
24
+ from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder
25
+ proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG, AZURE_CFG_ARRAY = \
26
+ get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG', 'AZURE_CFG_ARRAY')
27
+
28
+ timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
29
+ '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
30
+
31
def get_full_error(chunk, stream_response):
    """Recover the complete error payload returned by OpenAI by draining
    whatever remains of *stream_response* and appending it to *chunk*."""
    try:
        for piece in stream_response:
            chunk += piece
    except:
        pass  # stream died mid-drain — return what we have
    return chunk
41
+
42
def decode_chunk(chunk):
    """Pre-parse one SSE chunk from the chat-completions stream.

    Returns (decoded_text, parsed_json_or_None, has_choices, choice_valid,
    has_content, has_role) — flags used downstream to spot malformed frames
    from third-party endpoints. Never raises.
    """
    text = chunk.decode()
    parsed = None
    has_choices = choice_valid = has_content = has_role = False
    try:
        # strip the leading "data: " prefix before parsing
        parsed = json.loads(text[6:])
        has_choices = 'choices' in parsed
        if has_choices:
            choice_valid = len(parsed['choices']) > 0
        if choice_valid:
            delta = parsed['choices'][0]["delta"]
            # an explicit null content counts as absent
            has_content = ("content" in delta) and (delta["content"] is not None)
            has_role = "role" in delta
    except:
        pass
    return text, parsed, has_choices, choice_valid, has_content, has_role
60
+
61
from functools import lru_cache

@lru_cache(maxsize=32)
def verify_endpoint(endpoint):
    """Validate *endpoint* and return it unchanged (result cached per endpoint).

    Raises:
        ValueError: the placeholder Azure endpoint from the sample config
            was left unedited.
    """
    placeholder_left_in_config = "你亲手写的api名称" in endpoint
    if placeholder_left_in_config:
        raise ValueError("Endpoint不正确, 请检查AZURE_ENDPOINT的配置! 当前的Endpoint为:" + endpoint)
    return endpoint
70
+
71
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
    """
    Send a request to chatGPT and wait for the complete reply in one shot
    (no incremental UI updates). Streaming is still used internally so a
    dropped connection is detected early.
    inputs:
        the query of this request
    sys_prompt:
        the silent system prompt
    llm_kwargs:
        chatGPT tuning parameters (model, temperature, top_p, ...)
    history:
        list of previous dialogue turns
    observe_window = None:
        cross-thread channel for partial output, mostly for visual effect;
        observe_window[0]: partial-output buffer. observe_window[1]: watchdog timestamp.
    """
    watch_dog_patience = 5 # watchdog patience: 5 seconds is enough
    headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
    retry = 0
    while True:
        try:
            # make a POST request to the API endpoint, stream=False
            from .bridge_all import model_info
            endpoint = verify_endpoint(model_info[llm_kwargs['llm_model']]['endpoint'])
            response = requests.post(endpoint, headers=headers, proxies=proxies,
                                    json=payload, stream=True, timeout=TIMEOUT_SECONDS); break
        except requests.exceptions.ReadTimeout as e:
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
            if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')

    stream_response = response.iter_lines()
    result = ''
    json_data = None
    while True:
        try: chunk = next(stream_response)
        except StopIteration:
            break
        except requests.exceptions.ConnectionError:
            chunk = next(stream_response) # connection hiccup — retry once; a second failure propagates
        chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role = decode_chunk(chunk)
        if len(chunk_decoded)==0: continue
        if not chunk_decoded.startswith('data:'):
            # non-SSE line: the server returned an error payload
            error_msg = get_full_error(chunk, stream_response).decode()
            if "reduce the length" in error_msg:
                raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
            else:
                raise RuntimeError("OpenAI拒绝了请求:" + error_msg)
        if ('data: [DONE]' in chunk_decoded): break # api2d finished normally
        # peeked flags (used for anomaly detection)
        if has_choices and not choice_valid:
            # some flaky third-party endpoints emit empty choice lists
            continue
        json_data = chunkjson['choices'][0]
        delta = json_data["delta"]
        if len(delta) == 0: break
        if "role" in delta: continue
        if "content" in delta:
            result += delta["content"]
            if not console_slience: print(delta["content"], end='')
            if observe_window is not None:
                # observation window: publish the data received so far
                if len(observe_window) >= 1:
                    observe_window[0] += delta["content"]
                # watchdog: abort when not fed within the deadline
                if len(observe_window) >= 2:
                    if (time.time()-observe_window[1]) > watch_dog_patience:
                        raise RuntimeError("用户取消了程序。")
        else: raise RuntimeError("意外Json结构:"+delta)
    if json_data and json_data['finish_reason'] == 'content_filter':
        raise RuntimeError("由于提问含不合规内容被Azure过滤。")
    if json_data and json_data['finish_reason'] == 'length':
        raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。")
    return result
144
+
145
+
146
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    Send a request to chatGPT and stream the output into the UI.
    Used for the basic chat function.
    inputs: the query of this request
    top_p, temperature: chatGPT tuning parameters (inside llm_kwargs)
    history: previous dialogue turns (overly long inputs/history trigger a token-overflow error)
    chatbot: the dialogue list shown in the WebUI; mutate it then yield to refresh the page
    additional_fn: which UI button was clicked (see functional.py)
    """
    if is_any_api_key(inputs):
        # the user typed an api-key into the chat box: store it instead of chatting
        chatbot._cookies['api_key'] = inputs
        chatbot.append(("输入已识别为openai的api_key", what_keys(inputs)))
        yield from update_ui(chatbot=chatbot, history=history, msg="api_key已导入") # refresh UI
        return
    elif not is_any_api_key(chatbot._cookies['api_key']):
        chatbot.append((inputs, "缺少api_key。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。"))
        yield from update_ui(chatbot=chatbot, history=history, msg="缺少api_key") # refresh UI
        return

    user_input = inputs
    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    raw_input = inputs
    logging.info(f'[raw_input] {raw_input}')
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # refresh UI

    # check mis-behavior: the user submitted an upload-folder path as a question
    if is_the_upload_folder(user_input):
        chatbot[-1] = (inputs, f"[Local Message] 检测到操作错误!当您上传文档之后,需点击“**函数插件区**”按钮进行处理,请勿点击“提交”按钮或者“基础功能区”按钮。")
        yield from update_ui(chatbot=chatbot, history=history, msg="正常") # refresh UI
        time.sleep(2)

    try:
        headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream)
    except RuntimeError as e:
        chatbot[-1] = (inputs, f"您提供的api-key不满足要求,不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
        yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # refresh UI
        return

    # validate the endpoint
    try:
        from .bridge_all import model_info
        endpoint = verify_endpoint(model_info[llm_kwargs['llm_model']]['endpoint'])
    except:
        tb_str = '```\n' + trimmed_format_exc() + '```'
        chatbot[-1] = (inputs, tb_str)
        yield from update_ui(chatbot=chatbot, history=history, msg="Endpoint不满足要求") # refresh UI
        return

    history.append(inputs); history.append("")

    retry = 0
    while True:
        try:
            # make a POST request to the API endpoint, stream=True
            response = requests.post(endpoint, headers=headers, proxies=proxies,
                                    json=payload, stream=True, timeout=TIMEOUT_SECONDS);break
        except:
            retry += 1
            chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
            retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
            yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # refresh UI
            if retry > MAX_RETRY: raise TimeoutError

    gpt_replying_buffer = ""

    is_head_of_the_stream = True
    if stream:
        stream_response = response.iter_lines()
        while True:
            try:
                chunk = next(stream_response)
            except StopIteration:
                # non-official endpoints can end the stream this way; OpenAI and API2D never reach here
                chunk_decoded = chunk.decode()
                error_msg = chunk_decoded
                # first rule out a one-api third-party bug: stream ended without a DONE packet
                if len(gpt_replying_buffer.strip()) > 0 and len(error_msg) == 0:
                    yield from update_ui(chatbot=chatbot, history=history, msg="检测到有缺陷的非OpenAI官方接口,建议选择更稳定的接口。")
                    break
                # otherwise surface the error directly
                chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                yield from update_ui(chatbot=chatbot, history=history, msg="非OpenAI官方接口返回了错误:" + chunk.decode()) # refresh UI
                return

            # peeked flags (used for anomaly detection)
            chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role = decode_chunk(chunk)

            if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r"content" not in chunk_decoded):
                # the first frame of the stream carries no content
                is_head_of_the_stream = False; continue

            if chunk:
                try:
                    if has_choices and not choice_valid:
                        # some flaky third-party endpoints emit empty choice lists
                        continue
                    # former: API2D termination condition; latter: OpenAI termination condition
                    if ('data: [DONE]' in chunk_decoded) or (len(chunkjson['choices'][0]["delta"]) == 0):
                        # end of stream — gpt_replying_buffer is complete
                        logging.info(f'[response] {gpt_replying_buffer}')
                        break
                    # main body of the stream
                    status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
                    # an exception here usually means the text was too long (see get_full_error output for details)
                    if has_content:
                        # normal case
                        gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]
                    elif has_role:
                        # tolerate third-party endpoints that resend the role frame
                        continue
                    else:
                        # some flaky third-party endpoints — try reading content anyway
                        gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]

                    history[-1] = gpt_replying_buffer
                    chatbot[-1] = (history[-2], history[-1])
                    yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # refresh UI
                except Exception as e:
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # refresh UI
                    chunk = get_full_error(chunk, stream_response)
                    chunk_decoded = chunk.decode()
                    error_msg = chunk_decoded
                    chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # refresh UI
                    print(error_msg)
                    return
277
+
278
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
    """Map a raw OpenAI/API2D error payload onto a user-facing chat message.

    Mutates chatbot[-1]; for token-overflow errors it additionally clips the
    history. Returns the (possibly updated) chatbot and history.
    """
    from .bridge_all import model_info
    openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
    if "reduce the length" in error_msg:
        # token overflow: drop the overflowing turn (history[-2] is this input, history[-1] this output)
        if len(history) >= 2: history[-1] = ""; history[-2] = ""
        history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # release at least half of the history
        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
        return chatbot, history
    # ordered (needle, message) table — first match wins, same order as the old elif chain
    known_errors = (
        ("does not exist", f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格."),
        ("Incorrect API key", "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website),
        ("exceeded your current quota", "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." + openai_website),
        ("account is not active", "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website),
        ("associated with a deactivated account", "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website),
        ("API key has been deactivated", "[Local Message] API key has been deactivated. OpenAI以账户失效为由, 拒绝服务." + openai_website),
        ("bad forward key", "[Local Message] Bad forward key. API2D账户额度不足."),
        ("Not enough point", "[Local Message] Not enough point. API2D账户点数不足."),
    )
    for needle, message in known_errors:
        if needle in error_msg:
            chatbot[-1] = (chatbot[-1][0], message)
            return chatbot, history
    # unrecognized error: show the traceback plus the raw payload
    from toolbox import regular_txt_to_markdown
    tb_str = '```\n' + trimmed_format_exc() + '```'
    chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
    return chatbot, history
307
+
308
def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
    """
    Assemble everything, pick the LLM model, and build the HTTP headers and
    JSON payload for the chat-completions request.
    Raises AssertionError when no usable api-key is configured.
    """
    if not is_any_api_key(llm_kwargs['api_key']):
        raise AssertionError("你提供了错误的API_KEY。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。")

    api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    if API_ORG.startswith('org-'): headers.update({"OpenAI-Organization": API_ORG})
    if llm_kwargs['llm_model'].startswith('azure-'):
        # Azure authenticates with an "api-key" header instead of a Bearer token
        headers.update({"api-key": api_key})
        if llm_kwargs['llm_model'] in AZURE_CFG_ARRAY.keys():
            # per-deployment key overrides the shared one
            azure_api_key_unshared = AZURE_CFG_ARRAY[llm_kwargs['llm_model']]["AZURE_API_KEY"]
            headers.update({"api-key": azure_api_key_unshared})

    conversation_cnt = len(history) // 2

    messages = [{"role": "system", "content": system_prompt}]
    if conversation_cnt:
        # history is a flat [user, assistant, user, assistant, ...] list
        for index in range(0, 2*conversation_cnt, 2):
            what_i_have_asked = {}
            what_i_have_asked["role"] = "user"
            what_i_have_asked["content"] = history[index]
            what_gpt_answer = {}
            what_gpt_answer["role"] = "assistant"
            what_gpt_answer["content"] = history[index+1]
            if what_i_have_asked["content"] != "":
                # skip turns that failed (empty or timed-out answers)
                if what_gpt_answer["content"] == "": continue
                if what_gpt_answer["content"] == timeout_bot_msg: continue
                messages.append(what_i_have_asked)
                messages.append(what_gpt_answer)
            else:
                # empty question: fold the answer into the previous message
                messages[-1]['content'] = what_gpt_answer['content']

    what_i_ask_now = {}
    what_i_ask_now["role"] = "user"
    what_i_ask_now["content"] = inputs
    messages.append(what_i_ask_now)
    model = llm_kwargs['llm_model']
    if llm_kwargs['llm_model'].startswith('api2d-'):
        # strip the routing prefix before sending upstream
        model = llm_kwargs['llm_model'][len('api2d-'):]

    if model == "gpt-3.5-random": # random selection to dodge openai per-model rate limits
        model = random.choice([
            "gpt-3.5-turbo",
            "gpt-3.5-turbo-16k",
            "gpt-3.5-turbo-1106",
            "gpt-3.5-turbo-0613",
            "gpt-3.5-turbo-16k-0613",
            "gpt-3.5-turbo-0301",
        ])
        logging.info("Random select model:" + model)

    payload = {
        "model": model,
        "messages": messages,
        "temperature": llm_kwargs['temperature'],  # 1.0,
        "top_p": llm_kwargs['top_p'],  # 1.0,
        "n": 1,
        "stream": stream,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }
    try:
        print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
    except:
        print('输入中可能存在乱码。')
    return headers,payload
381
+
382
+
request_llms/bridge_chatgpt_vision.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 该文件中主要包含三个函数
3
+
4
+ 不具备多线程能力的函数:
5
+ 1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
6
+
7
+ 具备多线程调用能力的函数
8
+ 2. predict_no_ui_long_connection:支持多线程
9
+ """
10
+
11
+ import json
12
+ import time
13
+ import logging
14
+ import requests
15
+ import base64
16
+ import os
17
+ import glob
18
+ from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder, \
19
+ update_ui_lastest_msg, get_max_token, encode_image, have_any_recent_upload_image_files
20
+
21
+
22
+ proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG, AZURE_CFG_ARRAY = \
23
+ get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG', 'AZURE_CFG_ARRAY')
24
+
25
+ timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
26
+ '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
27
+
28
+
29
def report_invalid_key(key):
    """Experimental: add *key* to the api-key blacklist when the
    BLOCK_INVALID_APIKEY config flag is enabled; otherwise a no-op."""
    if get_conf("BLOCK_INVALID_APIKEY"):
        # experimental feature: auto-detect and block invalid keys — do not rely on it
        from request_llms.key_manager import ApiKeyManager
        # FIX: the return value was previously bound to an unused local `api_key`
        ApiKeyManager().add_key_to_blacklist(key)
34
+
35
def get_full_error(chunk, stream_response):
    """Recover the complete error payload returned by OpenAI by draining
    whatever remains of *stream_response* and appending it to *chunk*."""
    try:
        for piece in stream_response:
            chunk += piece
    except:
        pass  # stream died mid-drain — return what we have
    return chunk
45
+
46
def decode_chunk(chunk):
    """Pre-parse one SSE chunk from the chat-completions stream.

    Returns (decoded_text, parsed_json_or_None, has_choices, choice_valid,
    has_content, has_role). Never raises.
    """
    # peek some flags early (used downstream for anomaly detection)
    chunk_decoded = chunk.decode()
    chunkjson = None
    has_choices = False
    choice_valid = False
    has_content = False
    has_role = False
    try:
        chunkjson = json.loads(chunk_decoded[6:])  # strip the leading "data: " prefix
        has_choices = 'choices' in chunkjson
        if has_choices: choice_valid = (len(chunkjson['choices']) > 0)
        if has_choices and choice_valid: has_content = "content" in chunkjson['choices'][0]["delta"]
        # FIX (consistency with bridge_chatgpt.decode_chunk): treat an explicit
        # null content as absent, otherwise the caller would concatenate None
        # into the reply buffer and crash
        if has_content: has_content = (chunkjson['choices'][0]["delta"]["content"] is not None)
        if has_choices and choice_valid: has_role = "role" in chunkjson['choices'][0]["delta"]
    except:
        pass
    return chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role
63
+
64
from functools import lru_cache

@lru_cache(maxsize=32)
def verify_endpoint(endpoint):
    """Pass-through endpoint check for the vision bridge (no validation
    currently required); result cached per endpoint."""
    return endpoint
71
+
72
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
    # The vision bridge only supports the interactive predict() path;
    # no multi-threaded batch interface has been implemented yet.
    raise NotImplementedError
74
+
75
+
76
+ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
77
+
78
+ have_recent_file, image_paths = have_any_recent_upload_image_files(chatbot)
79
+
80
+ if is_any_api_key(inputs):
81
+ chatbot._cookies['api_key'] = inputs
82
+ chatbot.append(("输入已识别为openai的api_key", what_keys(inputs)))
83
+ yield from update_ui(chatbot=chatbot, history=history, msg="api_key已导入") # 刷新界面
84
+ return
85
+ elif not is_any_api_key(chatbot._cookies['api_key']):
86
+ chatbot.append((inputs, "缺少api_key。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。"))
87
+ yield from update_ui(chatbot=chatbot, history=history, msg="缺少api_key") # 刷新界面
88
+ return
89
+ if not have_recent_file:
90
+ chatbot.append((inputs, "没有检测到任何近期上传的图像文件,请上传jpg格式的图片,此外,请注意拓展名需要小写"))
91
+ yield from update_ui(chatbot=chatbot, history=history, msg="等待图片") # 刷新界面
92
+ return
93
+ if os.path.exists(inputs):
94
+ chatbot.append((inputs, "已经接收到您上传的文件,您不需要再重复强调该文件的路径了,请直接输入您的问题。"))
95
+ yield from update_ui(chatbot=chatbot, history=history, msg="等待指令") # 刷新界面
96
+ return
97
+
98
+
99
+ user_input = inputs
100
+ if additional_fn is not None:
101
+ from core_functional import handle_core_functionality
102
+ inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
103
+
104
+ raw_input = inputs
105
+ logging.info(f'[raw_input] {raw_input}')
106
+ def make_media_input(inputs, image_paths):
107
+ for image_path in image_paths:
108
+ inputs = inputs + f'<br/><br/><div align="center"><img src="file={os.path.abspath(image_path)}"></div>'
109
+ return inputs
110
+ chatbot.append((make_media_input(inputs, image_paths), ""))
111
+ yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
112
+
113
+ # check mis-behavior
114
+ if is_the_upload_folder(user_input):
115
+ chatbot[-1] = (inputs, f"[Local Message] 检测到操作错误!当您上传文档之后,需点击“**函数插件区**”按钮进行处理,请勿点击“提交”按钮或者“基础功能区”按钮。")
116
+ yield from update_ui(chatbot=chatbot, history=history, msg="正常") # 刷新界面
117
+ time.sleep(2)
118
+
119
+ try:
120
+ headers, payload, api_key = generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths)
121
+ except RuntimeError as e:
122
+ chatbot[-1] = (inputs, f"您提供的api-key不满足要求,不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
123
+ yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # 刷新界面
124
+ return
125
+
126
+ # 检查endpoint是否合法
127
+ try:
128
+ from .bridge_all import model_info
129
+ endpoint = verify_endpoint(model_info[llm_kwargs['llm_model']]['endpoint'])
130
+ except:
131
+ tb_str = '```\n' + trimmed_format_exc() + '```'
132
+ chatbot[-1] = (inputs, tb_str)
133
+ yield from update_ui(chatbot=chatbot, history=history, msg="Endpoint不满足要求") # 刷新界面
134
+ return
135
+
136
+ history.append(make_media_input(inputs, image_paths))
137
+ history.append("")
138
+
139
+ retry = 0
140
+ while True:
141
+ try:
142
+ # make a POST request to the API endpoint, stream=True
143
+ response = requests.post(endpoint, headers=headers, proxies=proxies,
144
+ json=payload, stream=True, timeout=TIMEOUT_SECONDS);break
145
+ except:
146
+ retry += 1
147
+ chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
148
+ retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
149
+ yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面
150
+ if retry > MAX_RETRY: raise TimeoutError
151
+
152
+ gpt_replying_buffer = ""
153
+
154
+ is_head_of_the_stream = True
155
+ if stream:
156
+ stream_response = response.iter_lines()
157
+ while True:
158
+ try:
159
+ chunk = next(stream_response)
160
+ except StopIteration:
161
+ # 非OpenAI官方接口的出现这样的报错,OpenAI和API2D不会走这里
162
+ chunk_decoded = chunk.decode()
163
+ error_msg = chunk_decoded
164
+ # 首先排除一个one-api没有done数据包的第三方Bug情形
165
+ if len(gpt_replying_buffer.strip()) > 0 and len(error_msg) == 0:
166
+ yield from update_ui(chatbot=chatbot, history=history, msg="检测到有缺陷的非OpenAI官方接口,建议选择更稳定的接口。")
167
+ break
168
+ # 其他情况,直接返回报错
169
+ chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg, api_key)
170
+ yield from update_ui(chatbot=chatbot, history=history, msg="非OpenAI官方接口返回了错误:" + chunk.decode()) # 刷新界面
171
+ return
172
+
173
+ # 提前读取一些信息 (用于判断异常)
174
+ chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role = decode_chunk(chunk)
175
+
176
+ if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r"content" not in chunk_decoded):
177
+ # 数据流的第一帧不携带content
178
+ is_head_of_the_stream = False; continue
179
+
180
+ if chunk:
181
+ try:
182
+ if has_choices and not choice_valid:
183
+ # 一些垃圾第三方接口的出现这样的错误
184
+ continue
185
+ # 前者是API2D的结束条件,后者是OPENAI的结束条件
186
+ if ('data: [DONE]' in chunk_decoded) or (len(chunkjson['choices'][0]["delta"]) == 0):
187
+ # 判定为数据流的结束,gpt_replying_buffer也写完了
188
+ lastmsg = chatbot[-1][-1] + f"\n\n\n\n「{llm_kwargs['llm_model']}调用结束,该模型不具备上下文对话能力,如需追问,请及时切换模型。」"
189
+ yield from update_ui_lastest_msg(lastmsg, chatbot, history, delay=1)
190
+ logging.info(f'[response] {gpt_replying_buffer}')
191
+ break
192
+ # 处理数据流的主体
193
+ status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
194
+ # 如果这里抛出异常,一般是文本过长,详情见get_full_error的输出
195
+ if has_content:
196
+ # 正常情况
197
+ gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]
198
+ elif has_role:
199
+ # 一些第三方接口的出现这样的错误,兼容一下吧
200
+ continue
201
+ else:
202
+ # 一些垃圾第三方接口的出现这样的错误
203
+ gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]
204
+
205
+ history[-1] = gpt_replying_buffer
206
+ chatbot[-1] = (history[-2], history[-1])
207
+ yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # 刷新界面
208
+ except Exception as e:
209
+ yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面
210
+ chunk = get_full_error(chunk, stream_response)
211
+ chunk_decoded = chunk.decode()
212
+ error_msg = chunk_decoded
213
+ chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg, api_key)
214
+ yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
215
+ print(error_msg)
216
+ return
217
+
218
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg, api_key=""):
    """Map a raw error payload from the API onto a user-facing chatbot message.

    Returns the (possibly modified) chatbot and history. When the error
    indicates an invalid/expired key, the key is reported via report_invalid_key.
    """
    from .bridge_all import model_info
    openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
    # Token overflow: free part of the cached history so the user can retry.
    if "reduce the length" in error_msg:
        if len(history) >= 2:
            history[-1] = ""   # clear this round's output
            history[-2] = ""   # clear this round's input
        history = clip_history(inputs=inputs, history=history,
                               tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token']))  # frees at least half the history
        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
        return chatbot, history
    # Ordered (needle, message, is_key_problem) table; first match wins,
    # mirroring the original if/elif chain.
    known_errors = (
        ("does not exist", f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.", False),
        ("Incorrect API key", "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website, True),
        ("exceeded your current quota", "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." + openai_website, True),
        ("account is not active", "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website, True),
        ("associated with a deactivated account", "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website, True),
        ("API key has been deactivated", "[Local Message] API key has been deactivated. OpenAI以账户失效为由, 拒绝服务." + openai_website, True),
        ("bad forward key", "[Local Message] Bad forward key. API2D账户额度不足.", False),
        ("Not enough point", "[Local Message] Not enough point. API2D账户点数不足.", False),
    )
    for needle, message, is_key_problem in known_errors:
        if needle in error_msg:
            chatbot[-1] = (chatbot[-1][0], message)
            if is_key_problem:
                report_invalid_key(api_key)
            return chatbot, history
    # Unrecognized error: surface the traceback plus the raw chunk.
    from toolbox import regular_txt_to_markdown
    tb_str = '```\n' + trimmed_format_exc() + '```'
    chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
    return chatbot, history
247
+
248
+
249
def generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths):
    """
    Assemble the HTTP headers and JSON payload for a vision-capable chat
    request (text prompt plus base64-encoded images).

    Raises AssertionError when no usable api-key is configured.
    Returns (headers, payload, api_key).
    """
    if not is_any_api_key(llm_kwargs['api_key']):
        raise AssertionError("你提供了错误的API_KEY。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。")

    api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    if API_ORG.startswith('org-'):
        headers["OpenAI-Organization"] = API_ORG
    if llm_kwargs['llm_model'].startswith('azure-'):
        headers["api-key"] = api_key
        if llm_kwargs['llm_model'] in AZURE_CFG_ARRAY.keys():
            # a dedicated (unshared) Azure key overrides the generic one
            headers["api-key"] = AZURE_CFG_ARRAY[llm_kwargs['llm_model']]["AZURE_API_KEY"]

    # One text part followed by one image_url part per attachment.
    content_parts = [{"type": "text", "text": inputs}]
    for image_path in image_paths:
        content_parts.append({
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
            }
        })
    messages = [{"role": "user", "content": content_parts}]

    model = llm_kwargs['llm_model']
    if model.startswith('api2d-'):
        model = model[len('api2d-'):]

    payload = {
        "model": model,
        "messages": messages,
        "temperature": llm_kwargs['temperature'], # 1.0,
        "top_p": llm_kwargs['top_p'], # 1.0,
        "n": 1,
        "stream": True,
        "max_tokens": get_max_token(llm_kwargs),
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }
    try:
        print(f" {llm_kwargs['llm_model']} : {inputs[:100]} ..........")
    except:
        print('输入中可能存在乱码。')
    return headers, payload, api_key
311
+
312
+
request_llms/bridge_chatgpt_website.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 借鉴了 https://github.com/GaiZhenbiao/ChuanhuChatGPT 项目
2
+
3
+ """
4
+ 该文件中主要包含三个函数
5
+
6
+ 不具备多线程能力的函数:
7
+ 1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
8
+
9
+ 具备多线程调用能力的函数
10
+ 2. predict_no_ui_long_connection:支持多线程
11
+ """
12
+
13
+ import json
14
+ import time
15
+ import gradio as gr
16
+ import logging
17
+ import traceback
18
+ import requests
19
+ import importlib
20
+
21
+ # config_private.py放自己的秘密如API和代理网址
22
+ # 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件
23
+ from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc
24
+ proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG = \
25
+ get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG')
26
+
27
+ timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
28
+ '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
29
+
30
def get_full_error(chunk, stream_response):
    """Drain the remainder of `stream_response` and append it to `chunk`,
    so the complete error body returned by OpenAI can be inspected.
    Any exception while reading (including exhaustion) stops the drain."""
    draining = True
    while draining:
        try:
            chunk = chunk + next(stream_response)
        except:
            draining = False
    return chunk
40
+
41
+
42
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
    """
    Send a query to chatGPT and wait for the complete reply (no intermediate
    UI updates); streaming is still used internally so a dropped connection
    is detected early.

    inputs: the text of this query
    sys_prompt: the silent system prompt
    llm_kwargs: chatGPT tuning parameters
    history: previous conversation turns
    observe_window: cross-thread channel; observe_window[0] is the output
        mirror, observe_window[1] is the watchdog timestamp. Leave None to
        disable.
    Returns the accumulated reply text.
    """
    watch_dog_patience = 5  # watchdog patience, in seconds
    headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
    retry = 0
    while True:
        try:
            # make a POST request to the API endpoint, stream=True
            from .bridge_all import model_info
            endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
            response = requests.post(endpoint, headers=headers, proxies=proxies,
                                     json=payload, stream=True, timeout=TIMEOUT_SECONDS); break
        except requests.exceptions.ReadTimeout as e:
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
            if MAX_RETRY != 0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')

    stream_response = response.iter_lines()
    result = ''
    while True:
        try: chunk = next(stream_response).decode()
        except StopIteration:
            break
        except requests.exceptions.ConnectionError:
            chunk = next(stream_response).decode()  # retry once; a second failure is fatal
        if len(chunk) == 0: continue
        if not chunk.startswith('data:'):
            # not an SSE data frame: drain the stream to get the full error body
            error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
            if "reduce the length" in error_msg:
                raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
            else:
                raise RuntimeError("OpenAI拒绝了请求:" + error_msg)
        if ('data: [DONE]' in chunk): break  # api2d finished normally
        json_data = json.loads(chunk.lstrip('data:'))['choices'][0]
        delta = json_data["delta"]
        if len(delta) == 0: break  # empty delta marks the end of the stream
        if "role" in delta: continue  # role-only frame, no content yet
        if "content" in delta:
            result += delta["content"]
            if not console_slience: print(delta["content"], end='')
            if observe_window is not None:
                # mirror the partial output to the observer thread
                if len(observe_window) >= 1: observe_window[0] += delta["content"]
                # watchdog: abort if the caller stopped feeding the timestamp
                if len(observe_window) >= 2:
                    if (time.time() - observe_window[1]) > watch_dog_patience:
                        raise RuntimeError("用户取消了程序。")
        else:
            # BUGFIX: `delta` is a dict — concatenating it directly to a str
            # raised TypeError and masked the real problem; stringify it first.
            raise RuntimeError("意外Json结构:" + str(delta))
        # BUGFIX: use .get() — some third-party endpoints omit finish_reason,
        # which previously raised KeyError here.
        if json_data.get('finish_reason') == 'content_filter':
            raise RuntimeError("由于提问含不合规内容被Azure过滤。")
        if json_data.get('finish_reason') == 'length':
            raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。")
    return result
108
+
109
+
110
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    Send a query to chatGPT and stream the reply into the UI.
    Used for the basic conversation feature.

    inputs: the text of this query
    top_p, temperature: chatGPT tuning parameters (inside llm_kwargs)
    history: previous conversation turns (if inputs or history is too long,
        a token-overflow error will be triggered)
    chatbot: the conversation list shown in the WebUI; modifying it and
        yielding refreshes the dialog display
    additional_fn: which "basic function" button was clicked, see functional.py
    """
    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    raw_input = inputs
    logging.info(f'[raw_input] {raw_input}')
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # refresh the UI

    try:
        headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream)
    except RuntimeError as e:
        chatbot[-1] = (inputs, f"您提供的api-key不满足要求,不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
        yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # refresh the UI
        return

    history.append(inputs); history.append("")

    retry = 0
    while True:
        try:
            # make a POST request to the API endpoint, stream=True
            from .bridge_all import model_info
            endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
            response = requests.post(endpoint, headers=headers, proxies=proxies,
                                    json=payload, stream=True, timeout=TIMEOUT_SECONDS);break
        except:
            retry += 1
            chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
            retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
            yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # refresh the UI
            if retry > MAX_RETRY: raise TimeoutError

    gpt_replying_buffer = ""

    is_head_of_the_stream = True
    if stream:
        stream_response = response.iter_lines()
        while True:
            try:
                chunk = next(stream_response)
            except StopIteration:
                # Non-official OpenAI endpoints can end the stream this way;
                # official OpenAI and API2D never reach this branch.
                chunk_decoded = chunk.decode()
                error_msg = chunk_decoded
                chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                yield from update_ui(chatbot=chatbot, history=history, msg="非Openai官方接口返回了错误:" + chunk.decode()) # refresh the UI
                return

            # print(chunk.decode()[6:])
            if is_head_of_the_stream and (r'"object":"error"' not in chunk.decode()):
                # the first frame of the stream carries no content
                is_head_of_the_stream = False; continue

            if chunk:
                try:
                    chunk_decoded = chunk.decode()
                    # the former is API2D's end condition, the latter OPENAI's
                    if 'data: [DONE]' in chunk_decoded:
                        # the stream has ended and gpt_replying_buffer is complete
                        logging.info(f'[response] {gpt_replying_buffer}')
                        break
                    # handle the body of the stream
                    chunkjson = json.loads(chunk_decoded[6:])
                    status_text = f"finish_reason: {chunkjson['choices'][0]['finish_reason']}"
                    delta = chunkjson['choices'][0]["delta"]
                    if "content" in delta:
                        gpt_replying_buffer = gpt_replying_buffer + delta["content"]
                    history[-1] = gpt_replying_buffer
                    chatbot[-1] = (history[-2], history[-1])
                    yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # refresh the UI
                except Exception as e:
                    # Usually means the payload was not valid JSON (e.g. the
                    # text was too long); drain the stream for the full error.
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # refresh the UI
                    chunk = get_full_error(chunk, stream_response)
                    chunk_decoded = chunk.decode()
                    error_msg = chunk_decoded
                    chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # refresh the UI
                    print(error_msg)
                    return
200
+
201
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
    """Translate a raw API error payload into a user-facing chatbot message.

    Returns the (possibly modified) chatbot and history.
    """
    from .bridge_all import model_info
    openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
    # Token overflow: release part of the cached history so a retry can succeed.
    if "reduce the length" in error_msg:
        if len(history) >= 2:
            history[-1] = ""   # clear this round's output
            history[-2] = ""   # clear this round's input
        history = clip_history(inputs=inputs, history=history,
                               tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token']))  # frees at least half the history
        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
        return chatbot, history
    # Ordered (needle, reply) pairs — first match wins, mirroring the
    # original if/elif chain.
    known_errors = (
        ("does not exist", f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格."),
        ("Incorrect API key", "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website),
        ("exceeded your current quota", "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." + openai_website),
        ("account is not active", "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website),
        ("associated with a deactivated account", "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website),
        ("bad forward key", "[Local Message] Bad forward key. API2D账户额度不足."),
        ("Not enough point", "[Local Message] Not enough point. API2D账户点数不足."),
    )
    for needle, reply in known_errors:
        if needle in error_msg:
            chatbot[-1] = (chatbot[-1][0], reply)
            return chatbot, history
    # Unrecognized error: surface the traceback plus the raw chunk.
    from toolbox import regular_txt_to_markdown
    tb_str = '```\n' + trimmed_format_exc() + '```'
    chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
    return chatbot, history
229
+
230
def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
    """
    Assemble everything (model choice, conversation history, new query) into
    the HTTP headers and JSON payload of a chat-completion request.

    history: alternating [user, assistant, user, assistant, ...] turns.
    Raises AssertionError when no valid api-key is configured.
    Returns (headers, payload).
    """
    if not is_any_api_key(llm_kwargs['api_key']):
        raise AssertionError("你提供了错误的API_KEY。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。")

    headers = {
        "Content-Type": "application/json",
    }

    conversation_cnt = len(history) // 2

    messages = [{"role": "system", "content": system_prompt}]
    if conversation_cnt:
        for index in range(0, 2*conversation_cnt, 2):
            what_i_have_asked = {}
            what_i_have_asked["role"] = "user"
            what_i_have_asked["content"] = history[index]
            what_gpt_answer = {}
            what_gpt_answer["role"] = "assistant"
            what_gpt_answer["content"] = history[index+1]
            if what_i_have_asked["content"] != "":
                # skip rounds whose answer is empty or was a timeout message
                if what_gpt_answer["content"] == "": continue
                if what_gpt_answer["content"] == timeout_bot_msg: continue
                messages.append(what_i_have_asked)
                messages.append(what_gpt_answer)
            else:
                # empty user turn: fold the answer into the previous message
                messages[-1]['content'] = what_gpt_answer['content']

    what_i_ask_now = {}
    what_i_ask_now["role"] = "user"
    what_i_ask_now["content"] = inputs
    messages.append(what_i_ask_now)

    model = llm_kwargs['llm_model']
    if model.startswith('api2d-'):
        # BUGFIX: the original used llm_kwargs['llm_model'].strip('api2d-').
        # str.strip removes a *character set* from both ends (any of
        # 'a p i 2 d -'), not a prefix, so model names beginning or ending
        # with those characters could be corrupted. Remove the prefix exactly.
        model = model[len('api2d-'):]

    payload = {
        "model": model,
        "messages": messages,
        "temperature": llm_kwargs['temperature'], # 1.0,
        "top_p": llm_kwargs['top_p'], # 1.0,
        "n": 1,
        "stream": stream,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }
    try:
        print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
    except:
        print('输入中可能存在乱码。')
    return headers, payload
280
+
281
+
request_llms/bridge_deepseekcoder.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name = "deepseek-coder-6.7b-instruct"
2
+ cmd_to_install = "未知" # "`pip install -r request_llms/requirements_qwen.txt`"
3
+
4
+ import os
5
+ from toolbox import ProxyNetworkActivate
6
+ from toolbox import get_conf
7
+ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
8
+ from threading import Thread
9
+ import torch
10
+
11
def download_huggingface_model(model_name, max_retry, local_dir):
    """Download a HuggingFace repo snapshot into `local_dir`, retrying on failure.

    Args:
        model_name: repo id, e.g. "deepseek-ai/deepseek-coder-6.7b-instruct".
        max_retry: maximum number of download attempts.
        local_dir: target directory for the snapshot.

    Returns:
        local_dir on success.

    Raises:
        RuntimeError: when every attempt fails. (The original silently
        returned the path of a possibly-incomplete directory in this case,
        and its `range(1, max_retry)` performed only max_retry-1 attempts.)
    """
    from huggingface_hub import snapshot_download
    last_exc = None
    for i in range(1, max_retry + 1):
        try:
            snapshot_download(repo_id=model_name, local_dir=local_dir, resume_download=True)
            return local_dir
        except Exception as e:
            last_exc = e
            print(f'\n\n下载失败,重试第{i}次中...\n\n')
    raise RuntimeError(f"Failed to download {model_name} after {max_retry} attempts") from last_exc
20
+ # ------------------------------------------------------------------------------------------------------------------------
21
+ # 🔌💻 Local Model
22
+ # ------------------------------------------------------------------------------------------------------------------------
23
class GetCoderLMHandle(LocalLLMHandle):
    """Subprocess-backed handle for the local deepseek-coder-6.7b-instruct model.

    LocalLLMHandle runs the load_* methods and llm_stream_generator in a
    child process; only try_to_import_special_deps runs in the main process.
    """

    def load_model_info(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        # Downloads (through the configured proxy) and instantiates the model
        # and tokenizer, honoring the LOCAL_MODEL_QUANT / LOCAL_MODEL_DEVICE
        # configuration. Returns (model, tokenizer).
        with ProxyNetworkActivate('Download_LLM'):
            from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
            model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"
            # local_dir = f"~/.cache/{model_name}"
            # if not os.path.exists(local_dir):
            #     tokenizer = download_huggingface_model(model_name, max_retry=128, local_dir=local_dir)
            tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
            self._streamer = TextIteratorStreamer(tokenizer)
            # Map every listed submodule onto device 0 (covers both the
            # "transformer.*" and the "model.*" parameter naming layouts).
            device_map = {
                "transformer.word_embeddings": 0,
                "transformer.word_embeddings_layernorm": 0,
                "lm_head": 0,
                "transformer.h": 0,
                "transformer.ln_f": 0,
                "model.embed_tokens": 0,
                "model.layers": 0,
                "model.norm": 0,
            }

            # check the quantization configuration
            quantization_type = get_conf('LOCAL_MODEL_QUANT')

            if get_conf('LOCAL_MODEL_DEVICE') != 'cpu':
                if quantization_type == "INT8":
                    from transformers import BitsAndBytesConfig
                    # load with INT8 quantization
                    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, load_in_8bit=True,
                                                                 device_map=device_map)
                elif quantization_type == "INT4":
                    from transformers import BitsAndBytesConfig
                    # load with INT4 (nf4) quantization
                    bnb_config = BitsAndBytesConfig(
                        load_in_4bit=True,
                        bnb_4bit_use_double_quant=True,
                        bnb_4bit_quant_type="nf4",
                        bnb_4bit_compute_dtype=torch.bfloat16
                    )
                    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True,
                                                                 quantization_config=bnb_config, device_map=device_map)
                else:
                    # default: bfloat16 weights on GPU
                    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True,
                                                                 torch_dtype=torch.bfloat16, device_map=device_map)
            else:
                # CPU mode
                model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True,
                                                             torch_dtype=torch.bfloat16)

        return model, tokenizer

    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        # Yields the cumulative generated text after each new token.
        def adaptor(kwargs):
            # Unpack the generation request forwarded by LocalLLMHandle.
            query = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            return query, max_length, top_p, temperature, history

        query, max_length, top_p, temperature, history = adaptor(kwargs)
        history.append({ 'role': 'user', 'content': query})
        messages = history
        inputs = self._tokenizer.apply_chat_template(messages, return_tensors="pt")
        # Keep only the most recent max_length tokens if the prompt overflows.
        if inputs.shape[1] > max_length:
            inputs = inputs[:, -max_length:]
        inputs = inputs.to(self._model.device)
        # NOTE(review): do_sample=False makes top_p/temperature effectively
        # no-ops here (generation is greedy) — confirm whether sampling was
        # intended.
        generation_kwargs = dict(
            inputs=inputs,
            max_new_tokens=max_length,
            do_sample=False,
            top_p=top_p,
            streamer = self._streamer,
            top_k=50,
            temperature=temperature,
            num_return_sequences=1,
            eos_token_id=32021,  # presumably deepseek-coder's <|EOT|> id — TODO confirm
        )
        # Run generate() on a daemon thread; tokens arrive via the streamer.
        thread = Thread(target=self._model.generate, kwargs=generation_kwargs, daemon=True)
        thread.start()
        generated_text = ""
        for new_text in self._streamer:
            generated_text += new_text
            # print(generated_text)
            yield generated_text


    def try_to_import_special_deps(self, **kwargs): pass
        # import something that will raise error if the user does not install requirement_*.txt
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the main process
        # import importlib
        # importlib.import_module('modelscope')
124
+
125
+
126
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
# Expose the standard (predict_no_ui_long_connection, predict) pair expected
# by bridge_all, backed by the local DeepSeek-Coder handle defined above.
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetCoderLMHandle, model_name, history_format='chatglm3')
request_llms/bridge_google_gemini.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # encoding: utf-8
2
+ # @Time : 2023/12/21
3
+ # @Author : Spike
4
+ # @Descr :
5
+ import json
6
+ import re
7
+ import os
8
+ import time
9
+ from request_llms.com_google import GoogleChatInit
10
+ from toolbox import get_conf, update_ui, update_ui_lastest_msg, have_any_recent_upload_image_files, trimmed_format_exc
11
+
12
+ proxies, TIMEOUT_SECONDS, MAX_RETRY = get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY')
13
+ timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
14
+ '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
15
+
16
+
17
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None,
                                  console_slience=False):
    """
    Send a query to Gemini and block until the full reply has streamed in.

    observe_window: optional cross-thread channel — observe_window[0] mirrors
        the partial output, observe_window[1] is the watchdog timestamp.
    Returns the accumulated reply text.
    Raises ValueError when GEMINI_API_KEY is missing or a chunk cannot be
    parsed, RuntimeError on watchdog timeout or an API-reported error.
    """
    # check the API key
    if get_conf("GEMINI_API_KEY") == "":
        raise ValueError(f"请配置 GEMINI_API_KEY。")

    genai = GoogleChatInit()
    watch_dog_patience = 5  # watchdog patience, in seconds
    gpt_replying_buffer = ''
    stream_response = genai.generate_chat(inputs, llm_kwargs, history, sys_prompt)
    for response in stream_response:
        results = response.decode()
        match = re.search(r'"text":\s*"((?:[^"\\]|\\.)*)"', results, flags=re.DOTALL)
        error_match = re.search(r'\"message\":\s*\"(.*?)\"', results, flags=re.DOTALL)
        if match:
            try:
                # round-trip through json so escape sequences are decoded
                paraphrase = json.loads('{"text": "%s"}' % match.group(1))
            except:
                raise ValueError(f"解析GEMINI消息出错。")
            buffer = paraphrase['text']
            gpt_replying_buffer += buffer
            # BUGFIX: observe_window defaults to None; the original called
            # len(observe_window) unconditionally and raised TypeError when
            # the caller relied on the default.
            if observe_window is not None:
                if len(observe_window) >= 1:
                    observe_window[0] = gpt_replying_buffer
                if len(observe_window) >= 2:
                    if (time.time() - observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。")
        if error_match:
            raise RuntimeError(f'{gpt_replying_buffer} 对话错误')
    return gpt_replying_buffer
45
+
46
+
47
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream=True, additional_fn=None):
    """
    Stream a Gemini chat completion into the WebUI, yielding UI refreshes as
    tokens arrive. On an API error, the failed round is dropped from
    `history` and RuntimeError is raised.
    """
    # check the API key
    if get_conf("GEMINI_API_KEY") == "":
        yield from update_ui_lastest_msg(f"请配置 GEMINI_API_KEY。", chatbot=chatbot, history=history, delay=0)
        return

    if "vision" in llm_kwargs["llm_model"]:
        have_recent_file, image_paths = have_any_recent_upload_image_files(chatbot)
        def make_media_input(inputs, image_paths):
            # inline recently uploaded images into the prompt shown in the UI
            for image_path in image_paths:
                inputs = inputs + f'<br/><br/><div align="center"><img src="file={os.path.abspath(image_path)}"></div>'
            return inputs
        if have_recent_file:
            inputs = make_media_input(inputs, image_paths)

    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history)
    genai = GoogleChatInit()
    retry = 0
    while True:
        try:
            stream_response = genai.generate_chat(inputs, llm_kwargs, history, system_prompt)
            break
        except Exception as e:
            retry += 1
            chatbot[-1] = ((chatbot[-1][0], trimmed_format_exc()))
            yield from update_ui(chatbot=chatbot, history=history, msg="请求失败") # refresh the UI
            return
    gpt_replying_buffer = ""
    gpt_security_policy = ""
    history.extend([inputs, ''])
    for response in stream_response:
        results = response.decode("utf-8")  # decode the raw SSE bytes
        gpt_security_policy += results
        match = re.search(r'"text":\s*"((?:[^"\\]|\\.)*)"', results, flags=re.DOTALL)
        error_match = re.search(r'\"message\":\s*\"(.*)\"', results, flags=re.DOTALL)
        if match:
            try:
                paraphrase = json.loads('{"text": "%s"}' % match.group(1))
            except:
                raise ValueError(f"解析GEMINI消息出错。")
            gpt_replying_buffer += paraphrase['text']  # decoded via the json library
            chatbot[-1] = (inputs, gpt_replying_buffer)
            history[-1] = gpt_replying_buffer
            yield from update_ui(chatbot=chatbot, history=history)
        if error_match:
            # BUGFIX: the original `history = history[-2]` replaced the whole
            # history *list* with a single string; slice off the failed round.
            history = history[:-2]  # drop the failed round from the history
            chatbot[-1] = (inputs, gpt_replying_buffer + f"对话错误,请查看message\n\n```\n{error_match.group(1)}\n```")
            yield from update_ui(chatbot=chatbot, history=history)
            raise RuntimeError('对话错误')
    if not gpt_replying_buffer:
        # BUGFIX: same list-vs-element slicing error as above.
        history = history[:-2]  # drop the failed round from the history
        chatbot[-1] = (inputs, gpt_replying_buffer + f"触发了Google的安全访问策略,没有回答\n\n```\n{gpt_security_policy}\n```")
        yield from update_ui(chatbot=chatbot, history=history)
101
+
102
+
103
+
104
if __name__ == '__main__':
    # Manual smoke test: stream a gemini-pro completion and print each chunk.
    import sys  # NOTE(review): imported but unused — kept unchanged in this doc-only edit
    llm_kwargs = {'llm_model': 'gemini-pro'}
    result = predict('Write long a story about a magic backpack.', llm_kwargs, llm_kwargs, [])
    for i in result:
        print(i)
request_llms/bridge_internlm.py ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name = "InternLM"
2
+ cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`"
3
+
4
+ from transformers import AutoModel, AutoTokenizer
5
+ import time
6
+ import threading
7
+ import importlib
8
+ from toolbox import update_ui, get_conf, ProxyNetworkActivate
9
+ from multiprocessing import Process, Pipe
10
+ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
11
+
12
+
13
+ # ------------------------------------------------------------------------------------------------------------------------
14
+ # 🔌💻 Local Model Utils
15
+ # ------------------------------------------------------------------------------------------------------------------------
16
def try_to_import_special_deps():
    # Probe import: raises ImportError if the deps from requirements_chatglm.txt
    # are missing, so the caller can show an install hint instead of crashing later.
    import sentencepiece
18
+
19
def combine_history(prompt, hist):
    """Flatten the chat history plus the new prompt into InternLM's text template.

    Each past turn becomes ``<|User|>:q<eoh>`` / ``<|Bot|>:a<eoa>`` lines; the new
    prompt is appended with a trailing ``<|Bot|>:`` marker for the model to continue.
    """
    segments = []
    for turn in hist:
        segments.append(f"<|User|>:{turn[0]}<eoh>\n")
        segments.append(f"<|Bot|>:{turn[1]}<eoa>\n")
    # The current question, left open for the bot's reply.
    segments.append(f"<|User|>:{prompt}<eoh>\n<|Bot|>:")
    return "".join(segments)
33
+
34
+ # ------------------------------------------------------------------------------------------------------------------------
35
+ # 🔌💻 Local Model
36
+ # ------------------------------------------------------------------------------------------------------------------------
37
class GetInternlmHandle(LocalLLMHandle):
    """Subprocess handle for internlm/internlm-chat-7b: loads the model once and
    streams token-by-token generations (sampling loop adapted from InternLM's web demo)."""

    def load_model_info(self):
        # 🏃‍♂️ runs in the child process
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def try_to_import_special_deps(self, **kwargs):
        """
        import something that will raise error if the user does not install requirement_*.txt
        """
        import sentencepiece

    def load_model_and_tokenizer(self):
        # 🏃‍♂️ runs in the child process
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer
        device = get_conf('LOCAL_MODEL_DEVICE')
        with ProxyNetworkActivate('Download_LLM'):
            if self._model is None:
                tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
                # bfloat16 halves memory; only move to CUDA when a GPU device is configured.
                if device=='cpu':
                    model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16)
                else:
                    model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16).cuda()

                model = model.eval()
        # NOTE(review): if self._model is already set, `model`/`tokenizer` are unbound
        # here — confirm the handle never invokes this method twice.
        return model, tokenizer

    def llm_stream_generator(self, **kwargs):
        import torch
        import logging
        import copy
        import warnings
        import torch.nn as nn
        from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList, GenerationConfig

        # 🏃‍♂️ runs in the child process
        def adaptor():
            # Unpack the request fields forwarded by LocalLLMHandle and fold the
            # chat history into InternLM's prompt template.
            model = self._model
            tokenizer = self._tokenizer
            prompt = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            real_prompt = combine_history(prompt, history)
            return model, tokenizer, real_prompt, max_length, top_p, temperature

        model, tokenizer, prompt, max_length, top_p, temperature = adaptor()
        prefix_allowed_tokens_fn = None
        logits_processor = None
        stopping_criteria = None
        # 103028 is InternLM's extra end-of-answer token (<eoa>) — treated as EOS below.
        additional_eos_token_id = 103028
        generation_config = None
        # 🏃‍♂️ generation loop adapted from:
        # 🏃‍♂️ https://github.com/InternLM/InternLM/blob/efbf5335709a8c8faeac6eaf07193973ff1d56a1/web_demo.py#L25

        inputs = tokenizer([prompt], padding=True, return_tensors="pt")
        input_length = len(inputs["input_ids"][0])
        device = get_conf('LOCAL_MODEL_DEVICE')
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        input_ids = inputs["input_ids"]
        batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]
        if generation_config is None:
            generation_config = model.generation_config
        # Deep-copy so per-request overrides don't mutate the model's shared config.
        generation_config = copy.deepcopy(generation_config)
        model_kwargs = generation_config.update(**kwargs)
        bos_token_id, eos_token_id = generation_config.bos_token_id, generation_config.eos_token_id
        if isinstance(eos_token_id, int):
            eos_token_id = [eos_token_id]
        if additional_eos_token_id is not None:
            eos_token_id.append(additional_eos_token_id)
        has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
        if has_default_max_length and generation_config.max_new_tokens is None:
            warnings.warn(
                f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
                "This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
                " recommend using `max_new_tokens` to control the maximum length of the generation.",
                UserWarning,
            )
        elif generation_config.max_new_tokens is not None:
            # max_new_tokens wins over max_length, mirroring transformers' own logic.
            generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
            if not has_default_max_length:
                logging.warn(
                    f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
                    f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
                    "Please refer to the documentation for more information. "
                    "(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)",
                    UserWarning,
                )

        if input_ids_seq_length >= generation_config.max_length:
            input_ids_string = "input_ids"
            logging.warning(
                f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
                f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
                " increasing `max_new_tokens`."
            )

        # 2. Set generation parameters if not already defined
        logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
        stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()

        logits_processor = model._get_logits_processor(
            generation_config=generation_config,
            input_ids_seq_length=input_ids_seq_length,
            encoder_input_ids=input_ids,
            prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
            logits_processor=logits_processor,
        )

        stopping_criteria = model._get_stopping_criteria(
            generation_config=generation_config, stopping_criteria=stopping_criteria
        )
        logits_warper = model._get_logits_warper(generation_config)

        # 1 = still generating; multiplied to 0 once an EOS token is produced.
        unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
        scores = None
        while True:
            model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
            # forward pass to get next token
            outputs = model(
                **model_inputs,
                return_dict=True,
                output_attentions=False,
                output_hidden_states=False,
            )

            next_token_logits = outputs.logits[:, -1, :]

            # pre-process distribution
            next_token_scores = logits_processor(input_ids, next_token_logits)
            next_token_scores = logits_warper(input_ids, next_token_scores)

            # sample
            probs = nn.functional.softmax(next_token_scores, dim=-1)
            if generation_config.do_sample:
                next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
            else:
                next_tokens = torch.argmax(probs, dim=-1)

            # update generated ids, model inputs, and length for next step
            input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
            model_kwargs = model._update_model_kwargs_for_generation(
                outputs, model_kwargs, is_encoder_decoder=False
            )
            # Marks a sequence finished as soon as any configured EOS id is emitted.
            unfinished_sequences = unfinished_sequences.mul((min(next_tokens != i for i in eos_token_id)).long())

            # Decode only the freshly generated suffix (drop the prompt tokens).
            output_token_ids = input_ids[0].cpu().tolist()
            output_token_ids = output_token_ids[input_length:]
            for each_eos_token_id in eos_token_id:
                if output_token_ids[-1] == each_eos_token_id:
                    output_token_ids = output_token_ids[:-1]
            response = tokenizer.decode(output_token_ids)

            yield response
            # stop when each sentence is finished, or if we exceed the maximum length
            if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
                return
198
+
199
+
200
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
# Wire the handle class into the framework's two standard entry points.
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetInternlmHandle, model_name)
request_llms/bridge_llama2.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name = "LLaMA"
2
+ cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`"
3
+
4
+
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
6
+ from toolbox import update_ui, get_conf, ProxyNetworkActivate
7
+ from multiprocessing import Process, Pipe
8
+ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
9
+ from threading import Thread
10
+
11
+
12
+ # ------------------------------------------------------------------------------------------------------------------------
13
+ # 🔌💻 Local Model
14
+ # ------------------------------------------------------------------------------------------------------------------------
15
class GetLlamaHandle(LocalLLMHandle):
    """Subprocess handle for meta-llama/Llama-2-7b-chat-hf: loads the gated model
    once (child process) and streams generations via TextIteratorStreamer."""

    def load_model_info(self):
        # 🏃‍♂️ runs in the child process
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # 🏃‍♂️ runs in the child process
        import os
        huggingface_token, device = get_conf('HUGGINGFACE_ACCESS_TOKEN', 'LOCAL_MODEL_DEVICE')
        assert len(huggingface_token) != 0, "没有填写 HUGGINGFACE_ACCESS_TOKEN"
        # Persist the token so huggingface_hub can access the gated Llama-2 repo.
        with open(os.path.expanduser('~/.cache/huggingface/token'), 'w') as f:
            f.write(huggingface_token)
        model_id = 'meta-llama/Llama-2-7b-chat-hf'
        with ProxyNetworkActivate('Download_LLM'):
            self._tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=huggingface_token)
            model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=huggingface_token).eval()
            # use fp16 when running on GPU
            if device.startswith('cuda'): model = model.half().to(device)
            self._model = model

        return self._model, self._tokenizer

    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️ runs in the child process
        def adaptor(kwargs):
            # Unpack the request fields forwarded by LocalLLMHandle.
            query = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            console_slience = kwargs.get('console_slience', True)
            return query, max_length, top_p, temperature, history, console_slience

        def convert_messages_to_prompt(query, history):
            # Llama-2 chat template: "[INST]question[/INST]answer" per past turn.
            prompt = ""
            for a, b in history:
                prompt += f"\n[INST]{a}[/INST]"
                # BUGFIX: the original appended '"\n{b}" + b', injecting the literal
                # string "{b}" into the prompt before every answer.
                prompt += "\n" + b
            prompt += f"\n[INST]{query}[/INST]"
            return prompt

        query, max_length, top_p, temperature, history, console_slience = adaptor(kwargs)
        prompt = convert_messages_to_prompt(query, history)
        # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-
        # code adapted from the transformers Llama streaming example
        streamer = TextIteratorStreamer(self._tokenizer)
        # Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
        inputs = self._tokenizer([prompt], return_tensors="pt")
        # Round-tripped prompt text as the tokenizer will reproduce it in the stream.
        prompt_tk_back = self._tokenizer.batch_decode(inputs['input_ids'])[0]

        generation_kwargs = dict(inputs.to(self._model.device), streamer=streamer, max_new_tokens=max_length)
        thread = Thread(target=self._model.generate, kwargs=generation_kwargs)
        thread.start()
        generated_text = ""
        for new_text in streamer:
            generated_text += new_text
            if not console_slience: print(new_text, end='')
            # BUGFIX: str.lstrip/rstrip remove *character sets*, not exact
            # prefixes/suffixes, so they could eat the start/end of the reply.
            # Strip the exact prompt prefix and the exact "</s>" suffix instead.
            visible = generated_text
            if visible.startswith(prompt_tk_back):
                visible = visible[len(prompt_tk_back):]
            if visible.endswith("</s>"):
                visible = visible[:-len("</s>")]
            yield visible
        if not console_slience: print()
        # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-

    def try_to_import_special_deps(self, **kwargs):
        # import something that will raise error if the user does not install requirement_*.txt
        # 🏃‍♂️ runs in the main process
        import importlib
        importlib.import_module('transformers')
85
+
86
+
87
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
# Wire the handle class into the framework's two standard entry points.
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetLlamaHandle, model_name)
request_llms/bridge_newbingfree.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ========================================================================
3
+ 第一部分:来自EdgeGPT.py
4
+ https://github.com/acheong08/EdgeGPT
5
+ ========================================================================
6
+ """
7
+ from .edge_gpt_free import Chatbot as NewbingChatbot
8
+ load_message = "等待NewBing响应。"
9
+
10
+ """
11
+ ========================================================================
12
+ 第二部分:子进程Worker(调用主体)
13
+ ========================================================================
14
+ """
15
+ import time
16
+ import json
17
+ import re
18
+ import logging
19
+ import asyncio
20
+ import importlib
21
+ import threading
22
+ from toolbox import update_ui, get_conf, trimmed_format_exc
23
+ from multiprocessing import Process, Pipe
24
+
25
def preprocess_newbing_out(s):
    """Convert NewBing's ^n^ citation markers to (n) and, when citations exist,
    append a fenced ```reference``` block listing all lines starting with '['."""
    result = re.sub(r'\^(\d+)\^', lambda hit: f'({hit.group(1)})', s)
    if '[1]' in result:
        reference_lines = [ln for ln in result.split('\n') if ln.startswith('[')]
        result += '\n\n```reference\n' + '\n'.join(reference_lines) + '\n```\n'
    return result
32
+
33
def preprocess_newbing_out_simple(result):
    """Append a fenced ```reference``` block with citation lines (those starting
    with '[') whenever the reply contains the first citation marker '[1]'."""
    if '[1]' in result:
        reference_lines = [ln for ln in result.split('\n') if ln.startswith('[')]
        result = result + '\n\n```reference\n' + '\n'.join(reference_lines) + '\n```\n'
    return result
37
+
38
class NewBingHandle(Process):
    """Child process wrapping the (unmaintained) NewBing chatbot; the main process
    talks to it over a Pipe, serialized by a thread lock."""

    def __init__(self):
        super().__init__(daemon=True)
        self.parent, self.child = Pipe()
        self.newbing_model = None
        self.info = ""
        self.success = True
        self.local_history = []     # prompts already sent, to avoid re-sending context
        self.check_dependency()
        self.start()                # forks the child process running self.run()
        self.threadLock = threading.Lock()

    def check_dependency(self):
        # Probe optional deps; sets self.success/self.info instead of raising.
        try:
            self.success = False
            import certifi, httpx, rich
            self.info = "依赖检测通过,等待NewBing响应。注意目前不能多人同时调用NewBing接口(有线程锁),否则将导致每个人的NewBing问询历史互相渗透。调用NewBing时,会自动使用已配置的代理。"
            self.success = True
        except:
            self.info = "缺少的依赖,如果要使用Newbing,除了基础的pip依赖以外,您还需要运行`pip install -r request_llms/requirements_newbing.txt`安装Newbing的依赖。"
            self.success = False

    def ready(self):
        # True once the child has constructed the chatbot.
        return self.newbing_model is not None

    async def async_run(self):
        # Read configuration
        NEWBING_STYLE = get_conf('NEWBING_STYLE')
        from request_llms.bridge_all import model_info
        endpoint = model_info['newbing']['endpoint']
        while True:
            # Wait for the next request from the main process
            kwargs = self.child.recv()
            question=kwargs['query']
            history=kwargs['history']
            system_prompt=kwargs['system_prompt']

            # Reset the conversation when the caller cleared its history
            if len(self.local_history) > 0 and len(history)==0:
                await self.newbing_model.reset()
                self.local_history = []

            # Start building the question
            prompt = ""
            if system_prompt not in self.local_history:
                self.local_history.append(system_prompt)
                prompt += system_prompt + '\n'

            # Append history not yet sent to NewBing
            for ab in history:
                a, b = ab
                if a not in self.local_history:
                    self.local_history.append(a)
                    prompt += a + '\n'

            # The question itself
            prompt += question
            self.local_history.append(question)
            print('question:', prompt)
            # Submit; NOTE(review): only `question` is sent, the assembled `prompt`
            # (system prompt + history) is unused here — confirm intent.
            async for final, response in self.newbing_model.ask_stream(
                prompt=question,
                conversation_style=NEWBING_STYLE, # ["creative", "balanced", "precise"]
                wss_link=endpoint, # "wss://sydney.bing.com/sydney/ChatHub"
            ):
                if not final:
                    print(response)
                    self.child.send(str(response))
                else:
                    print('-------- receive final ---------')
                    self.child.send('[Finish]')
                    # self.local_history.append(response)


    def run(self):
        """
        Runs in the child process.
        """
        # First run: load parameters and build the chatbot.
        self.success = False
        self.local_history = []
        if (self.newbing_model is None) or (not self.success):
            # Proxy settings
            proxies, NEWBING_COOKIES = get_conf('proxies', 'NEWBING_COOKIES')
            if proxies is None:
                self.proxies_https = None
            else:
                self.proxies_https = proxies['https']

            # Cookies shorter than ~100 chars cannot be a valid cookie JSON blob.
            if (NEWBING_COOKIES is not None) and len(NEWBING_COOKIES) > 100:
                try:
                    cookies = json.loads(NEWBING_COOKIES)
                except:
                    self.success = False
                    tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n'
                    self.child.send(f'[Local Message] NEWBING_COOKIES未填写或有格式错误。')
                    self.child.send('[Fail]'); self.child.send('[Finish]')
                    raise RuntimeError(f"NEWBING_COOKIES未填写或有格式错误。")
            else:
                cookies = None

            try:
                self.newbing_model = NewbingChatbot(proxy=self.proxies_https, cookies=cookies)
            except:
                self.success = False
                tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n'
                self.child.send(f'[Local Message] 不能加载Newbing组件,请注意Newbing组件已不再维护。{tb_str}')
                self.child.send('[Fail]')
                self.child.send('[Finish]')
                raise RuntimeError(f"不能加载Newbing组件,请注意Newbing组件已不再维护。")

        self.success = True
        try:
            # Enter the request-serving loop.
            asyncio.run(self.async_run())
        except Exception:
            tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n'
            self.child.send(f'[Local Message] Newbing 请求失败,报错信息如下. 如果是与网络相关的问题,建议更换代理协议(推荐http)或代理节点 {tb_str}.')
            self.child.send('[Fail]')
            self.child.send('[Finish]')

    def stream_chat(self, **kwargs):
        """
        Runs in the main process; yields reply fragments from the child.
        """
        self.threadLock.acquire() # acquire the lock (one conversation at a time)
        self.parent.send(kwargs) # forward the request to the child process
        while True:
            res = self.parent.recv() # wait for the next fragment from NewBing
            if res == '[Finish]': break # done
            elif res == '[Fail]': self.success = False; break # child reported failure
            else: yield res # a reply fragment
        self.threadLock.release() # release the lock
171
+
172
+
173
+ """
174
+ ========================================================================
175
+ 第三部分:主进程统一调用函数接口
176
+ ========================================================================
177
+ """
178
# Module-level singleton: one NewBingHandle child process shared by all calls.
# (A `global` statement at module scope is a no-op; kept from the original.)
global newbingfree_handle
newbingfree_handle = None
180
+
181
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
    """
    Multi-threaded entry point; see request_llms/bridge_all.py for the contract.
    Lazily (re)creates the global NewBingHandle, then streams the reply while
    mirroring progress into observe_window[0]; observe_window[1] is a watchdog
    timestamp refreshed by the caller.
    """
    global newbingfree_handle
    if (newbingfree_handle is None) or (not newbingfree_handle.success):
        newbingfree_handle = NewBingHandle()
        if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + newbingfree_handle.info
        if not newbingfree_handle.success:
            error = newbingfree_handle.info
            newbingfree_handle = None  # drop the broken handle so the next call retries
            raise RuntimeError(error)

    # There is no sys_prompt channel, so the prompt is folded into the history.
    history_feedin = []
    for i in range(len(history)//2):
        history_feedin.append([history[2*i], history[2*i+1]] )

    watch_dog_patience = 5 # watchdog patience: 5 seconds without a caller heartbeat aborts
    response = ""
    if len(observe_window) >= 1: observe_window[0] = "[Local Message] 等待NewBing响应中 ..."
    for response in newbingfree_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=sys_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        if len(observe_window) >= 1: observe_window[0] = preprocess_newbing_out_simple(response)
        if len(observe_window) >= 2:
            if (time.time()-observe_window[1]) > watch_dog_patience:
                raise RuntimeError("程序终止。")
    return preprocess_newbing_out_simple(response)
209
+
210
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    Single-threaded UI entry point; see request_llms/bridge_all.py for the contract.
    Streams the NewBing reply into the chatbot widget and appends the final
    exchange to `history`.
    """
    chatbot.append((inputs, "[Local Message] 等待NewBing响应中 ..."))

    global newbingfree_handle
    if (newbingfree_handle is None) or (not newbingfree_handle.success):
        newbingfree_handle = NewBingHandle()
        chatbot[-1] = (inputs, load_message + "\n\n" + newbingfree_handle.info)
        yield from update_ui(chatbot=chatbot, history=[])
        if not newbingfree_handle.success:
            newbingfree_handle = None
            return

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    # No sys_prompt channel: feed the history as question/answer pairs instead.
    history_feedin = []
    for i in range(len(history)//2):
        history_feedin.append([history[2*i], history[2*i+1]] )

    chatbot[-1] = (inputs, "[Local Message] 等待NewBing响应中 ...")
    response = "[Local Message] 等待NewBing响应中 ..."
    yield from update_ui(chatbot=chatbot, history=history, msg="NewBing响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。")
    for response in newbingfree_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        chatbot[-1] = (inputs, preprocess_newbing_out(response))
        yield from update_ui(chatbot=chatbot, history=history, msg="NewBing响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。")
    # If the sentinel survived the loop, the stream produced nothing.
    if response == "[Local Message] 等待NewBing响应中 ...": response = "[Local Message] NewBing响应异常,请刷新界面重试 ..."
    history.extend([inputs, response])
    logging.info(f'[raw_input] {inputs}')
    logging.info(f'[response] {response}')
    yield from update_ui(chatbot=chatbot, history=history, msg="完成全部响应,请提交新问题。")
245
+
request_llms/bridge_qianfan.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import time, requests, json
3
+ from multiprocessing import Process, Pipe
4
+ from functools import wraps
5
+ from datetime import datetime, timedelta
6
+ from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, get_conf
7
+
8
+ model_name = '千帆大模型平台'
9
+ timeout_bot_msg = '[Local Message] Request timeout. Network error.'
10
+
11
def cache_decorator(timeout):
    """Memoize a function's results for `timeout` seconds.

    The cache key combines the function name, positional args, and keyword args
    (which must all be hashable). Expired entries are recomputed on access.
    """
    store = {}
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            cache_key = (func.__name__, args, frozenset(kwargs.items()))
            entry = store.get(cache_key)
            # Serve from cache only while the entry is still fresh.
            if entry is not None:
                value, stamp = entry
                if datetime.now() - stamp < timedelta(seconds=timeout):
                    return value
            # Miss or expired: recompute and remember when we did.
            value = func(*args, **kwargs)
            store[cache_key] = (value, datetime.now())
            return value
        return wrapper
    return decorator
29
+
30
@cache_decorator(timeout=3600)
def get_access_token():
    """
    Exchange the Baidu Cloud AK/SK pair for an OAuth access token.
    Memoized for one hour by `cache_decorator` (token validity window).
    :return: the access token string
    :raises RuntimeError: if either key is unconfigured
    """
    # if (access_token_cache is None) or (time.time() - last_access_token_obtain_time > 3600):
    BAIDU_CLOUD_API_KEY, BAIDU_CLOUD_SECRET_KEY = get_conf('BAIDU_CLOUD_API_KEY', 'BAIDU_CLOUD_SECRET_KEY')

    if len(BAIDU_CLOUD_SECRET_KEY) == 0: raise RuntimeError("没有配置BAIDU_CLOUD_SECRET_KEY")
    if len(BAIDU_CLOUD_API_KEY) == 0: raise RuntimeError("没有配置BAIDU_CLOUD_API_KEY")

    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": BAIDU_CLOUD_API_KEY, "client_secret": BAIDU_CLOUD_SECRET_KEY}
    # NOTE(review): str() turns a missing token (None) into the literal "None" —
    # confirm downstream callers surface auth failures sensibly.
    access_token_cache = str(requests.post(url, params=params).json().get("access_token"))
    return access_token_cache
    # Dead code kept from the earlier manual-cache implementation:
    # else:
    #     return access_token_cache
48
+
49
+
50
def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
    """
    Build the strictly alternating user/assistant message list required by the
    Baidu Qianfan (ERNIE) chat API.

    The API requires the first message to come from the user and forbids empty
    messages, so empty entries are padded with the "Hello" placeholder and the
    system prompt is injected as a leading user/assistant pair.

    :param inputs: the current user question
    :param llm_kwargs: unused here (kept for interface parity with other bridges)
    :param history: flat list [q1, a1, q2, a2, ...]
    :param system_prompt: optional system instruction, sent as the first user turn
    :return: list of {"role": ..., "content": ...} dicts
    """
    conversation_cnt = len(history) // 2
    if system_prompt == "": system_prompt = "Hello"
    messages = [{"role": "user", "content": system_prompt}]
    messages.append({"role": "assistant", "content": 'Certainly!'})
    if conversation_cnt:
        for index in range(0, 2*conversation_cnt, 2):
            what_i_have_asked = {}
            what_i_have_asked["role"] = "user"
            what_i_have_asked["content"] = history[index] if history[index]!="" else "Hello"
            what_gpt_answer = {}
            what_gpt_answer["role"] = "assistant"
            # BUGFIX: the emptiness check previously inspected history[index] (the
            # question) instead of history[index+1] (the answer), so empty answers
            # were dropped from the payload instead of being padded.
            what_gpt_answer["content"] = history[index+1] if history[index+1]!="" else "Hello"
            if what_i_have_asked["content"] != "":
                # Skip pairs whose answer is empty or a timeout placeholder.
                if what_gpt_answer["content"] == "": continue
                if what_gpt_answer["content"] == timeout_bot_msg: continue
                messages.append(what_i_have_asked)
                messages.append(what_gpt_answer)
            else:
                # Defensive: merge an orphan answer into the previous message.
                messages[-1]['content'] = what_gpt_answer['content']
    what_i_ask_now = {}
    what_i_ask_now["role"] = "user"
    what_i_ask_now["content"] = inputs
    messages.append(what_i_ask_now)
    return messages
75
+
76
+
77
def generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
    """
    Stream a chat completion from Baidu Qianfan (ERNIE / hosted Llama-2 / BLOOMZ).
    Yields the accumulated reply text after each received chunk.
    :raises ConnectionAbortedError: when the context exceeds the model's token limit
    :raises RuntimeError: for any other API-reported error
    """
    BAIDU_CLOUD_QIANFAN_MODEL = get_conf('BAIDU_CLOUD_QIANFAN_MODEL')

    # One endpoint per hosted model.
    url_lib = {
        "ERNIE-Bot-4": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions_pro",
        "ERNIE-Bot": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions",
        "ERNIE-Bot-turbo": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/eb-instant",
        "BLOOMZ-7B": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/bloomz_7b1",

        "Llama-2-70B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_70b",
        "Llama-2-13B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_13b",
        "Llama-2-7B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_7b",
    }

    url = url_lib[BAIDU_CLOUD_QIANFAN_MODEL]

    url += "?access_token=" + get_access_token()


    payload = json.dumps({
        "messages": generate_message_payload(inputs, llm_kwargs, history, system_prompt),
        "stream": True
    })
    headers = {
        'Content-Type': 'application/json'
    }
    response = requests.request("POST", url, headers=headers, data=payload, stream=True)
    buffer = ""
    for line in response.iter_lines():
        if len(line) == 0: continue
        try:
            # NOTE(review): lstrip('data:') strips a character *set*, not the exact
            # "data:" SSE prefix; it works here only because JSON starts with '{' — confirm.
            dec = line.decode().lstrip('data:')
            dec = json.loads(dec)
            incoming = dec['result']
            buffer += incoming
            yield buffer
        except:
            # A chunk without a 'result' field is an API error object.
            # NOTE(review): `dec` is unbound here if decode() itself raised — confirm.
            if ('error_code' in dec) and ("max length" in dec['error_msg']):
                raise ConnectionAbortedError(dec['error_msg']) # context too long: token overflow
            elif ('error_code' in dec):
                raise RuntimeError(dec['error_msg'])
118
+
119
+
120
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
    """
    ⭐ Multi-threaded entry point; see request_llms/bridge_all.py for the contract.

    Streams the Qianfan reply, mirroring progress into observe_window[0] and
    aborting when the caller's watchdog timestamp (observe_window[1]) goes stale.
    """
    watch_dog_patience = 5  # seconds of watchdog tolerance
    response = ""

    for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, sys_prompt):
        if observe_window:
            observe_window[0] = response
        if len(observe_window) >= 2 and (time.time() - observe_window[1]) > watch_dog_patience:
            raise RuntimeError("程序终止。")
    return response
134
+
135
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    ⭐ Single-threaded UI entry point; see request_llms/bridge_all.py for the contract.
    Streams the Qianfan reply into the chatbot widget; on token overflow, trims
    the history and asks the user to retry.
    """
    chatbot.append((inputs, ""))

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    yield from update_ui(chatbot=chatbot, history=history)
    # BUGFIX: initialize the sentinel BEFORE streaming. The original overwrote
    # `response` with the error string AFTER the loop, discarding the model's
    # actual reply before recording it in history (and NameError'd on an empty stream).
    response = f"[Local Message] 等待{model_name}响应中 ..."
    # Start receiving the reply
    try:
        for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
            chatbot[-1] = (inputs, response)
            yield from update_ui(chatbot=chatbot, history=history)
    except ConnectionAbortedError as e:
        from .bridge_all import model_info
        if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入:history[-2] 是本次输入, history[-1] 是本次输出
        history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
        yield from update_ui(chatbot=chatbot, history=history, msg="异常") # 刷新界面
        return

    # Finalize: a surviving sentinel means the stream produced nothing.
    if response == f"[Local Message] 等待{model_name}响应中 ...":
        response = f"[Local Message] {model_name}响应异常 ..."
    history.extend([inputs, response])
    yield from update_ui(chatbot=chatbot, history=history)
request_llms/bridge_qwen.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import os
3
+ from toolbox import update_ui, get_conf, update_ui_lastest_msg
4
+ from toolbox import check_packages, report_exception
5
+
6
+ model_name = 'Qwen'
7
+
8
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
    """
    ⭐ Multi-threaded entry point; see request_llms/bridge_all.py for the contract.

    Streams the Qwen (DashScope) reply, mirroring progress into observe_window[0]
    and aborting when the caller's watchdog timestamp (observe_window[1]) goes stale.
    """
    from .com_qwenapi import QwenRequestInstance
    watch_dog_patience = 5  # seconds of watchdog tolerance
    response = ""

    request_instance = QwenRequestInstance()
    for response in request_instance.generate(inputs, llm_kwargs, history, sys_prompt):
        if observe_window:
            observe_window[0] = response
        if len(observe_window) >= 2 and (time.time() - observe_window[1]) > watch_dog_patience:
            raise RuntimeError("程序终止。")
    return response
24
+
25
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    ⭐ Single-threaded UI entry point; see request_llms/bridge_all.py for the contract.
    Streams a DashScope (Qwen) reply into the chatbot widget after validating
    the dashscope dependency and API key.
    """
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history)

    # Probe the optional dependency; advise installation if missing.
    try:
        check_packages(["dashscope"])
    except:
        yield from update_ui_lastest_msg(f"导入软件依赖失败。使用该模型需要额外依赖,安装方法```pip install --upgrade dashscope```。",
                                         chatbot=chatbot, history=history, delay=0)
        return

    # Check DASHSCOPE_API_KEY
    if get_conf("DASHSCOPE_API_KEY") == "":
        yield from update_ui_lastest_msg(f"请配置 DASHSCOPE_API_KEY。",
                                         chatbot=chatbot, history=history, delay=0)
        return

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    # Start receiving the reply
    from .com_qwenapi import QwenRequestInstance
    sri = QwenRequestInstance()
    # BUGFIX: initialize `response` so an empty stream (no yields from
    # sri.generate) does not raise NameError in the sentinel check below.
    response = f"[Local Message] 等待{model_name}响应中 ..."
    for response in sri.generate(inputs, llm_kwargs, history, system_prompt):
        chatbot[-1] = (inputs, response)
        yield from update_ui(chatbot=chatbot, history=history)

    # Finalize: a surviving sentinel means the stream produced nothing.
    if response == f"[Local Message] 等待{model_name}响应中 ...":
        response = f"[Local Message] {model_name}响应异常 ..."
    history.extend([inputs, response])
    yield from update_ui(chatbot=chatbot, history=history)
+ yield from update_ui(chatbot=chatbot, history=history)
request_llms/bridge_qwen_local.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name = "Qwen_Local"
2
+ cmd_to_install = "`pip install -r request_llms/requirements_qwen_local.txt`"
3
+
4
+ from toolbox import ProxyNetworkActivate, get_conf
5
+ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
6
+
7
+
8
+
9
+ # ------------------------------------------------------------------------------------------------------------------------
10
+ # 🔌💻 Local Model
11
+ # ------------------------------------------------------------------------------------------------------------------------
12
class GetQwenLMHandle(LocalLLMHandle):
    # Subprocess-hosted wrapper around a locally downloaded Qwen checkpoint.

    def load_model_info(self):
        # 🏃‍♂️ Runs in the child process: advertise model name and install hint.
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # 🏃‍♂️ Runs in the child process: download (if needed) and load the model.
        # from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
        from transformers import AutoModelForCausalLM, AutoTokenizer
        from transformers.generation import GenerationConfig
        with ProxyNetworkActivate('Download_LLM'):
            model_id = get_conf('QWEN_LOCAL_MODEL_SELECTION')
            self._tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, resume_download=True)
            # use fp16
            model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True).eval()
            model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)  # generation length / top_p etc. can be tuned here
            self._model = model

        return self._model, self._tokenizer

    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️ Runs in the child process: yield partial responses as they stream.
        def adaptor(kwargs):
            # Unpack the generic kwargs contract shared by all local bridges.
            query = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            return query, max_length, top_p, temperature, history

        query, max_length, top_p, temperature, history = adaptor(kwargs)

        # NOTE(review): max_length / top_p / temperature are unpacked but not
        # forwarded — chat_stream runs with the model's generation_config defaults.
        for response in self._model.chat_stream(self._tokenizer, query, history=history):
            yield response

    def try_to_import_special_deps(self, **kwargs):
        # import something that will raise error if the user does not install requirement_*.txt
        # 🏃‍♂️ Runs in the MAIN process.
        # NOTE(review): probes 'modelscope' although loading above uses
        # transformers — confirm against requirements_qwen_local.txt.
        import importlib
        importlib.import_module('modelscope')
54
+
55
+
56
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
# Build the two standard bridge entry points (threaded / streaming-UI) from the handle class.
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name)
request_llms/bridge_spark.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import time
3
+ import threading
4
+ import importlib
5
+ from toolbox import update_ui, get_conf, update_ui_lastest_msg
6
+ from multiprocessing import Process, Pipe
7
+
8
+ model_name = '星火认知大模型'
9
+
10
def validate_key():
    """Return True when a usable XFYUN_APPID is configured (neither empty nor the '00000000' placeholder)."""
    appid = get_conf('XFYUN_APPID')
    return appid not in ('00000000', '')
15
+
16
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
    """
    ⭐ Multi-threaded entry point for the Spark model; see request_llms/bridge_all.py.
    """
    if validate_key() is False:
        raise RuntimeError('请配置讯飞星火大模型的XFYUN_APPID, XFYUN_API_KEY, XFYUN_API_SECRET')

    from .com_sparkapi import SparkRequestInstance

    WATCHDOG_PATIENCE = 5  # seconds of caller silence tolerated before aborting
    answer = ""
    request = SparkRequestInstance()
    for answer in request.generate(inputs, llm_kwargs, history, sys_prompt, use_image_api=False):
        # Mirror the partial response so the caller can observe progress.
        if observe_window:
            observe_window[0] = answer
        # Watchdog: a stale heartbeat in observe_window[1] means the listener is gone.
        if len(observe_window) >= 2 and (time.time() - observe_window[1]) > WATCHDOG_PATIENCE:
            raise RuntimeError("程序终止。")
    return answer
35
+
36
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    ⭐ Single-threaded (UI) entry point for the Spark model; see request_llms/bridge_all.py.
    """
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history)

    # Credential probe: bail out with a UI hint when the app id is missing.
    if validate_key() is False:
        yield from update_ui_lastest_msg(lastmsg="[Local Message] 请配置讯飞星火大模型的XFYUN_APPID, XFYUN_API_KEY, XFYUN_API_SECRET", chatbot=chatbot, history=history, delay=0)
        return

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    # Stream the reply, repainting the last chat row on each fragment.
    from .com_sparkapi import SparkRequestInstance
    request = SparkRequestInstance()
    for response in request.generate(inputs, llm_kwargs, history, system_prompt, use_image_api=True):
        chatbot[-1] = (inputs, response)
        yield from update_ui(chatbot=chatbot, history=history)

    # Wrap up: flag a never-updated placeholder reply, then record the exchange.
    if response == f"[Local Message] 等待{model_name}响应中 ...":
        response = f"[Local Message] {model_name}响应异常 ..."
    history.extend([inputs, response])
    yield from update_ui(chatbot=chatbot, history=history)
request_llms/bridge_stackclaude.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .bridge_newbingfree import preprocess_newbing_out, preprocess_newbing_out_simple
2
+ from multiprocessing import Process, Pipe
3
+ from toolbox import update_ui, get_conf, trimmed_format_exc
4
+ import threading
5
+ import importlib
6
+ import logging
7
+ import time
8
+ from toolbox import get_conf
9
+ import asyncio
10
+ load_message = "正在加载Claude组件,请稍候..."
11
+
12
try:
    """
    ========================================================================
    Part 1: Slack API client
    https://github.com/yokonsan/claude-in-slack-api
    ========================================================================
    """

    from slack_sdk.errors import SlackApiError
    from slack_sdk.web.async_client import AsyncWebClient

    class SlackClient(AsyncWebClient):
        """Async Slack web client used to converse with the Claude bot.

        Attributes:
            CHANNEL_ID: str, the DM channel id opened with the Claude bot.

        Methods:
            open_channel(): async; open a conversation via conversations_open and remember its channel id.
            chat(text): async; post a text message to the opened channel.
            get_slack_messages(): async; fetch the latest bot message (history queries unsupported).
            get_reply(): async generator; poll the channel — a message ending in "Typing…_" means Claude is still writing.
        """
        CHANNEL_ID = None

        async def open_channel(self):
            # Open (or resume) a direct-message conversation with the Claude bot.
            response = await self.conversations_open(users=get_conf('SLACK_CLAUDE_BOT_ID'))
            self.CHANNEL_ID = response["channel"]["id"]

        async def chat(self, text):
            if not self.CHANNEL_ID:
                raise Exception("Channel not found.")

            resp = await self.chat_postMessage(channel=self.CHANNEL_ID, text=text)
            # Remember our own message timestamp; replies are read strictly after it.
            # NOTE(review): LAST_TS is only set here — calling get_slack_messages()
            # before any chat() would raise AttributeError; confirm call order.
            self.LAST_TS = resp["ts"]

        async def get_slack_messages(self):
            try:
                # TODO: history lookups are unsupported for now — in a shared channel,
                # multiple concurrent users' histories would bleed into each other.
                resp = await self.conversations_history(channel=self.CHANNEL_ID, oldest=self.LAST_TS, limit=1)
                msg = [msg for msg in resp["messages"]
                       if msg.get("user") == get_conf('SLACK_CLAUDE_BOT_ID')]
                return msg
            except (SlackApiError, KeyError) as e:
                raise RuntimeError(f"获取Slack消息失败。")

        async def get_reply(self):
            # Poll until the bot stops streaming; yield (final, text) pairs.
            while True:
                slack_msgs = await self.get_slack_messages()
                if len(slack_msgs) == 0:
                    await asyncio.sleep(0.5)
                    continue

                msg = slack_msgs[-1]
                if msg["text"].endswith("Typing…_"):
                    # Still generating — yield the partial text and keep polling.
                    yield False, msg["text"]
                else:
                    yield True, msg["text"]
                    break
except:
    # Best-effort guard: slack_sdk may be missing; ClaudeHandle.check_dependency()
    # reports the problem to the user, so a silent pass is intentional here.
    pass
74
+
75
+ """
76
+ ========================================================================
77
+ 第二部分:子进程Worker(调用主体)
78
+ ========================================================================
79
+ """
80
+
81
+
82
class ClaudeHandle(Process):
    """Daemon subprocess that owns the Slack connection to Claude.

    The parent process talks to the child over a Pipe: ``stream_chat()`` sends a
    kwargs dict and then yields response fragments until the child emits the
    ``'[Finish]'`` sentinel (``'[Fail]'`` marks an unrecoverable error).
    """

    def __init__(self):
        super().__init__(daemon=True)
        self.parent, self.child = Pipe()    # parent end / child end of the command pipe
        self.claude_model = None            # SlackClient instance, created in the child process only
        self.info = ""                      # human-readable status for the UI
        self.success = True
        self.local_history = []
        self.check_dependency()
        if self.success:
            self.start()
        self.threadLock = threading.Lock()  # serializes concurrent stream_chat callers

    def check_dependency(self):
        # Probe for slack_sdk without crashing; .info carries the verdict to the UI.
        try:
            self.success = False
            import slack_sdk
            self.info = "依赖检测通过,等待Claude响应。注意目前不能多人同时调用Claude接口(有线程锁),否则将导致每个人的Claude问询历史互相渗透。调用Claude时,会自动使用已配置的代理。"
            self.success = True
        except:
            self.info = "缺少的依赖,如果要使用Claude,除了基础的pip依赖以外,您还需要运行`pip install -r request_llms/requirements_slackclaude.txt`安装Claude的依赖,然后重启程序。"
            self.success = False

    def ready(self):
        # NOTE(review): claude_model is only assigned inside the child process, so
        # in the parent this reflects parent-local state, not child readiness.
        return self.claude_model is not None

    async def async_run(self):
        await self.claude_model.open_channel()
        while True:
            # Block until the parent sends the next request.
            kwargs = self.child.recv()
            question = kwargs['query']
            history = kwargs['history']

            # Compose the prompt (history is currently not replayed to Slack).
            prompt = ""
            prompt += question
            print('question:', prompt)

            # Submit to the Claude bot.
            await self.claude_model.chat(prompt)

            # Relay streaming fragments back to the parent.
            async for final, response in self.claude_model.get_reply():
                if not final:
                    print(response)
                    self.child.send(str(response))
                else:
                    # Re-fetch once more so the last fragment is not lost.
                    slack_msgs = await self.claude_model.get_slack_messages()
                    last_msg = slack_msgs[-1]["text"] if slack_msgs and len(slack_msgs) > 0 else ""
                    if last_msg:
                        self.child.send(last_msg)
                    print('-------- receive final ---------')
                    self.child.send('[Finish]')

    def run(self):
        """Child-process entry point: build the Slack client, then serve requests forever."""
        # First run: (re)initialize state inside the child.
        self.success = False
        self.local_history = []
        if (self.claude_model is None) or (not self.success):
            # Proxy configuration.
            proxies = get_conf('proxies')
            self.proxies_https = proxies['https'] if proxies is not None else None

            try:
                SLACK_CLAUDE_USER_TOKEN = get_conf('SLACK_CLAUDE_USER_TOKEN')
                self.claude_model = SlackClient(token=SLACK_CLAUDE_USER_TOKEN, proxy=self.proxies_https)
                print('Claude组件初始化成功。')
            except:
                self.success = False
                tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n'
                self.child.send(f'[Local Message] 不能加载Claude组件。{tb_str}')
                self.child.send('[Fail]')
                self.child.send('[Finish]')
                raise RuntimeError(f"不能加载Claude组件。")

        self.success = True
        try:
            # Enter the request-serving loop.
            asyncio.run(self.async_run())
        except Exception:
            tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n'
            self.child.send(f'[Local Message] Claude失败 {tb_str}.')
            self.child.send('[Fail]')
            self.child.send('[Finish]')

    def stream_chat(self, **kwargs):
        """Main-process generator: forward kwargs to the child, yield reply fragments.

        BUG FIX: the lock is now released in a ``finally`` block, so an exception
        or an abandoned generator no longer leaves ``threadLock`` held forever
        (which deadlocked every subsequent request).
        """
        self.threadLock.acquire()
        try:
            self.parent.send(kwargs)      # hand the request to the child process
            while True:
                res = self.parent.recv()  # wait for the next fragment from Claude
                if res == '[Finish]':
                    break                 # normal end of stream
                elif res == '[Fail]':
                    self.success = False
                    break
                else:
                    yield res             # a partial Claude reply
        finally:
            self.threadLock.release()
193
+
194
+
195
+ """
196
+ ========================================================================
197
+ 第三部分:主进程统一调用函数接口
198
+ ========================================================================
199
+ """
200
# Module-level singleton: one ClaudeHandle shared by all requests.
# NOTE(review): `global` at module scope is a no-op; kept for parity with upstream.
global claude_handle
claude_handle = None
202
+
203
+
204
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
    """
    Multi-threaded entry point; see request_llms/bridge_all.py for the contract.

    BUG FIX: ``observe_window`` defaults to None but was written to
    unconditionally, raising TypeError for any caller relying on the default.
    All accesses are now guarded.
    """
    global claude_handle
    if (claude_handle is None) or (not claude_handle.success):
        claude_handle = ClaudeHandle()
        if observe_window is not None and len(observe_window) >= 1:
            observe_window[0] = load_message + "\n\n" + claude_handle.info
        if not claude_handle.success:
            error = claude_handle.info
            claude_handle = None
            raise RuntimeError(error)

    # There is no system-prompt channel, so prior turns ride along as Q/A pairs.
    history_feedin = []
    for i in range(len(history)//2):
        history_feedin.append([history[2*i], history[2*i+1]])

    watch_dog_patience = 5  # watchdog patience, 5 seconds is enough
    response = ""
    if observe_window is not None and len(observe_window) >= 1:
        observe_window[0] = "[Local Message] 等待Claude响应中 ..."
    for response in claude_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=sys_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        if observe_window is not None and len(observe_window) >= 1:
            observe_window[0] = preprocess_newbing_out_simple(response)
        if observe_window is not None and len(observe_window) >= 2:
            if (time.time()-observe_window[1]) > watch_dog_patience:
                raise RuntimeError("程序终止。")
    return preprocess_newbing_out_simple(response)
232
+
233
+
234
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream=True, additional_fn=None):
    """
    Single-threaded (UI) entry point; see request_llms/bridge_all.py.
    """
    chatbot.append((inputs, "[Local Message] 等待Claude响应中 ..."))

    global claude_handle
    if (claude_handle is None) or (not claude_handle.success):
        claude_handle = ClaudeHandle()
        chatbot[-1] = (inputs, load_message + "\n\n" + claude_handle.info)
        yield from update_ui(chatbot=chatbot, history=[])
        if not claude_handle.success:
            claude_handle = None
            return

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    # Claude has no system-prompt channel; replay history as Q/A pairs instead.
    history_feedin = [[history[2*i], history[2*i+1]] for i in range(len(history)//2)]

    waiting = "[Local Message] 等待Claude响应中 ..."
    chatbot[-1] = (inputs, waiting)
    response = waiting
    yield from update_ui(chatbot=chatbot, history=history, msg="Claude响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。")
    for response in claude_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt):
        chatbot[-1] = (inputs, preprocess_newbing_out(response))
        yield from update_ui(chatbot=chatbot, history=history, msg="Claude响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。")
    # A reply that never advanced past the placeholder means Claude failed.
    if response == "[Local Message] 等待Claude响应中 ...":
        response = "[Local Message] Claude响应异常,请刷新界面重试 ..."
    history.extend([inputs, response])
    logging.info(f'[raw_input] {inputs}')
    logging.info(f'[response] {response}')
    yield from update_ui(chatbot=chatbot, history=history, msg="完成全部响应,请提交新问题。")
request_llms/bridge_tgui.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Contributed by SagsMug. Modified by binary-husky
3
+ https://github.com/oobabooga/text-generation-webui/pull/175
4
+ '''
5
+
6
+ import asyncio
7
+ import json
8
+ import random
9
+ import string
10
+ import websockets
11
+ import logging
12
+ import time
13
+ import threading
14
+ import importlib
15
+ from toolbox import get_conf, update_ui
16
+
17
+
18
def random_hash():
    """Return a 9-character random session id drawn from [a-z0-9]."""
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(random.choice(alphabet) for _ in range(9))
21
+
22
async def run(context, max_token, temperature, top_p, addr, port):
    """Stream text from a text-generation-webui instance via its gradio websocket queue.

    Yields the cumulative generated text each time the server reports progress.
    NOTE(review): fn_index=12 is tied to one specific gradio layout of
    text-generation-webui — confirm it matches the deployed server build.
    """
    params = {
        'max_new_tokens': max_token,
        'do_sample': True,
        'temperature': temperature,
        'top_p': top_p,
        'typical_p': 1,
        'repetition_penalty': 1.05,
        'encoder_repetition_penalty': 1.0,
        'top_k': 0,
        'min_length': 0,
        'no_repeat_ngram_size': 0,
        'num_beams': 1,
        'penalty_alpha': 0,
        'length_penalty': 1,
        'early_stopping': True,
        'seed': -1,
    }
    # Random session hash identifying this queue entry on the server.
    session = random_hash()

    async with websockets.connect(f"ws://{addr}:{port}/queue/join") as websocket:
        while content := json.loads(await websocket.recv()):
            #Python3.10 syntax, replace with if elif on older
            if content["msg"] == "send_hash":
                # The server asks us to identify: register our session for fn 12.
                await websocket.send(json.dumps({
                    "session_hash": session,
                    "fn_index": 12
                }))
            elif content["msg"] == "estimation":
                # Queue-position estimate; nothing to do.
                pass
            elif content["msg"] == "send_data":
                # Our turn: submit the prompt plus all sampling parameters.
                # The positional order of "data" must match the server's fn signature.
                await websocket.send(json.dumps({
                    "session_hash": session,
                    "fn_index": 12,
                    "data": [
                        context,
                        params['max_new_tokens'],
                        params['do_sample'],
                        params['temperature'],
                        params['top_p'],
                        params['typical_p'],
                        params['repetition_penalty'],
                        params['encoder_repetition_penalty'],
                        params['top_k'],
                        params['min_length'],
                        params['no_repeat_ngram_size'],
                        params['num_beams'],
                        params['penalty_alpha'],
                        params['length_penalty'],
                        params['early_stopping'],
                        params['seed'],
                    ]
                }))
            elif content["msg"] == "process_starts":
                pass
            elif content["msg"] in ["process_generating", "process_completed"]:
                yield content["output"]["data"][0]
                # You can search for your desired end indicator and
                # stop generation by closing the websocket here
                if (content["msg"] == "process_completed"):
                    break
83
+
84
+
85
+
86
+
87
+
88
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    Send the query to the TGUI backend and stream the output into the UI.
    inputs: this round's question
    top_p, temperature: sampling parameters (via llm_kwargs)
    history: previous dialogue turns (overlong inputs/history will overflow the token budget)
    chatbot: the dialogue list rendered in the WebUI; mutate it then yield to repaint
    additional_fn: which core-function button was clicked, see functional.py
    """
    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    raw_input = "What I would like to say is the following: " + inputs
    history.extend([inputs, ""])
    chatbot.append([inputs, ""])
    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应")  # repaint the UI

    prompt = raw_input
    tgui_say = ""

    # llm_model is formatted like "tgui:galactica-1.3b@localhost:7860".
    model_name, addr_port = llm_kwargs['llm_model'].split('@')
    assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_kwargs['llm_model']
    addr, port = addr_port.split(':')


    # mutable[0]: cumulative text produced so far; mutable[1]: last listener heartbeat.
    mutable = ["", time.time()]
    def run_coorotine(mutable):
        async def get_result(mutable):
            # "tgui:galactica-1.3b@localhost:7860"

            async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
                              temperature=llm_kwargs['temperature'],
                              top_p=llm_kwargs['top_p'], addr=addr, port=port):
                print(response[len(mutable[0]):])
                mutable[0] = response
                # Abort generation when the UI loop stops refreshing the heartbeat.
                if (time.time() - mutable[1]) > 3:
                    print('exit when no listener')
                    break
        asyncio.run(get_result(mutable))

    thread_listen = threading.Thread(target=run_coorotine, args=(mutable,), daemon=True)
    thread_listen.start()

    # Poll the worker: refresh the heartbeat and repaint whenever new text arrived.
    while thread_listen.is_alive():
        time.sleep(1)
        mutable[1] = time.time()
        # Print intermediate steps
        if tgui_say != mutable[0]:
            tgui_say = mutable[0]
            history[-1] = tgui_say
            chatbot[-1] = (history[-2], history[-1])
            yield from update_ui(chatbot=chatbot, history=history)  # repaint the UI
142
+
143
+
144
+
145
+
146
def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience=False):
    """Threaded variant: kick off generation and mirror progress into observe_window.

    NOTE(review): this returns observe_window[0] immediately after starting the
    listener thread, i.e. before generation has finished — callers appear to
    poll observe_window for the evolving text; confirm against bridge_all.py.
    """
    raw_input = "What I would like to say is the following: " + inputs
    prompt = raw_input
    tgui_say = ""
    # llm_model is formatted like "tgui:galactica-1.3b@localhost:7860".
    model_name, addr_port = llm_kwargs['llm_model'].split('@')
    assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_kwargs['llm_model']
    addr, port = addr_port.split(':')


    def run_coorotine(observe_window):
        async def get_result(observe_window):
            async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
                              temperature=llm_kwargs['temperature'],
                              top_p=llm_kwargs['top_p'], addr=addr, port=port):
                print(response[len(observe_window[0]):])
                observe_window[0] = response
                # Watchdog: stop when the caller's heartbeat in observe_window[1] goes stale.
                if (time.time() - observe_window[1]) > 5:
                    print('exit when no listener')
                    break
        asyncio.run(get_result(observe_window))
    # NOTE(review): unlike predict(), this thread is not daemonized — it can keep
    # the interpreter alive after the caller is done; confirm that is intended.
    thread_listen = threading.Thread(target=run_coorotine, args=(observe_window,))
    thread_listen.start()
    return observe_window[0]
request_llms/bridge_zhipu.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import time
3
+ from toolbox import update_ui, get_conf, update_ui_lastest_msg
4
+ from toolbox import check_packages, report_exception
5
+
6
+ model_name = '智谱AI大模型'
7
+
8
def validate_key():
    """Return True when ZHIPUAI_API_KEY is configured (non-empty)."""
    return get_conf("ZHIPUAI_API_KEY") != ''
12
+
13
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
    """
    ⭐ Multi-threaded entry point for the Zhipu model; see request_llms/bridge_all.py.
    """
    if validate_key() is False:
        raise RuntimeError('请配置ZHIPUAI_API_KEY')

    from .com_zhipuapi import ZhipuRequestInstance

    WATCHDOG_PATIENCE = 5  # seconds of caller silence tolerated before aborting
    answer = ""
    request = ZhipuRequestInstance()
    for answer in request.generate(inputs, llm_kwargs, history, sys_prompt):
        # Mirror the partial response so the caller can observe progress.
        if observe_window:
            observe_window[0] = answer
        # Watchdog: a stale heartbeat in observe_window[1] means the listener is gone.
        if len(observe_window) >= 2 and (time.time() - observe_window[1]) > WATCHDOG_PATIENCE:
            raise RuntimeError("程序终止。")
    return answer
32
+
33
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    ⭐ Single-threaded (UI) entry point for the Zhipu model; see request_llms/bridge_all.py.
    """
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history)

    # Dependency probe: zhipuai is an optional extra; suggest the install command on failure.
    try:
        check_packages(["zhipuai"])
    except:
        yield from update_ui_lastest_msg(f"导入软件依赖失败。使用该模型需要额外依赖,安装方法```pip install --upgrade zhipuai```。",
                                         chatbot=chatbot, history=history, delay=0)
        return

    # Credential probe.
    if validate_key() is False:
        yield from update_ui_lastest_msg(lastmsg="[Local Message] 请配置ZHIPUAI_API_KEY", chatbot=chatbot, history=history, delay=0)
        return

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    # Stream the reply, repainting the last chat row on each fragment.
    from .com_zhipuapi import ZhipuRequestInstance
    request = ZhipuRequestInstance()
    for response in request.generate(inputs, llm_kwargs, history, system_prompt):
        chatbot[-1] = (inputs, response)
        yield from update_ui(chatbot=chatbot, history=history)

    # Wrap up: flag a never-updated placeholder reply, then record the exchange.
    if response == f"[Local Message] 等待{model_name}响应中 ...":
        response = f"[Local Message] {model_name}响应异常 ..."
    history.extend([inputs, response])
    yield from update_ui(chatbot=chatbot, history=history)
request_llms/com_google.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # encoding: utf-8
2
+ # @Time : 2023/12/25
3
+ # @Author : Spike
4
+ # @Descr :
5
+ import json
6
+ import os
7
+ import re
8
+ import requests
9
+ from typing import List, Dict, Tuple
10
+ from toolbox import get_conf, encode_image, get_pictures_list
11
+
12
+ proxies, TIMEOUT_SECONDS = get_conf("proxies", "TIMEOUT_SECONDS")
13
+
14
+ """
15
+ ========================================================================
16
+ 第五部分 一些文件处理方法
17
+ files_filter_handler 根据type过滤文件
18
+ input_encode_handler 提取input中的文件,并解析
19
+ file_manifest_filter_html 根据type过滤文件, 并解析为html or md 文本
20
+ link_mtime_to_md 文件增加本地时间参数,避免下载到缓存文件
21
+ html_view_blank 超链接
22
+ html_local_file 本地文件取相对路径
23
+ to_markdown_tabs 文件list 转换为 md tab
24
+ """
25
+
26
+
27
def files_filter_handler(file_list):
    """Keep only existing files whose (lowercase) extension marks them as an image.

    Entries may carry a gradio-style ``file=`` prefix, which is stripped first.
    """
    image_exts = {
        "png", "jpg", "jpeg", "bmp", "svg", "webp",
        "ico", "tif", "tiff", "raw", "eps",
    }
    kept = []
    for item in file_list:
        path = str(item).replace("file=", "")
        # Existence first, then a last-dot-segment extension check.
        if os.path.exists(path) and str(os.path.basename(path)).split(".")[-1] in image_exts:
            kept.append(path)
    return kept
48
+
49
+
50
def input_encode_handler(inputs, llm_kwargs):
    """Extract the most recently uploaded images and base64-encode them.

    Returns a tuple ``(inputs, md_encode)`` where ``md_encode`` is a list of
    ``{"data": <base64>, "type": <mime subtype>}`` dicts (empty when nothing
    was uploaded).
    """
    if llm_kwargs["most_recent_uploaded"].get("path"):
        image_paths = get_pictures_list(llm_kwargs["most_recent_uploaded"]["path"])
        md_encode = []
        for md_path in image_paths:
            type_ = os.path.splitext(md_path)[1].replace(".", "")
            type_ = "jpeg" if type_ == "jpg" else type_  # normalize to the MIME subtype
            md_encode.append({"data": encode_image(md_path), "type": type_})
        return inputs, md_encode
    # BUG FIX: previously fell through and implicitly returned None, which
    # crashed callers that unpack `input_, encode_img = input_encode_handler(...)`.
    return inputs, []
59
+
60
+
61
def file_manifest_filter_html(file_list, filter_: list = None, md_type=False):
    """Render each entry for display: images as <img>/markdown, existing files as
    cache-busted links, everything else verbatim.

    filter_: extensions treated as images (defaults to the common raster/vector set).
    md_type: forwarded to html_local_img to pick markdown vs HTML output.
    """
    exts = filter_ or [
        "png", "jpg", "jpeg", "bmp", "svg", "webp",
        "ico", "tif", "tiff", "raw", "eps",
    ]
    rendered = []
    for file in file_list:
        if str(os.path.basename(file)).split(".")[-1] in exts:
            rendered.append(html_local_img(file, md=md_type))
        elif os.path.exists(file):
            rendered.append(link_mtime_to_md(file))
        else:
            rendered.append(file)
    return rendered
85
+
86
+
87
def link_mtime_to_md(file):
    """Markdown link whose URL carries the file's mtime as a cache-busting query string."""
    href = html_local_file(file)
    name = os.path.basename(file)
    return f"[{name}]({href}?{os.path.getmtime(file)})"
92
+
93
+
94
def html_local_file(file):
    """Rewrite an existing absolute path into a gradio 'file=' link relative to this
    module's directory; nonexistent paths are returned unchanged."""
    project_dir = os.path.dirname(__file__)
    if os.path.exists(str(file)):
        file = f'file={file.replace(project_dir, ".")}'
    return file
99
+
100
+
101
def html_local_img(__file, layout="left", max_width=None, max_height=None, md=True):
    """Render a local image either as a markdown image (md=True) or an HTML <div><img>
    with optional max-width/max-height CSS constraints."""
    constraints = []
    if max_width is not None:
        constraints.append(f"max-width: {max_width};")
    if max_height is not None:
        constraints.append(f"max-height: {max_height};")
    style = "".join(constraints)
    __file = html_local_file(__file)
    if md:
        return f"![{__file}]({__file})"
    return f'<div align="{layout}"><img src="{__file}" style="{style}"></div>'
112
+
113
+
114
def to_markdown_tabs(head: list, tabs: list, alignment=":---:", column=False):
    """
    Render a markdown table.

    Args:
        head: header cells, e.g. ["name", "size"]
        tabs: cell data — [[col1], [col2], ...] when column=True, otherwise rows
        alignment: ":---" left, ":---:" center, "---:" right
        column: True to treat tabs as columns, False to treat them as rows (default)
    Returns:
        A string representation of the markdown table.
    """
    if column:
        transposed_tabs = list(map(list, zip(*tabs)))
    else:
        transposed_tabs = tabs
    # ROBUSTNESS FIX: max() over an empty sequence raised ValueError when tabs
    # was empty; default=0 now yields a header-only table instead.
    max_len = max((len(col) for col in transposed_tabs), default=0)

    tab_format = "| %s "
    tabs_list = "".join([tab_format % i for i in head]) + "|\n"
    tabs_list += "".join([tab_format % alignment for i in head]) + "|\n"

    for i in range(max_len):
        # Ragged columns are padded with empty cells.
        row_data = [tab[i] if i < len(tab) else "" for tab in transposed_tabs]
        row_data = file_manifest_filter_html(row_data, filter_=None)
        tabs_list += "".join([tab_format % i for i in row_data]) + "|\n"

    return tabs_list
141
+
142
+
143
class GoogleChatInit:
    """Builds and issues streaming requests against Google's Gemini REST API."""

    # Pristine endpoint template; %m is substituted with the model name, %k with the API key.
    _URL_TEMPLATE = "https://generativelanguage.googleapis.com/v1beta/models/%m:streamGenerateContent?key=%k"

    def __init__(self):
        # url_gemini keeps its historical role: after generate_message_payload()
        # it holds the fully substituted URL for the current request.
        self.url_gemini = self._URL_TEMPLATE

    def generate_chat(self, inputs, llm_kwargs, history, system_prompt):
        """POST the payload and return an iterator over the streamed response lines."""
        headers, payload = self.generate_message_payload(
            inputs, llm_kwargs, history, system_prompt
        )
        response = requests.post(
            url=self.url_gemini,
            headers=headers,
            data=json.dumps(payload),
            stream=True,
            proxies=proxies,
            timeout=TIMEOUT_SECONDS,
        )
        return response.iter_lines()

    def __conversation_user(self, user_input, llm_kwargs):
        """Wrap one user turn as a Gemini 'user' message; vision models also get inline images."""
        what_i_have_asked = {"role": "user", "parts": []}
        if "vision" not in self.url_gemini:
            input_ = user_input
            encode_img = []
        else:
            input_, encode_img = input_encode_handler(user_input, llm_kwargs=llm_kwargs)
        what_i_have_asked["parts"].append({"text": input_})
        if encode_img:
            for data in encode_img:
                what_i_have_asked["parts"].append(
                    {
                        "inline_data": {
                            "mime_type": f"image/{data['type']}",
                            "data": data["data"],
                        }
                    }
                )
        return what_i_have_asked

    def __conversation_history(self, history, llm_kwargs):
        """Convert flat [q, a, q, a, ...] history into alternating user/model messages."""
        messages = []
        conversation_cnt = len(history) // 2
        if conversation_cnt:
            for index in range(0, 2 * conversation_cnt, 2):
                what_i_have_asked = self.__conversation_user(history[index], llm_kwargs)
                what_gpt_answer = {
                    "role": "model",
                    "parts": [{"text": history[index + 1]}],
                }
                messages.append(what_i_have_asked)
                messages.append(what_gpt_answer)
        return messages

    def generate_message_payload(
        self, inputs, llm_kwargs, history, system_prompt
    ) -> Tuple[Dict, Dict]:
        """Return (headers, payload) for the Gemini request and refresh self.url_gemini."""
        messages = [
            # {"role": "system", "parts": [{"text": system_prompt}]},  # Gemini rejects an even number of turns, so the system role is unused for now
            # {"role": "user", "parts": [{"text": ""}]},
            # {"role": "model", "parts": [{"text": ""}]}
        ]
        # BUG FIX: substitute on the pristine template instead of self.url_gemini.
        # Previously the substitution result was written back over the template,
        # so a second call found no %m/%k placeholders and silently reused the
        # first call's model and key.
        self.url_gemini = self._URL_TEMPLATE.replace(
            "%m", llm_kwargs["llm_model"]
        ).replace("%k", get_conf("GEMINI_API_KEY"))
        header = {"Content-Type": "application/json"}
        if "vision" not in self.url_gemini:  # only non-vision models replay history
            messages.extend(
                self.__conversation_history(history, llm_kwargs)
            )
        messages.append(self.__conversation_user(inputs, llm_kwargs))  # current user turn
        payload = {
            "contents": messages,
            "generationConfig": {
                # "maxOutputTokens": 800,
                "stopSequences": str(llm_kwargs.get("stop", "")).split(" "),
                "temperature": llm_kwargs.get("temperature", 1),
                "topP": llm_kwargs.get("top_p", 0.8),
                "topK": 10,
            },
        }
        return header, payload
223
+
224
+
225
+ if __name__ == "__main__":
226
+ google = GoogleChatInit()
227
+ # print(gootle.generate_message_payload('你好呀', {}, ['123123', '3123123'], ''))
228
+ # gootle.input_encode_handle('123123[123123](./123123), ![53425](./asfafa/fff.jpg)')
request_llms/com_qwenapi.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from http import HTTPStatus
2
+ from toolbox import get_conf
3
+ import threading
4
+ import logging
5
+
6
+ timeout_bot_msg = '[Local Message] Request timeout. Network error.'
7
+
8
class QwenRequestInstance():
    """Thin streaming wrapper around the DashScope (Qwen) Generation API."""

    def __init__(self):
        import dashscope
        self.time_to_yield_event = threading.Event()
        self.time_to_exit_event = threading.Event()
        self.result_buf = ""

        # Fail fast when no key is configured, then install it globally.
        if get_conf("DASHSCOPE_API_KEY") == '':
            raise RuntimeError('请配置 DASHSCOPE_API_KEY')
        dashscope.api_key = get_conf("DASHSCOPE_API_KEY")

    def generate(self, inputs, llm_kwargs, history, system_prompt):
        """Stream the completion, yielding the accumulated text after every chunk."""
        from dashscope import Generation
        selected_model = {
            'qwen-turbo': Generation.Models.qwen_turbo,
            'qwen-plus': Generation.Models.qwen_plus,
            'qwen-max': Generation.Models.qwen_max,
        }[llm_kwargs['llm_model']]
        # The endpoint rejects top_p values of exactly 0 or 1; nudge them inward.
        top_p = llm_kwargs.get('top_p', 0.8)
        if top_p == 0: top_p += 1e-5
        if top_p == 1: top_p -= 1e-5

        self.result_buf = ""
        stream = Generation.call(
            model=selected_model,
            messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt),
            top_p=top_p,
            temperature=llm_kwargs.get('temperature', 1.0),
            result_format='message',
            stream=True,
            incremental_output=True
        )

        for chunk in stream:
            if chunk.status_code != HTTPStatus.OK:
                # Transport/API failure: surface the error text and stop streaming.
                self.result_buf += f"[Local Message] 请求错误:状态码:{chunk.status_code},错误码:{chunk.code},消息:{chunk.message}"
                yield self.result_buf
                break
            finish_reason = chunk.output.choices[0].finish_reason
            if finish_reason == 'stop':
                yield self.result_buf
                break
            if finish_reason == 'length':
                self.result_buf += "[Local Message] 生成长度过长,后续输出被截断"
                yield self.result_buf
                break
            self.result_buf += chunk.output.choices[0].message.content
            yield self.result_buf
        logging.info(f'[raw_input] {inputs}')
        logging.info(f'[response] {self.result_buf}')
        return self.result_buf
67
+
68
+
69
def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
    """Assemble the strictly-alternating user/assistant message list for DashScope.

    The system prompt is emitted as a leading user turn (answered with
    "Certainly!") because the API expects user/assistant alternation.
    """
    if system_prompt == '': system_prompt = 'Hello!'
    messages = [{"role": "user", "content": system_prompt}, {"role": "assistant", "content": "Certainly!"}]
    for idx in range(0, (len(history) // 2) * 2, 2):
        asked = {"role": "user", "content": history[idx]}
        answered = {"role": "assistant", "content": history[idx + 1]}
        if asked["content"] != "":
            # Skip turns with an empty or timed-out answer.
            if answered["content"] == "":
                continue
            if answered["content"] == timeout_bot_msg:
                continue
            messages.append(asked)
            messages.append(answered)
        else:
            # An empty question means the answer continues the previous turn.
            messages[-1]['content'] = answered['content']
    messages.append({"role": "user", "content": inputs})
    return messages
request_llms/com_sparkapi.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import get_conf, get_pictures_list, encode_image
2
+ import base64
3
+ import datetime
4
+ import hashlib
5
+ import hmac
6
+ import json
7
+ from urllib.parse import urlparse
8
+ import ssl
9
+ from datetime import datetime
10
+ from time import mktime
11
+ from urllib.parse import urlencode
12
+ from wsgiref.handlers import format_date_time
13
+ import websocket
14
+ import threading, time
15
+
16
+ timeout_bot_msg = '[Local Message] Request timeout. Network error.'
17
+
18
class Ws_Param(object):
    """Holds iFlytek Spark credentials and builds the signed websocket URL."""

    def __init__(self, APPID, APIKey, APISecret, gpt_url):
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret
        parsed = urlparse(gpt_url)
        self.host = parsed.netloc
        self.path = parsed.path
        self.gpt_url = gpt_url

    def create_url(self):
        """Return the websocket URL carrying HMAC-SHA256 auth query parameters."""
        # RFC1123 timestamp required by the signature scheme.
        date = format_date_time(mktime(datetime.now().timetuple()))

        # Canonical string to sign: host, date and the request line.
        signature_origin = (
            "host: " + self.host + "\n"
            + "date: " + date + "\n"
            + "GET " + self.path + " HTTP/1.1"
        )

        # HMAC-SHA256 over the canonical string, then base64.
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature_sha_base64 = base64.b64encode(digest).decode(encoding='utf-8')
        authorization_origin = f'api_key="{self.APIKey}", algorithm="hmac-sha256", headers="host date request-line", signature="{signature_sha_base64}"'
        authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode(encoding='utf-8')

        # Auth material travels as query parameters on the websocket URL.
        query = {
            "authorization": authorization,
            "date": date,
            "host": self.host,
        }
        return self.gpt_url + '?' + urlencode(query)
55
+
56
+
57
+
58
class SparkRequestInstance():
    """Streams chat completions from the iFlytek Spark websocket API."""

    def __init__(self):
        XFYUN_APPID, XFYUN_API_SECRET, XFYUN_API_KEY = get_conf('XFYUN_APPID', 'XFYUN_API_SECRET', 'XFYUN_API_KEY')
        if XFYUN_APPID == '00000000' or XFYUN_APPID == '': raise RuntimeError('请配置讯飞星火大模型的XFYUN_APPID, XFYUN_API_KEY, XFYUN_API_SECRET')
        self.appid = XFYUN_APPID
        self.api_secret = XFYUN_API_SECRET
        self.api_key = XFYUN_API_KEY
        # Endpoints for the three Spark generations plus image understanding.
        self.gpt_url = "ws://spark-api.xf-yun.com/v1.1/chat"
        self.gpt_url_v2 = "ws://spark-api.xf-yun.com/v2.1/chat"
        self.gpt_url_v3 = "ws://spark-api.xf-yun.com/v3.1/chat"
        self.gpt_url_img = "wss://spark-api.cn-huabei-1.xf-yun.com/v2.1/image"

        self.time_to_yield_event = threading.Event()
        self.time_to_exit_event = threading.Event()
        self.result_buf = ""

    def generate(self, inputs, llm_kwargs, history, system_prompt, use_image_api=False):
        """Generator: run the websocket request on a worker thread and yield the
        growing response buffer roughly once per second until the stream ends."""
        import _thread as thread
        thread.start_new_thread(
            self.create_blocking_request,
            (inputs, llm_kwargs, history, system_prompt, use_image_api),
        )
        while True:
            self.time_to_yield_event.wait(timeout=1)
            if self.time_to_yield_event.is_set():
                yield self.result_buf
            if self.time_to_exit_event.is_set():
                return self.result_buf

    def create_blocking_request(self, inputs, llm_kwargs, history, system_prompt, use_image_api):
        """Open the websocket, send the request, and stream chunks into self.result_buf."""
        model = llm_kwargs['llm_model']
        if model == 'sparkv2':
            gpt_url = self.gpt_url_v2
        elif model == 'sparkv3':
            gpt_url = self.gpt_url_v3
        else:
            gpt_url = self.gpt_url
        # When the caller uploaded pictures, switch to the image-understanding endpoint.
        file_manifest = []
        if use_image_api and llm_kwargs.get('most_recent_uploaded'):
            if llm_kwargs['most_recent_uploaded'].get('path'):
                file_manifest = get_pictures_list(llm_kwargs['most_recent_uploaded']['path'])
                if len(file_manifest) > 0:
                    print('正在使用讯飞图片理解API')
                    gpt_url = self.gpt_url_img
        wsParam = Ws_Param(self.appid, self.api_key, self.api_secret, gpt_url)
        websocket.enableTrace(False)
        wsUrl = wsParam.create_url()

        def on_open(ws):
            # Push the payload from a fresh thread once the socket is up.
            import _thread as thread
            thread.start_new_thread(run, (ws,))

        def run(ws, *args):
            ws.send(json.dumps(gen_params(ws.appid, *ws.all_args, file_manifest)))

        def on_message(ws, message):
            data = json.loads(message)
            code = data['header']['code']
            if code != 0:
                # Non-zero code: surface the raw error payload and stop.
                print(f'请求错误: {code}, {data}')
                self.result_buf += str(data)
                ws.close()
                self.time_to_exit_event.set()
            else:
                choices = data["payload"]["choices"]
                status = choices["status"]
                content = choices["text"][0]["content"]
                ws.content += content
                self.result_buf += content
                if status == 2:
                    # status 2 marks the final chunk of the reply.
                    ws.close()
                    self.time_to_exit_event.set()
            self.time_to_yield_event.set()

        def on_error(ws, error):
            print("error:", error)
            self.time_to_exit_event.set()

        def on_close(ws, *args):
            self.time_to_exit_event.set()

        ws = websocket.WebSocketApp(wsUrl, on_message=on_message, on_error=on_error, on_close=on_close, on_open=on_open)
        ws.appid = self.appid
        ws.content = ""
        ws.all_args = (inputs, llm_kwargs, history, system_prompt)
        # Spark's endpoint is used without certificate verification (as upstream demo does).
        ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
150
+
151
def generate_message_payload(inputs, llm_kwargs, history, system_prompt, file_manifest):
    """Build the Spark `text` message list.

    Image mode (non-empty *file_manifest*) sends base64 pictures instead of the
    chat history; text mode sends system prompt plus alternating history turns.
    """
    messages = []
    if file_manifest:
        # One "image" turn per unique picture (deduplicated by payload content).
        encoded = [encode_image(image_path) for image_path in file_manifest]
        for img_s in encoded:
            if img_s not in str(messages):
                messages.append({"role": "user", "content": img_s, "content_type": "image"})
    else:
        messages = [{"role": "system", "content": system_prompt}]
        for idx in range(0, (len(history) // 2) * 2, 2):
            asked = {"role": "user", "content": history[idx]}
            answered = {"role": "assistant", "content": history[idx + 1]}
            if asked["content"] != "":
                # Skip turns with an empty or timed-out answer.
                if answered["content"] == "": continue
                if answered["content"] == timeout_bot_msg: continue
                messages.append(asked)
                messages.append(answered)
            else:
                # Empty question: the answer continues the previous turn.
                messages[-1]['content'] = answered['content']
    messages.append({"role": "user", "content": inputs})
    return messages
183
+
184
+
185
def gen_params(appid, inputs, llm_kwargs, history, system_prompt, file_manifest):
    """Build the full Spark websocket request body for the given app id and prompt."""
    # Map model name to its API "domain"; picture requests override to "image".
    domains = {
        "spark": "general",
        "sparkv2": "generalv2",
        "sparkv3": "generalv3",
    }
    domain = domains[llm_kwargs['llm_model']]
    if file_manifest: domain = 'image'
    return {
        "header": {
            "app_id": appid,
            "uid": "1234"
        },
        "parameter": {
            "chat": {
                "domain": domain,
                "temperature": llm_kwargs["temperature"],
                "random_threshold": 0.5,
                "max_tokens": 4096,
                "auditing": "default"
            }
        },
        "payload": {
            "message": {
                # Delegate message assembly to the shared payload builder.
                "text": generate_message_payload(inputs, llm_kwargs, history, system_prompt, file_manifest)
            }
        }
    }
217
+
request_llms/com_zhipuapi.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import get_conf
2
+ import threading
3
+ import logging
4
+
5
+ timeout_bot_msg = '[Local Message] Request timeout. Network error.'
6
+
7
class ZhipuRequestInstance():
    """Streams chat completions from the Zhipu (ChatGLM) SSE API."""

    def __init__(self):
        self.time_to_yield_event = threading.Event()
        self.time_to_exit_event = threading.Event()
        self.result_buf = ""

    def generate(self, inputs, llm_kwargs, history, system_prompt):
        """Yield the accumulated response text as SSE chunks arrive."""
        import zhipuai
        ZHIPUAI_API_KEY, ZHIPUAI_MODEL = get_conf("ZHIPUAI_API_KEY", "ZHIPUAI_MODEL")
        zhipuai.api_key = ZHIPUAI_API_KEY
        self.result_buf = ""
        response = zhipuai.model_api.sse_invoke(
            model=ZHIPUAI_MODEL,
            prompt=generate_message_payload(inputs, llm_kwargs, history, system_prompt),
            top_p=llm_kwargs['top_p'],
            temperature=llm_kwargs['temperature'],
        )
        for event in response.events():
            if event.event == "add":
                # Incremental token(s): append and surface the running buffer.
                self.result_buf += event.data
                yield self.result_buf
            elif event.event == "finish":
                yield self.result_buf
                break
            elif event.event in ("error", "interrupted"):
                raise RuntimeError("Unknown error:" + event.data)
            else:
                raise RuntimeError("Unknown error:" + str(event))

        logging.info(f'[raw_input] {inputs}')
        logging.info(f'[response] {self.result_buf}')
        return self.result_buf
42
+
43
def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
    """Build the strictly-alternating user/assistant message list for the Zhipu API.

    The system prompt becomes a leading user turn answered with a stock
    acknowledgement, since the API wants user/assistant alternation.
    """
    messages = [{"role": "user", "content": system_prompt}, {"role": "assistant", "content": "Certainly!"}]
    for idx in range(0, (len(history) // 2) * 2, 2):
        asked = {"role": "user", "content": history[idx]}
        answered = {"role": "assistant", "content": history[idx + 1]}
        if asked["content"] != "":
            # Skip turns with an empty or timed-out answer.
            if answered["content"] == "":
                continue
            if answered["content"] == timeout_bot_msg:
                continue
            messages.append(asked)
            messages.append(answered)
        else:
            # An empty question means the answer continues the previous turn.
            messages[-1]['content'] = answered['content']
    messages.append({"role": "user", "content": inputs})
    return messages
request_llms/edge_gpt_free.py ADDED
@@ -0,0 +1,1125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ========================================================================
3
+ 第一部分:来自EdgeGPT.py
4
+ https://github.com/acheong08/EdgeGPT
5
+ ========================================================================
6
+ """
7
+ """
8
+ Main.py
9
+ """
10
+
11
+ import argparse
12
+ import asyncio
13
+ import json
14
+ import os
15
+ import random
16
+ import re
17
+ import ssl
18
+ import sys
19
+ import time
20
+ import uuid
21
+ from enum import Enum
22
+ from pathlib import Path
23
+ from typing import Generator
24
+ from typing import Literal
25
+ from typing import Optional
26
+ from typing import Union
27
+
28
+ import aiohttp
29
+ import certifi
30
+ import httpx
31
+ from prompt_toolkit import PromptSession
32
+ from prompt_toolkit.auto_suggest import AutoSuggestFromHistory
33
+ from prompt_toolkit.completion import WordCompleter
34
+ from prompt_toolkit.history import InMemoryHistory
35
+ from prompt_toolkit.key_binding import KeyBindings
36
+ from rich.live import Live
37
+ from rich.markdown import Markdown
38
+
39
+ DELIMITER = "\x1e"
40
+
41
+
42
+ # Generate random IP between range 13.104.0.0/14
43
+ FORWARDED_IP = (
44
+ f"13.{random.randint(104, 107)}.{random.randint(0, 255)}.{random.randint(0, 255)}"
45
+ )
46
+
47
+ HEADERS = {
48
+ "accept": "application/json",
49
+ "accept-language": "en-US,en;q=0.9",
50
+ "content-type": "application/json",
51
+ "sec-ch-ua": '"Not_A Brand";v="99", "Microsoft Edge";v="110", "Chromium";v="110"',
52
+ "sec-ch-ua-arch": '"x86"',
53
+ "sec-ch-ua-bitness": '"64"',
54
+ "sec-ch-ua-full-version": '"109.0.1518.78"',
55
+ "sec-ch-ua-full-version-list": '"Chromium";v="110.0.5481.192", "Not A(Brand";v="24.0.0.0", "Microsoft Edge";v="110.0.1587.69"',
56
+ "sec-ch-ua-mobile": "?0",
57
+ "sec-ch-ua-model": "",
58
+ "sec-ch-ua-platform": '"Windows"',
59
+ "sec-ch-ua-platform-version": '"15.0.0"',
60
+ "sec-fetch-dest": "empty",
61
+ "sec-fetch-mode": "cors",
62
+ "sec-fetch-site": "same-origin",
63
+ "x-ms-client-request-id": str(uuid.uuid4()),
64
+ "x-ms-useragent": "azsdk-js-api-client-factory/1.0.0-beta.1 core-rest-pipeline/1.10.0 OS/Win32",
65
+ "Referer": "https://www.bing.com/search?q=Bing+AI&showconv=1&FORM=hpcodx",
66
+ "Referrer-Policy": "origin-when-cross-origin",
67
+ "x-forwarded-for": FORWARDED_IP,
68
+ }
69
+
70
+ HEADERS_INIT_CONVER = {
71
+ "authority": "edgeservices.bing.com",
72
+ "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
73
+ "accept-language": "en-US,en;q=0.9",
74
+ "cache-control": "max-age=0",
75
+ "sec-ch-ua": '"Chromium";v="110", "Not A(Brand";v="24", "Microsoft Edge";v="110"',
76
+ "sec-ch-ua-arch": '"x86"',
77
+ "sec-ch-ua-bitness": '"64"',
78
+ "sec-ch-ua-full-version": '"110.0.1587.69"',
79
+ "sec-ch-ua-full-version-list": '"Chromium";v="110.0.5481.192", "Not A(Brand";v="24.0.0.0", "Microsoft Edge";v="110.0.1587.69"',
80
+ "sec-ch-ua-mobile": "?0",
81
+ "sec-ch-ua-model": '""',
82
+ "sec-ch-ua-platform": '"Windows"',
83
+ "sec-ch-ua-platform-version": '"15.0.0"',
84
+ "sec-fetch-dest": "document",
85
+ "sec-fetch-mode": "navigate",
86
+ "sec-fetch-site": "none",
87
+ "sec-fetch-user": "?1",
88
+ "upgrade-insecure-requests": "1",
89
+ "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.69",
90
+ "x-edge-shopping-flag": "1",
91
+ "x-forwarded-for": FORWARDED_IP,
92
+ }
93
+
94
+ ssl_context = ssl.create_default_context()
95
+ ssl_context.load_verify_locations(certifi.where())
96
+
97
+
98
class NotAllowedToAccess(Exception):
    """Raised when Bing reports the account is not authorized for the chat service."""
100
+
101
+
102
class ConversationStyle(Enum):
    # Each member's value is the exact "optionsSets" list sent to the Bing
    # endpoint for that tone. The lists are opaque server-side flags; do not
    # reorder or deduplicate them (e.g. "dv3sugg"/"travelansgnd" appear twice
    # in `creative` as received from upstream).
    creative = [
        "nlu_direct_response_filter",
        "deepleo",
        "disable_emoji_spoken_text",
        "responsible_ai_policy_235",
        "enablemm",
        "h3imaginative",
        "travelansgnd",
        "dv3sugg",
        "clgalileo",
        "gencontentv3",
        "dv3sugg",
        "responseos",
        "e2ecachewrite",
        "cachewriteext",
        "nodlcpcwrite",
        "travelansgnd",
        "nojbfedge",
    ]
    balanced = [
        "nlu_direct_response_filter",
        "deepleo",
        "disable_emoji_spoken_text",
        "responsible_ai_policy_235",
        "enablemm",
        "galileo",
        "dv3sugg",
        "responseos",
        "e2ecachewrite",
        "cachewriteext",
        "nodlcpcwrite",
        "travelansgnd",
        "nojbfedge",
    ]
    precise = [
        "nlu_direct_response_filter",
        "deepleo",
        "disable_emoji_spoken_text",
        "responsible_ai_policy_235",
        "enablemm",
        "galileo",
        "dv3sugg",
        "responseos",
        "e2ecachewrite",
        "cachewriteext",
        "nodlcpcwrite",
        "travelansgnd",
        "h3precise",
        "clgalileo",
        "nojbfedge",
    ]
154
+
155
+
156
+ CONVERSATION_STYLE_TYPE = Optional[
157
+ Union[ConversationStyle, Literal["creative", "balanced", "precise"]]
158
+ ]
159
+
160
+
161
def _append_identifier(msg: dict) -> str:
    """Serialize *msg* to JSON and append the record-separator character that
    marks the end of a message frame on the ChatHub websocket."""
    serialized = json.dumps(msg, ensure_ascii=False)
    return serialized + DELIMITER
167
+
168
+
169
+ def _get_ran_hex(length: int = 32) -> str:
170
+ """
171
+ Returns random hex string
172
+ """
173
+ return "".join(random.choice("0123456789abcdef") for _ in range(length))
174
+
175
+
176
class _ChatHubRequest:
    """
    Request object for ChatHub
    """

    def __init__(
        self,
        conversation_signature: str,
        client_id: str,
        conversation_id: str,
        invocation_id: int = 0,
    ) -> None:
        # Serialized request payload; (re)built by update() before each send.
        self.struct: dict = {}

        self.client_id: str = client_id
        self.conversation_id: str = conversation_id
        self.conversation_signature: str = conversation_signature
        # Number of messages already sent; 0 means start of session.
        self.invocation_id: int = invocation_id

    def update(
        self,
        prompt: str,
        conversation_style: CONVERSATION_STYLE_TYPE,
        options = None,
        webpage_context = None,
        search_result = False,
    ) -> None:
        """
        Updates request object

        Rebuilds self.struct for the given prompt and increments invocation_id.
        `conversation_style` (member or name string) overrides `options`;
        `webpage_context` attaches page text as a prior Context message;
        `search_result` widens the allowed message types to include web search.
        """
        # Fallback option set when no conversation style is supplied.
        if options is None:
            options = [
                "deepleo",
                "enable_debug_commands",
                "disable_emoji_spoken_text",
                "enablemm",
            ]
        if conversation_style:
            # Accept a style name string and resolve it to the enum member;
            # the style's value list then replaces any caller-supplied options.
            if not isinstance(conversation_style, ConversationStyle):
                conversation_style = getattr(ConversationStyle, conversation_style)
            options = conversation_style.value
        self.struct = {
            "arguments": [
                {
                    "source": "cib",
                    "optionsSets": options,
                    "allowedMessageTypes": [
                        "Chat",
                        "Disengaged",
                        "AdsQuery",
                        "SemanticSerp",
                        "GenerateContentQuery",
                        "SearchQuery",
                    ],
                    # Opaque server-side experiment flags, copied from upstream.
                    "sliceIds": [
                        "chk1cf",
                        "nopreloadsscf",
                        "winlongmsg2tf",
                        "perfimpcomb",
                        "sugdivdis",
                        "sydnoinputt",
                        "wpcssopt",
                        "wintone2tf",
                        "0404sydicnbs0",
                        "405suggbs0",
                        "scctl",
                        "330uaugs0",
                        "0329resp",
                        "udscahrfon",
                        "udstrblm5",
                        "404e2ewrt",
                        "408nodedups0",
                        "403tvlansgnd",
                    ],
                    "traceId": _get_ran_hex(32),
                    "isStartOfSession": self.invocation_id == 0,
                    "message": {
                        "author": "user",
                        "inputMethod": "Keyboard",
                        "text": prompt,
                        "messageType": "Chat",
                    },
                    "conversationSignature": self.conversation_signature,
                    "participant": {
                        "id": self.client_id,
                    },
                    "conversationId": self.conversation_id,
                },
            ],
            "invocationId": str(self.invocation_id),
            "target": "chat",
            "type": 4,
        }
        if search_result:
            # Also accept the message types that carry web-search results.
            have_search_result = [
                "InternalSearchQuery",
                "InternalSearchResult",
                "InternalLoaderMessage",
                "RenderCardRequest",
            ]
            self.struct["arguments"][0]["allowedMessageTypes"] += have_search_result
        if webpage_context:
            # Attach the page text as a prior "Context" message so the bot can use it.
            self.struct["arguments"][0]["previousMessages"] = [
                {
                    "author": "user",
                    "description": webpage_context,
                    "contextType": "WebPage",
                    "messageType": "Context",
                    "messageId": "discover-web--page-ping-mriduna-----",
                },
            ]
        self.invocation_id += 1
288
+
289
+
290
class _Conversation:
    """
    Conversation API

    Performs the initial handshake with Bing's conversation/create endpoint
    and stores the returned ids/signature in self.struct.
    """

    def __init__(
        self,
        proxy = None,
        async_mode = False,
        cookies = None,
    ) -> None:
        # In async mode the constructor is a no-op shell; create() finishes setup.
        if async_mode:
            return
        self.struct: dict = {
            "conversationId": None,
            "clientId": None,
            "conversationSignature": None,
            "result": {"value": "Success", "message": None},
        }
        self.proxy = proxy
        # Fall back to the standard proxy environment variables.
        proxy = (
            proxy
            or os.environ.get("all_proxy")
            or os.environ.get("ALL_PROXY")
            or os.environ.get("https_proxy")
            or os.environ.get("HTTPS_PROXY")
            or None
        )
        if proxy is not None and proxy.startswith("socks5h://"):
            # httpx does not accept socks5h; downgrade the scheme to socks5.
            proxy = "socks5://" + proxy[len("socks5h://") :]
        self.session = httpx.Client(
            proxies=proxy,
            timeout=30,
            headers=HEADERS_INIT_CONVER,
        )
        if cookies:
            for cookie in cookies:
                self.session.cookies.set(cookie["name"], cookie["value"])
        # Send GET request
        response = self.session.get(
            url=os.environ.get("BING_PROXY_URL")
            or "https://edgeservices.bing.com/edgesvc/turing/conversation/create",
        )
        if response.status_code != 200:
            # Retry once against the community fallback endpoint.
            response = self.session.get(
                "https://edge.churchless.tech/edgesvc/turing/conversation/create",
            )
        if response.status_code != 200:
            print(f"Status code: {response.status_code}")
            print(response.text)
            print(response.url)
            raise Exception("Authentication failed")
        try:
            self.struct = response.json()
        except (json.decoder.JSONDecodeError, NotAllowedToAccess) as exc:
            raise Exception(
                "Authentication failed. You have not been accepted into the beta.",
            ) from exc
        if self.struct["result"]["value"] == "UnauthorizedRequest":
            raise NotAllowedToAccess(self.struct["result"]["message"])

    @staticmethod
    async def create(
        proxy = None,
        cookies = None,
    ):
        # Async counterpart of __init__: builds an empty instance via the
        # async_mode shortcut, then runs the same handshake with httpx.AsyncClient.
        self = _Conversation(async_mode=True)
        self.struct = {
            "conversationId": None,
            "clientId": None,
            "conversationSignature": None,
            "result": {"value": "Success", "message": None},
        }
        self.proxy = proxy
        # Same proxy-environment fallback as the sync path.
        proxy = (
            proxy
            or os.environ.get("all_proxy")
            or os.environ.get("ALL_PROXY")
            or os.environ.get("https_proxy")
            or os.environ.get("HTTPS_PROXY")
            or None
        )
        if proxy is not None and proxy.startswith("socks5h://"):
            proxy = "socks5://" + proxy[len("socks5h://") :]
        transport = httpx.AsyncHTTPTransport(retries=10)
        # Convert cookie format to httpx format
        formatted_cookies = None
        if cookies:
            formatted_cookies = httpx.Cookies()
            for cookie in cookies:
                formatted_cookies.set(cookie["name"], cookie["value"])
        async with httpx.AsyncClient(
            proxies=proxy,
            timeout=30,
            headers=HEADERS_INIT_CONVER,
            transport=transport,
            cookies=formatted_cookies,
        ) as client:
            # Send GET request
            response = await client.get(
                url=os.environ.get("BING_PROXY_URL")
                or "https://edgeservices.bing.com/edgesvc/turing/conversation/create",
            )
            if response.status_code != 200:
                # Retry once against the community fallback endpoint.
                response = await client.get(
                    "https://edge.churchless.tech/edgesvc/turing/conversation/create",
                )
        if response.status_code != 200:
            print(f"Status code: {response.status_code}")
            print(response.text)
            print(response.url)
            raise Exception("Authentication failed")
        try:
            self.struct = response.json()
        except (json.decoder.JSONDecodeError, NotAllowedToAccess) as exc:
            raise Exception(
                "Authentication failed. You have not been accepted into the beta.",
            ) from exc
        if self.struct["result"]["value"] == "UnauthorizedRequest":
            raise NotAllowedToAccess(self.struct["result"]["message"])
        return self
411
+
412
+
413
+ class _ChatHub:
414
+ """
415
+ Chat API
416
+ """
417
+
418
+ def __init__(
419
+ self,
420
+ conversation: _Conversation,
421
+ proxy = None,
422
+ cookies = None,
423
+ ) -> None:
424
+ self.session = None
425
+ self.wss = None
426
+ self.request: _ChatHubRequest
427
+ self.loop: bool
428
+ self.task: asyncio.Task
429
+ self.request = _ChatHubRequest(
430
+ conversation_signature=conversation.struct["conversationSignature"],
431
+ client_id=conversation.struct["clientId"],
432
+ conversation_id=conversation.struct["conversationId"],
433
+ )
434
+ self.cookies = cookies
435
+ self.proxy: str = proxy
436
+
437
+ async def ask_stream(
438
+ self,
439
+ prompt: str,
440
+ wss_link: str,
441
+ conversation_style: CONVERSATION_STYLE_TYPE = None,
442
+ raw: bool = False,
443
+ options: dict = None,
444
+ webpage_context = None,
445
+ search_result: bool = False,
446
+ ) -> Generator[str, None, None]:
447
+ """
448
+ Ask a question to the bot
449
+ """
450
+ req_header = HEADERS
451
+ if self.cookies is not None:
452
+ ws_cookies = []
453
+ for cookie in self.cookies:
454
+ ws_cookies.append(f"{cookie['name']}={cookie['value']}")
455
+ req_header.update({
456
+ 'Cookie': ';'.join(ws_cookies),
457
+ })
458
+
459
+ timeout = aiohttp.ClientTimeout(total=30)
460
+ self.session = aiohttp.ClientSession(timeout=timeout)
461
+
462
+ if self.wss and not self.wss.closed:
463
+ await self.wss.close()
464
+ # Check if websocket is closed
465
+ self.wss = await self.session.ws_connect(
466
+ wss_link,
467
+ headers=req_header,
468
+ ssl=ssl_context,
469
+ proxy=self.proxy,
470
+ autoping=False,
471
+ )
472
+ await self._initial_handshake()
473
+ if self.request.invocation_id == 0:
474
+ # Construct a ChatHub request
475
+ self.request.update(
476
+ prompt=prompt,
477
+ conversation_style=conversation_style,
478
+ options=options,
479
+ webpage_context=webpage_context,
480
+ search_result=search_result,
481
+ )
482
+ else:
483
+ async with httpx.AsyncClient() as client:
484
+ response = await client.post(
485
+ "https://sydney.bing.com/sydney/UpdateConversation/",
486
+ json={
487
+ "messages": [
488
+ {
489
+ "author": "user",
490
+ "description": webpage_context,
491
+ "contextType": "WebPage",
492
+ "messageType": "Context",
493
+ },
494
+ ],
495
+ "conversationId": self.request.conversation_id,
496
+ "source": "cib",
497
+ "traceId": _get_ran_hex(32),
498
+ "participant": {"id": self.request.client_id},
499
+ "conversationSignature": self.request.conversation_signature,
500
+ },
501
+ )
502
+ if response.status_code != 200:
503
+ print(f"Status code: {response.status_code}")
504
+ print(response.text)
505
+ print(response.url)
506
+ raise Exception("Update web page context failed")
507
+ # Construct a ChatHub request
508
+ self.request.update(
509
+ prompt=prompt,
510
+ conversation_style=conversation_style,
511
+ options=options,
512
+ )
513
+ # Send request
514
+ await self.wss.send_str(_append_identifier(self.request.struct))
515
+ final = False
516
+ draw = False
517
+ resp_txt = ""
518
+ result_text = ""
519
+ resp_txt_no_link = ""
520
+ while not final:
521
+ msg = await self.wss.receive()
522
+ try:
523
+ objects = msg.data.split(DELIMITER)
524
+ except :
525
+ continue
526
+
527
+ for obj in objects:
528
+ if obj is None or not obj:
529
+ continue
530
+ response = json.loads(obj)
531
+ if response.get("type") != 2 and raw:
532
+ yield False, response
533
+ elif response.get("type") == 1 and response["arguments"][0].get(
534
+ "messages",
535
+ ):
536
+ if not draw:
537
+ if (
538
+ response["arguments"][0]["messages"][0].get("messageType")
539
+ == "GenerateContentQuery"
540
+ ):
541
+ async with ImageGenAsync("", True) as image_generator:
542
+ images = await image_generator.get_images(
543
+ response["arguments"][0]["messages"][0]["text"],
544
+ )
545
+ for i, image in enumerate(images):
546
+ resp_txt = resp_txt + f"\n![image{i}]({image})"
547
+ draw = True
548
+ if (
549
+ response["arguments"][0]["messages"][0]["contentOrigin"]
550
+ != "Apology"
551
+ ) and not draw:
552
+ resp_txt = result_text + response["arguments"][0][
553
+ "messages"
554
+ ][0]["adaptiveCards"][0]["body"][0].get("text", "")
555
+ resp_txt_no_link = result_text + response["arguments"][0][
556
+ "messages"
557
+ ][0].get("text", "")
558
+ if response["arguments"][0]["messages"][0].get(
559
+ "messageType",
560
+ ):
561
+ resp_txt = (
562
+ resp_txt
563
+ + response["arguments"][0]["messages"][0][
564
+ "adaptiveCards"
565
+ ][0]["body"][0]["inlines"][0].get("text")
566
+ + "\n"
567
+ )
568
+ result_text = (
569
+ result_text
570
+ + response["arguments"][0]["messages"][0][
571
+ "adaptiveCards"
572
+ ][0]["body"][0]["inlines"][0].get("text")
573
+ + "\n"
574
+ )
575
+ yield False, resp_txt
576
+
577
+ elif response.get("type") == 2:
578
+ if response["item"]["result"].get("error"):
579
+ await self.close()
580
+ raise Exception(
581
+ f"{response['item']['result']['value']}: {response['item']['result']['message']}",
582
+ )
583
+ if draw:
584
+ cache = response["item"]["messages"][1]["adaptiveCards"][0][
585
+ "body"
586
+ ][0]["text"]
587
+ response["item"]["messages"][1]["adaptiveCards"][0]["body"][0][
588
+ "text"
589
+ ] = (cache + resp_txt)
590
+ if (
591
+ response["item"]["messages"][-1]["contentOrigin"] == "Apology"
592
+ and resp_txt
593
+ ):
594
+ response["item"]["messages"][-1]["text"] = resp_txt_no_link
595
+ response["item"]["messages"][-1]["adaptiveCards"][0]["body"][0][
596
+ "text"
597
+ ] = resp_txt
598
+ print(
599
+ "Preserved the message from being deleted",
600
+ file=sys.stderr,
601
+ )
602
+ final = True
603
+ await self.close()
604
+ yield True, response
605
+
606
    async def _initial_handshake(self) -> None:
        """Perform the signalr JSON-protocol handshake on the websocket.

        Sends the protocol negotiation record and waits for the server's
        acknowledgement; must run before any ChatHub request is sent.
        """
        await self.wss.send_str(_append_identifier({"protocol": "json", "version": 1}))
        await self.wss.receive()
610
    async def close(self) -> None:
        """
        Close the connection.

        Shuts the websocket first, then the HTTP session; each is only
        closed if it exists and is still open.
        """
        if self.wss and not self.wss.closed:
            await self.wss.close()
        if self.session and not self.session.closed:
            await self.session.close()
618
+
619
+
620
class Chatbot:
    """
    Combines everything to make it seamless: a thin facade over _ChatHub
    that owns the conversation lifecycle (create / ask / reset / close).
    """

    def __init__(
        self,
        proxy = None,
        cookies = None,
    ) -> None:
        # Synchronous constructor: builds the conversation without awaiting.
        # Prefer the async `create` factory, which awaits conversation setup.
        self.proxy = proxy
        self.chat_hub: _ChatHub = _ChatHub(
            _Conversation(self.proxy, cookies=cookies),
            proxy=self.proxy,
            cookies=cookies,
        )

    @staticmethod
    async def create(
        proxy = None,
        cookies = None,
    ):
        """Async factory: bypasses __init__ via __new__ so the underlying
        _Conversation can be awaited during construction."""
        self = Chatbot.__new__(Chatbot)
        self.proxy = proxy
        self.chat_hub = _ChatHub(
            await _Conversation.create(self.proxy, cookies=cookies),
            proxy=self.proxy,
            cookies=cookies,
        )
        return self

    async def ask(
        self,
        prompt: str,
        wss_link: str = "wss://sydney.bing.com/sydney/ChatHub",
        conversation_style: CONVERSATION_STYLE_TYPE = None,
        options: dict = None,
        webpage_context = None,
        search_result: bool = False,
    ) -> dict:
        """
        Ask a question to the bot.

        Consumes the stream and returns only the final payload dict;
        returns {} if the stream ends without a final record (the
        websocket is closed in that case).
        """
        async for final, response in self.chat_hub.ask_stream(
            prompt=prompt,
            conversation_style=conversation_style,
            wss_link=wss_link,
            options=options,
            webpage_context=webpage_context,
            search_result=search_result,
        ):
            if final:
                return response
        await self.chat_hub.wss.close()
        return {}

    async def ask_stream(
        self,
        prompt: str,
        wss_link: str = "wss://sydney.bing.com/sydney/ChatHub",
        conversation_style: CONVERSATION_STYLE_TYPE = None,
        raw: bool = False,
        options: dict = None,
        webpage_context = None,
        search_result: bool = False,
    ) -> Generator[str, None, None]:
        """
        Ask a question to the bot, yielding (final, response) tuples
        straight from the chat hub.

        NOTE(review): this is an async generator of tuples, not a plain
        Generator[str] as annotated — confirm before tightening types.
        """
        async for response in self.chat_hub.ask_stream(
            prompt=prompt,
            conversation_style=conversation_style,
            wss_link=wss_link,
            raw=raw,
            options=options,
            webpage_context=webpage_context,
            search_result=search_result,
        ):
            yield response

    async def close(self) -> None:
        """
        Close the connection.
        """
        await self.chat_hub.close()

    async def reset(self) -> None:
        """
        Reset the conversation: close the current hub and build a fresh
        one reusing the same proxy and the previous hub's cookies.
        """
        await self.close()
        self.chat_hub = _ChatHub(
            await _Conversation.create(self.proxy),
            proxy=self.proxy,
            cookies=self.chat_hub.cookies,
        )
716
+
717
+
718
async def _get_input_async(
    session: PromptSession = None,
    completer: WordCompleter = None,
) -> str:
    """
    Multiline input function.

    Reads from the given prompt_toolkit session until the key bindings
    submit the buffer (see _create_session); auto-suggest from history
    is enabled.
    """
    return await session.prompt_async(
        completer=completer,
        multiline=True,
        auto_suggest=AutoSuggestFromHistory(),
    )
730
+
731
+
732
def _create_session() -> PromptSession:
    """Build a PromptSession whose Enter key only submits `!`-commands.

    Enter inserts a newline unless the buffer starts with "!", in which
    case the input is submitted.  Escape clears the buffer whenever a
    completion menu is open.
    """
    bindings = KeyBindings()

    @bindings.add("enter")
    def _(event):
        buf = event.current_buffer
        if buf.text.startswith("!"):
            buf.validate_and_handle()
            return
        buf.insert_text("\n")

    @bindings.add("escape")
    def _(event):
        buf = event.current_buffer
        if buf.complete_state:
            buf.text = ""

    return PromptSession(key_bindings=bindings, history=InMemoryHistory())
750
+
751
+
752
def _create_completer(commands: list, pattern_str: str = "$"):
    # Word completer for the CLI "!" commands; pattern_str controls how the
    # word under the cursor is matched before completion kicks in.
    return WordCompleter(words=commands, pattern=re.compile(pattern_str))
754
+
755
+
756
async def async_main(args: argparse.Namespace) -> None:
    """
    Main function: interactive REPL loop driving a Chatbot from the CLI.

    Loads cookies (optional), creates the bot, then loops: read a question
    (single-line if --enter-once, else multiline prompt), handle the
    !help/!exit/!reset commands, and print the answer all at once
    (--no-stream), as live Markdown (--rich), or as incrementally
    flushed plain text.
    """
    print("Initializing...")
    print("Enter `alt+enter` or `escape+enter` to send a message")
    # Read and parse cookies
    cookies = None
    if args.cookie_file:
        cookies = json.loads(open(args.cookie_file, encoding="utf-8").read())
    bot = await Chatbot.create(proxy=args.proxy, cookies=cookies)
    session = _create_session()
    completer = _create_completer(["!help", "!exit", "!reset"])
    initial_prompt = args.prompt

    while True:
        print("\nYou:")
        if initial_prompt:
            # --prompt is consumed exactly once; later turns read stdin.
            question = initial_prompt
            print(question)
            initial_prompt = None
        else:
            question = (
                input()
                if args.enter_once
                else await _get_input_async(session=session, completer=completer)
            )
        print()
        if question == "!exit":
            break
        if question == "!help":
            print(
                """
            !help - Show this help message
            !exit - Exit the program
            !reset - Reset the conversation
            """,
            )
            continue
        if question == "!reset":
            await bot.reset()
            continue
        print("Bot:")
        if args.no_stream:
            # Single-shot: await the final payload and print its rendered text.
            print(
                (
                    await bot.ask(
                        prompt=question,
                        conversation_style=args.style,
                        wss_link=args.wss_link,
                    )
                )["item"]["messages"][1]["adaptiveCards"][0]["body"][0]["text"],
            )
        else:
            # `wrote` tracks how many characters were already printed so only
            # the new suffix of each streamed snapshot is emitted.
            wrote = 0
            if args.rich:
                md = Markdown("")
                with Live(md, auto_refresh=False) as live:
                    async for final, response in bot.ask_stream(
                        prompt=question,
                        conversation_style=args.style,
                        wss_link=args.wss_link,
                    ):
                        if not final:
                            if wrote > len(response):
                                # Stream shrank: Bing revoked its answer.
                                print(md)
                                print(Markdown("***Bing revoked the response.***"))
                            wrote = len(response)
                            md = Markdown(response)
                            live.update(md, refresh=True)
            else:
                async for final, response in bot.ask_stream(
                    prompt=question,
                    conversation_style=args.style,
                    wss_link=args.wss_link,
                ):
                    if not final:
                        if not wrote:
                            print(response, end="", flush=True)
                        else:
                            print(response[wrote:], end="", flush=True)
                        wrote = len(response)
                print()
    await bot.close()
840
+
841
+
842
def main() -> None:
    """Print the banner, parse CLI arguments, and run the chat loop."""
    print(
        """
        EdgeGPT - A demo of reverse engineering the Bing GPT chatbot
        Repo: github.com/acheong08/EdgeGPT
        By: Antonio Cheong

        !help for help

        Type !exit to exit
    """,
    )
    parser = argparse.ArgumentParser()
    parser.add_argument("--enter-once", action="store_true")
    parser.add_argument("--no-stream", action="store_true")
    parser.add_argument("--rich", action="store_true")
    parser.add_argument(
        "--proxy",
        help="Proxy URL (e.g. socks5://127.0.0.1:1080)",
        type=str,
    )
    parser.add_argument(
        "--wss-link",
        help="WSS URL(e.g. wss://sydney.bing.com/sydney/ChatHub)",
        type=str,
        default="wss://sydney.bing.com/sydney/ChatHub",
    )
    parser.add_argument(
        "--style",
        choices=["creative", "balanced", "precise"],
        default="balanced",
    )
    parser.add_argument(
        "--prompt",
        type=str,
        default="",
        required=False,
        help="prompt to start with",
    )
    parser.add_argument(
        "--cookie-file",
        type=str,
        default="",
        required=False,
        help="path to cookie file",
    )
    args = parser.parse_args()
    asyncio.run(async_main(args))
890
+
891
+
892
class Cookie:
    """
    Convenience class for Bing Cookie files, data, and configuration. This Class
    is updated dynamically by the Query class to allow cycling through >1
    cookie/credentials file e.g. when daily request limits (current 200 per
    account per day) are exceeded.
    """

    # Shared class-level state: active file index, search directory, glob
    # pattern, and files excluded for the rest of this session.
    current_file_index = 0
    dirpath = Path("./").resolve()
    search_pattern = "bing_cookies_*.json"
    ignore_files = set()

    @classmethod
    def fetch_default(cls, path=None):
        """Drive Edge via Selenium to bing.com/chat and dump its cookies to JSON."""
        from selenium import webdriver
        from selenium.webdriver.common.by import By

        driver = webdriver.Edge()
        driver.get("https://bing.com/chat")
        time.sleep(5)
        xpath = '//button[@id="bnp_btn_accept"]'
        driver.find_element(By.XPATH, xpath).click()
        time.sleep(2)
        xpath = '//a[@id="codexPrimaryButton"]'
        driver.find_element(By.XPATH, xpath).click()
        if path is None:
            path = Path("./bing_cookies__default.json")
            # Double underscore ensures this file is first when sorted
        cookies = driver.get_cookies()
        Path(path).write_text(json.dumps(cookies, indent=4), encoding="utf-8")
        # Path again in case supplied path is: str
        print(f"Cookies saved to: {path}")
        driver.quit()

    @classmethod
    def files(cls):
        """Return a sorted list of all cookie files matching .search_pattern"""
        all_files = set(cls.dirpath.glob(cls.search_pattern))
        return sorted(list(all_files - cls.ignore_files))

    @classmethod
    def import_data(cls):
        """
        Read the active cookie file and populate the following attributes:

          .current_filepath
          .current_data
          .image_token
        """
        try:
            cls.current_filepath = cls.files()[cls.current_file_index]
        except IndexError:
            print(
                "> Please set Cookie.current_filepath to a valid cookie file, then run Cookie.import_data()",
            )
            return
        print(f"> Importing cookies from: {cls.current_filepath.name}")
        with open(cls.current_filepath, encoding="utf-8") as file:
            cls.current_data = json.load(file)
        # The "_U" cookie doubles as the image-generation token.
        cls.image_token = [x for x in cls.current_data if x.get("name") == "_U"]
        cls.image_token = cls.image_token[0].get("value")

    @classmethod
    def import_next(cls):
        """
        Cycle through to the next cookies file. Import it. Mark the previous
        file to be ignored for the remainder of the current session.

        NOTE(review): current_file_index is never incremented here; advancing
        relies on files() shrinking as entries join ignore_files, so the same
        index points at the next file — confirm this is intended.
        """
        cls.ignore_files.add(cls.current_filepath)
        if Cookie.current_file_index >= len(cls.files()):
            Cookie.current_file_index = 0
        Cookie.import_data()
965
+
966
+
967
class Query:
    """
    A convenience class that wraps around EdgeGPT.Chatbot to encapsulate input,
    config, and output all together. Relies on Cookie class for authentication.
    """

    def __init__(
        self,
        prompt,
        style="precise",
        content_type="text",
        cookie_file=0,
        echo=True,
        echo_prompt=False,
    ):
        """
        Arguments:

        prompt: Text to enter into Bing Chat
        style: creative, balanced, or precise
        content_type: "text" for Bing Chat; "image" for Dall-e
        cookie_file: Path, filepath string, or index (int) to list of cookie paths
        echo: Print something to confirm request made
        echo_prompt: Print confirmation of the evaluated prompt
        """
        self.index = []
        self.request_count = {}
        self.image_dirpath = Path("./").resolve()
        Cookie.import_data()
        self.index += [self]
        self.prompt = prompt
        files = Cookie.files()
        if isinstance(cookie_file, int):
            index = cookie_file if cookie_file < len(files) else 0
        else:
            if not isinstance(cookie_file, (str, Path)):
                message = "'cookie_file' must be an int, str, or Path object"
                raise TypeError(message)
            cookie_file = Path(cookie_file)
            # BUGFIX: `files` is a list; the original called it like a
            # function (`files()`), which raised TypeError on this path.
            if cookie_file in files:  # Supplied filepath IS in Cookie.dirpath
                index = files.index(cookie_file)
            else:  # Supplied filepath is NOT in Cookie.dirpath
                if cookie_file.is_file():
                    Cookie.dirpath = cookie_file.parent.resolve()
                if cookie_file.is_dir():
                    Cookie.dirpath = cookie_file.resolve()
                index = 0
        Cookie.current_file_index = index
        if content_type == "text":
            self.style = style
            self.log_and_send_query(echo, echo_prompt)
        if content_type == "image":
            self.create_image()

    def log_and_send_query(self, echo, echo_prompt):
        """Run the async request to completion and count requests per cookie file."""
        self.response = asyncio.run(self.send_to_bing(echo, echo_prompt))
        name = str(Cookie.current_filepath.name)
        self.request_count[name] = self.request_count.get(name, 0) + 1

    def create_image(self):
        """Generate Dall-e images for the prompt and save them to image_dirpath."""
        image_generator = ImageGen(Cookie.image_token)
        image_generator.save_images(
            image_generator.get_images(self.prompt),
            output_dir=self.image_dirpath,
        )

    async def send_to_bing(self, echo=True, echo_prompt=False):
        """Create, submit, then close a Chatbot instance. Return the response.

        Cycles to the next cookie file on KeyError (daily request limit).
        Returns None when every cookie file has been exhausted.
        """
        retries = len(Cookie.files())
        while retries:
            # BUGFIX: predefine `bot` so `finally` cannot raise
            # UnboundLocalError when Chatbot.create() itself fails.
            bot = None
            try:
                bot = await Chatbot.create()
                if echo_prompt:
                    print(f"> {self.prompt=}")
                if echo:
                    print("> Waiting for response...")
                if self.style.lower() not in "creative balanced precise".split():
                    self.style = "precise"
                response = await bot.ask(
                    prompt=self.prompt,
                    conversation_style=getattr(ConversationStyle, self.style),
                    # wss_link="wss://sydney.bing.com/sydney/ChatHub"
                    # What other values can this parameter take? It seems to be optional
                )
                return response
            except KeyError:
                print(
                    f"> KeyError [{Cookie.current_filepath.name} may have exceeded the daily limit]",
                )
                Cookie.import_next()
                retries -= 1
            finally:
                if bot is not None:
                    await bot.close()

    @property
    def output(self):
        """The response from a completed Chatbot request"""
        return self.response["item"]["messages"][1]["text"]

    @property
    def sources(self):
        """The source names and details parsed from a completed Chatbot request"""
        return self.response["item"]["messages"][1]["sourceAttributions"]

    @property
    def sources_dict(self):
        """The source names and details as a dictionary"""
        name = "providerDisplayName"
        url = "seeMoreUrl"
        # Keep only attributions that carry both a display name and a link.
        return {
            source[name]: source[url]
            for source in self.sources
            if name in source and url in source
        }

    @property
    def code(self):
        """Extract and join any snippets of Python code in the response"""
        code_blocks = self.output.split("```")[1:-1:2]
        # Drop the language tag line from each fenced block.
        code_blocks = ["\n".join(x.splitlines()[1:]) for x in code_blocks]
        return "\n\n".join(code_blocks)

    @property
    def languages(self):
        """Extract all programming languages given in code blocks"""
        code_blocks = self.output.split("```")[1:-1:2]
        return {x.splitlines()[0] for x in code_blocks}

    @property
    def suggestions(self):
        """Follow-on questions suggested by the Chatbot"""
        return [
            x["text"]
            for x in self.response["item"]["messages"][1]["suggestedResponses"]
        ]

    def __repr__(self):
        return f"<EdgeGPT.Query: {self.prompt}>"

    def __str__(self):
        return self.output
1113
+
1114
+
1115
class ImageQuery(Query):
    """A Query whose content_type is always "image" (Dall-e generation)."""

    def __init__(self, prompt, **kwargs):
        kwargs["content_type"] = "image"
        super().__init__(prompt, **kwargs)

    def __repr__(self):
        return f"<EdgeGPT.ImageQuery: {self.prompt}>"
1123
+
1124
if __name__ == "__main__":
    # Script entry point: launch the interactive EdgeGPT CLI.
    main()
request_llms/key_manager.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import random


def Singleton(cls):
    """Class decorator caching a single shared instance of *cls*.

    The first call constructs the instance; every later call returns the
    same object, regardless of the arguments passed.
    """
    _instances = {}

    def _get_instance(*args, **kargs):
        if cls not in _instances:
            _instances[cls] = cls(*args, **kargs)
        return _instances[cls]

    return _get_instance


@Singleton
class OpenAI_ApiKeyManager():
    """Process-wide registry of API keys known to be unusable."""

    def __init__(self, mode='blacklist') -> None:
        # self.key_avail_list = []
        # Keys that previously failed; they are never handed out again.
        self.key_black_list = []

    def add_key_to_blacklist(self, key):
        """Mark *key* as unusable for the rest of the process lifetime."""
        self.key_black_list.append(key)

    def select_avail_key(self, key_list):
        """Randomly pick a key from *key_list* that is not blacklisted.

        Raises KeyError when every candidate has been blacklisted.
        """
        candidates = [k for k in key_list if k not in self.key_black_list]
        if not candidates:
            raise KeyError("No available key found.")
        return random.choice(candidates)
request_llms/local_llm_class.py ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import threading
3
+ from toolbox import update_ui, Singleton
4
+ from multiprocessing import Process, Pipe
5
+ from contextlib import redirect_stdout
6
+ from request_llms.queued_pipe import create_queue_pipe
7
+
8
class ThreadLock(object):
    """Thin wrapper around threading.Lock usable as a context manager.

    Exists mainly as a hook point for debugging lock acquisition (the
    original carried commented-out trace prints).
    """

    def __init__(self):
        self._lock = threading.Lock()

    def acquire(self):
        """Block until the lock is held."""
        self._lock.acquire()

    def release(self):
        """Release the lock; raises RuntimeError if it is not held."""
        self._lock.release()

    def __enter__(self):
        self.acquire()
        # FIX: return self so `with lock as l:` binds the wrapper
        # (the original returned None).
        return self

    def __exit__(self, type, value, traceback):
        self.release()
28
+
29
@Singleton
class GetSingletonHandle():
    """Keeps at most one live handle per local-LLM class.

    A cached handle is replaced when its subprocess has been marked
    corrupted, otherwise the existing one is reused.
    """

    def __init__(self):
        self.llm_model_already_running = {}

    def get_llm_model_instance(self, cls, *args, **kargs):
        """Return the cached handle for *cls*, creating or replacing as needed."""
        cached = self.llm_model_already_running.get(cls)
        if cached is None or cached.corrupted:
            cached = cls(*args, **kargs)
            self.llm_model_already_running[cls] = cached
        return cached
43
+
44
def reset_tqdm_output():
    """Monkeypatch tqdm so progress lines are emitted via plain print().

    Needed because the subprocess's stdout is redirected through a pipe
    (see LocalLLMHandle.run); tqdm's default carriage-return rendering
    does not survive that redirection.
    """
    import sys, tqdm
    def status_printer(self, file):
        # Replacement for tqdm.tqdm.status_printer: same signature, but
        # writes through print() instead of the raw file handle.
        fp = file
        if fp in (sys.stderr, sys.stdout):
            getattr(sys.stderr, 'flush', lambda: None)()
            getattr(sys.stdout, 'flush', lambda: None)()

        def fp_write(s):
            print(s)
        # last_len is a one-element list so the closure can mutate it.
        last_len = [0]

        def print_status(s):
            from tqdm.utils import disp_len
            len_s = disp_len(s)
            # Pad with spaces so a shorter status fully overwrites the
            # previous, longer one.
            fp_write('\r' + s + (' ' * max(last_len[0] - len_s, 0)))
            last_len[0] = len_s
        return print_status
    tqdm.tqdm.status_printer = status_printer
63
+
64
+
65
class LocalLLMHandle(Process):
    """Runs a local LLM in a daemon subprocess and streams replies back.

    The same object exists on both sides of the fork: `is_main_process` is
    toggled around `start()` so the state pickled into the child carries the
    child-side flag while the parent keeps the main-side flag. Communication
    uses two queue-backed pipes (see create_queue_pipe): one for chat
    traffic, one for model-state strings.
    """

    def __init__(self):
        # ⭐ run in main process
        super().__init__(daemon=True)
        self.is_main_process = True  # init
        self.corrupted = False
        self.load_model_info()
        self.parent, self.child = create_queue_pipe()
        self.parent_state, self.child_state = create_queue_pipe()
        # std_tag marks pipe messages that are redirected subprocess stdout
        # rather than model output (allow redirect_stdout).
        self.std_tag = "[Subprocess Message] "
        self.running = True
        self._model = None
        self._tokenizer = None
        self.state = ""
        self.check_dependency()
        self.is_main_process = False  # state wrap for child process
        self.start()
        self.is_main_process = True  # state wrap for child process
        self.threadLock = ThreadLock()

    def get_state(self):
        # ⭐ run in main process
        # Drain every queued state update; keep only the most recent.
        while self.parent_state.poll():
            self.state = self.parent_state.recv()
        return self.state

    def set_state(self, new_state):
        # ⭐ run in main process or 🏃‍♂️ run in child process
        if self.is_main_process:
            self.state = new_state
        else:
            # Child cannot mutate the parent's copy; send it over the pipe.
            self.child_state.send(new_state)

    def load_model_info(self):
        # 🏃‍♂️ run in child process
        # Subclasses must set model_name and cmd_to_install here.
        raise NotImplementedError("Method not implemented yet")
        self.model_name = ""
        self.cmd_to_install = ""

    def load_model_and_tokenizer(self):
        """
        This function should return the model and the tokenizer
        """
        # 🏃‍♂️ run in child process
        raise NotImplementedError("Method not implemented yet")

    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️ run in child process
        raise NotImplementedError("Method not implemented yet")

    def try_to_import_special_deps(self, **kwargs):
        """
        import something that will raise error if the user does not install requirement_*.txt
        """
        # ⭐ run in main process
        raise NotImplementedError("Method not implemented yet")

    def check_dependency(self):
        # ⭐ run in main process
        # NOTE(review): the bare except also swallows the subclass's
        # NotImplementedError — any import failure marks the handle not running.
        try:
            self.try_to_import_special_deps()
            self.set_state("`依赖检测通过`")
            self.running = True
        except:
            self.set_state(f"缺少{self.model_name}的依赖,如果要使用{self.model_name},除了基础的pip依赖以外,您还需要运行{self.cmd_to_install}安装{self.model_name}的依赖。")
            self.running = False

    def run(self):
        # 🏃‍♂️ run in child process
        # First run: load model weights. The child end of the pipe is given
        # file-like write/flush so redirect_stdout can stream loader output
        # back to the parent, tagged with std_tag.
        self.child.flush = lambda *args: None
        self.child.write = lambda x: self.child.send(self.std_tag + x)
        reset_tqdm_output()
        self.set_state("`尝试加载模型`")
        try:
            with redirect_stdout(self.child):
                self._model, self._tokenizer = self.load_model_and_tokenizer()
        except:
            self.set_state("`加载模型失败`")
            self.running = False
            from toolbox import trimmed_format_exc
            self.child.send(
                f'[Local Message] 不能正常加载{self.model_name}的参数.' + '\n```\n' + trimmed_format_exc() + '\n```\n')
            # [FinishBad] tells the parent this handle is permanently corrupted.
            self.child.send('[FinishBad]')
            raise RuntimeError(f"不能正常加载{self.model_name}的参数!")

        self.set_state("`准备就绪`")
        while True:
            # Wait for the next request from the parent.
            kwargs = self.child.recv()
            # Got one: stream the model's reply back chunk by chunk.
            try:
                for response_full in self.llm_stream_generator(**kwargs):
                    self.child.send(response_full)
                    # print('debug' + response_full)
                self.child.send('[Finish]')
                # Request done; loop back for the next one.
            except:
                from toolbox import trimmed_format_exc
                self.child.send(
                    f'[Local Message] 调用{self.model_name}失败.' + '\n```\n' + trimmed_format_exc() + '\n```\n')
                self.child.send('[Finish]')

    def clear_pending_messages(self):
        # ⭐ run in main process
        # Drain stale replies left by an interrupted request so the next
        # stream_chat starts from a clean pipe; waits up to 5x0.5s for
        # stragglers before giving up.
        while True:
            if self.parent.poll():
                self.parent.recv()
                continue
            for _ in range(5):
                time.sleep(0.5)
                if self.parent.poll():
                    r = self.parent.recv()
                    continue
            break
        return

    def stream_chat(self, **kwargs):
        # ⭐ run in main process
        if self.get_state() == "`准备就绪`":
            yield "`正在等待线程锁,排队中请稍候 ...`"

        with self.threadLock:
            if self.parent.poll():
                yield "`排队中请稍候 ...`"
                self.clear_pending_messages()
            self.parent.send(kwargs)
            std_out = ""
            std_out_clip_len = 4096
            while True:
                res = self.parent.recv()
                # pipe_watch_dog.feed()
                if res.startswith(self.std_tag):
                    # Redirected subprocess stdout: prepend newest output and
                    # clip the buffer so it cannot grow without bound.
                    new_output = res[len(self.std_tag):]
                    std_out = std_out[:std_out_clip_len]
                    print(new_output, end='')
                    std_out = new_output + std_out
                    yield self.std_tag + '\n```\n' + std_out + '\n```\n'
                elif res == '[Finish]':
                    break
                elif res == '[FinishBad]':
                    # Model failed to load; mark handle dead so the singleton
                    # cache rebuilds it on the next request.
                    self.running = False
                    self.corrupted = True
                    break
                else:
                    std_out = ""
                    yield res
213
+
214
def get_local_llm_predict_fns(LLMSingletonClass, model_name, history_format='classic'):
    """Build the (predict_no_ui_long_connection, predict) pair for a local model.

    LLMSingletonClass: a LocalLLMHandle subclass, instantiated at most once
    via GetSingletonHandle. history_format selects how chat history is
    packed: 'classic' (list of [user, assistant] pairs, system prompt folded
    in) or 'chatglm3' (OpenAI-style role/content dicts).
    """
    load_message = f"{model_name}尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,{model_name}消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……"

    def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
        """
        refer to request_llms/bridge_all.py
        """
        _llm_handle = GetSingletonHandle().get_llm_model_instance(LLMSingletonClass)
        if len(observe_window) >= 1:
            observe_window[0] = load_message + "\n\n" + _llm_handle.get_state()
        if not _llm_handle.running:
            raise RuntimeError(_llm_handle.get_state())

        if history_format == 'classic':
            # No system-prompt slot in this format, so fold the prompt into history.
            history_feedin = []
            history_feedin.append([sys_prompt, "Certainly!"])
            for i in range(len(history)//2):
                history_feedin.append([history[2*i], history[2*i+1]])
        elif history_format == 'chatglm3':
            # This format has a real system-prompt slot.
            conversation_cnt = len(history) // 2
            history_feedin = [{"role": "system", "content": sys_prompt}]
            if conversation_cnt:
                for index in range(0, 2*conversation_cnt, 2):
                    what_i_have_asked = {}
                    what_i_have_asked["role"] = "user"
                    what_i_have_asked["content"] = history[index]
                    what_gpt_answer = {}
                    what_gpt_answer["role"] = "assistant"
                    what_gpt_answer["content"] = history[index+1]
                    if what_i_have_asked["content"] != "":
                        # Skip turns whose answer is empty (e.g. interrupted).
                        if what_gpt_answer["content"] == "":
                            continue
                        history_feedin.append(what_i_have_asked)
                        history_feedin.append(what_gpt_answer)
                    else:
                        history_feedin[-1]['content'] = what_gpt_answer['content']

        watch_dog_patience = 5  # watchdog patience: 5 seconds is enough
        response = ""
        for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
            if len(observe_window) >= 1:
                observe_window[0] = response
            if len(observe_window) >= 2:
                # observe_window[1] is the caller's last feed time; abort when stale.
                if (time.time()-observe_window[1]) > watch_dog_patience:
                    raise RuntimeError("程序终止。")
        return response

    def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream=True, additional_fn=None):
        """
        refer to request_llms/bridge_all.py
        """
        chatbot.append((inputs, ""))

        _llm_handle = GetSingletonHandle().get_llm_model_instance(LLMSingletonClass)
        chatbot[-1] = (inputs, load_message + "\n\n" + _llm_handle.get_state())
        yield from update_ui(chatbot=chatbot, history=[])
        if not _llm_handle.running:
            raise RuntimeError(_llm_handle.get_state())

        if additional_fn is not None:
            from core_functional import handle_core_functionality
            inputs, history = handle_core_functionality(
                additional_fn, inputs, history, chatbot)

        # Pack the chat history (same two formats as above).
        if history_format == 'classic':
            # No system-prompt slot in this format, so fold the prompt into history.
            history_feedin = []
            history_feedin.append([system_prompt, "Certainly!"])
            for i in range(len(history)//2):
                history_feedin.append([history[2*i], history[2*i+1]])
        elif history_format == 'chatglm3':
            # This format has a real system-prompt slot.
            conversation_cnt = len(history) // 2
            history_feedin = [{"role": "system", "content": system_prompt}]
            if conversation_cnt:
                for index in range(0, 2*conversation_cnt, 2):
                    what_i_have_asked = {}
                    what_i_have_asked["role"] = "user"
                    what_i_have_asked["content"] = history[index]
                    what_gpt_answer = {}
                    what_gpt_answer["role"] = "assistant"
                    what_gpt_answer["content"] = history[index+1]
                    if what_i_have_asked["content"] != "":
                        if what_gpt_answer["content"] == "":
                            continue
                        history_feedin.append(what_i_have_asked)
                        history_feedin.append(what_gpt_answer)
                    else:
                        history_feedin[-1]['content'] = what_gpt_answer['content']

        # Stream the reply into the UI as it arrives.
        response = f"[Local Message] 等待{model_name}响应中 ..."
        for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
            chatbot[-1] = (inputs, response)
            yield from update_ui(chatbot=chatbot, history=history)

        # Finalize: if the placeholder never changed, the model produced nothing.
        if response == f"[Local Message] 等待{model_name}响应中 ...":
            response = f"[Local Message] {model_name}响应异常 ..."
        history.extend([inputs, response])
        yield from update_ui(chatbot=chatbot, history=history)

    return predict_no_ui_long_connection, predict
request_llms/queued_pipe.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from multiprocessing import Pipe, Queue
2
+ import time
3
+ import threading
4
+
5
+ class PipeSide(object):
6
+ def __init__(self, q_2remote, q_2local) -> None:
7
+ self.q_2remote = q_2remote
8
+ self.q_2local = q_2local
9
+
10
+ def recv(self):
11
+ return self.q_2local.get()
12
+
13
+ def send(self, buf):
14
+ self.q_2remote.put(buf)
15
+
16
+ def poll(self):
17
+ return not self.q_2local.empty()
18
+
19
+ def create_queue_pipe():
20
+ q_p2c = Queue()
21
+ q_c2p = Queue()
22
+ pipe_c = PipeSide(q_2local=q_p2c, q_2remote=q_c2p)
23
+ pipe_p = PipeSide(q_2local=q_c2p, q_2remote=q_p2c)
24
+ return pipe_c, pipe_p
request_llms/requirements_chatglm.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ protobuf
2
+ cpm_kernels
3
+ torch>=1.10
4
+ mdtex2html
5
+ sentencepiece
request_llms/requirements_newbing.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ BingImageCreator
2
+ certifi
3
+ httpx
4
+ prompt_toolkit
5
+ requests
6
+ rich
7
+ websockets
8
+ httpx[socks]
request_llms/requirements_qwen.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ dashscope
request_llms/requirements_qwen_local.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ modelscope
2
+ transformers_stream_generator
3
+ auto-gptq
4
+ optimum
5
+ urllib3<2