gordonchan committed on
Commit
85b36ac
·
verified ·
1 Parent(s): 8efb4df

Upload 34 files

Browse files
Files changed (34) hide show
  1. crazy_functions/CodeInterpreter.py +232 -0
  2. crazy_functions/__init__.py +0 -0
  3. crazy_functions/agent_fns/auto_agent.py +23 -0
  4. crazy_functions/agent_fns/echo_agent.py +19 -0
  5. crazy_functions/agent_fns/general.py +134 -0
  6. crazy_functions/agent_fns/persistent.py +16 -0
  7. crazy_functions/agent_fns/pipe.py +194 -0
  8. crazy_functions/agent_fns/watchdog.py +28 -0
  9. crazy_functions/chatglm微调工具.py +141 -0
  10. crazy_functions/crazy_utils.py +609 -0
  11. crazy_functions/gen_fns/gen_fns_shared.py +70 -0
  12. crazy_functions/ipc_fns/mp.py +37 -0
  13. crazy_functions/json_fns/pydantic_io.py +111 -0
  14. crazy_functions/live_audio/aliyunASR.py +261 -0
  15. crazy_functions/live_audio/audio_io.py +51 -0
  16. crazy_functions/multi_stage/multi_stage_utils.py +93 -0
  17. crazy_functions/pdf_fns/breakdown_txt.py +125 -0
  18. crazy_functions/pdf_fns/parse_pdf.py +171 -0
  19. crazy_functions/pdf_fns/report_gen_html.py +58 -0
  20. crazy_functions/pdf_fns/report_template.html +0 -0
  21. crazy_functions/vt_fns/vt_call_plugin.py +114 -0
  22. crazy_functions/vt_fns/vt_modify_config.py +81 -0
  23. crazy_functions/vt_fns/vt_state.py +28 -0
  24. crazy_functions/命令行助手.py +31 -0
  25. crazy_functions/对话历史存档.py +152 -0
  26. crazy_functions/生成函数注释.py +56 -0
  27. crazy_functions/联网的ChatGPT.py +106 -0
  28. crazy_functions/联网的ChatGPT_bing版.py +106 -0
  29. crazy_functions/虚空终端.py +180 -0
  30. crazy_functions/解析JupyterNotebook.py +140 -0
  31. crazy_functions/解析项目源代码.py +371 -0
  32. crazy_functions/谷歌检索小助手.py +185 -0
  33. crazy_functions/辅助功能.py +54 -0
  34. crazy_functions/高级功能函数模板.py +29 -0
crazy_functions/CodeInterpreter.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import Callable, Iterable, Mapping
2
+ from typing import Any
3
+ from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc
4
+ from toolbox import promote_file_to_downloadzone, get_log_folder
5
+ from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
6
+ from .crazy_utils import input_clipping, try_install_deps
7
+ from multiprocessing import Process, Pipe
8
+ import os
9
+ import time
10
+
11
# Prompt template the model must follow in step 2 of gpt_interact_multi_step:
# the generated code is written to a module file and `TerminalFunction().run(path)`
# is invoked on it afterwards.
# NOTE(review): "templete" is a typo for "template"; the name is kept because
# it is referenced elsewhere in this module.
templete = """
```python
import ... # Put dependencies here, e.g. import numpy as np

class TerminalFunction(object): # Do not change the name of the class, The name of the class must be `TerminalFunction`

    def run(self, path): # The name of the function must be `run`, it takes only a positional argument.
        # rewrite the function you have just written here
        ...
        return generated_file_path
```
"""
23
+
24
def inspect_dependency(chatbot, history):
    """Placeholder dependency check: refresh the UI and always report success."""
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
    return True
27
+
28
def get_code_block(reply):
    """Extract the Python code block from an LLM reply.

    Parameters
    ----------
    reply : str
        Raw model reply, expected to contain one or more markdown fenced
        code blocks (``` ... ```).

    Returns
    -------
    str
        Contents of the selected code block with a leading ``python``
        language tag (and surrounding whitespace) removed.

    Raises
    ------
    RuntimeError
        If no suitable code block is found.
    """
    import re

    def _clean(block):
        # BUGFIX: the previous implementation used str.strip('python'), which
        # strips ANY of the characters p/y/t/h/o/n from BOTH ends and could
        # therefore eat real code characters. Remove only the language tag.
        block = block.strip()
        if block.startswith('python'):
            block = block[len('python'):]
        return block.strip()

    pattern = r"```([\s\S]*?)```"  # regex pattern to match fenced code blocks
    matches = re.findall(pattern, reply)
    if len(matches) == 1:
        return _clean(matches[0])
    for match in matches:
        # With several blocks, prefer the one holding the required class.
        if 'class TerminalFunction' in match:
            return _clean(match)
    raise RuntimeError("GPT is not generating proper code.")
38
+
39
def gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history):
    """Ask the LLM, in two steps, to write and then reformat a file-processing function.

    Step 1 requests a plain Python function that processes a `file_type` file
    according to the user request `txt`; step 2 asks the model to rewrite that
    draft into the fixed `templete` class layout so it can be imported and run.
    Yields UI updates; returns the raw second reply (markdown containing a
    code block), an (currently empty) installation-advice string, and the
    original arguments for the caller to continue with.
    """
    # Compose the first prompt
    prompt_compose = [
        f'Your job:\n'
        f'1. write a single Python function, which takes a path of a `{file_type}` file as the only argument and returns a `string` containing the result of analysis or the path of generated files. \n',
        f"2. You should write this function to perform following task: " + txt + "\n",
        f"3. Wrap the output python function with markdown codeblock."
    ]
    i_say = "".join(prompt_compose)
    demo = []

    # Step 1: ask for a first draft of the function
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=i_say,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=demo,
        sys_prompt= r"You are a programmer."
    )
    history.extend([i_say, gpt_say])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # Step 2: ask the model to rewrite the draft to match the executable template
    prompt_compose = [
        "If previous stage is successful, rewrite the function you have just written to satisfy following templete: \n",
        templete
    ]
    i_say = "".join(prompt_compose); inputs_show_user = "If previous stage is successful, rewrite the function you have just written to satisfy executable templete. "
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=inputs_show_user,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
        sys_prompt= r"You are a programmer."
    )
    code_to_return = gpt_say
    history.extend([i_say, gpt_say])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # Step 3 (disabled): asking the model for dependency-installation advice.
    # The commented variants below are kept as reference; installation_advance
    # therefore stays empty.
    # i_say = "Please list to packages to install to run the code above. Then show me how to use `try_install_deps` function to install them."
    # i_say += 'For instance. `try_install_deps(["opencv-python", "scipy", "numpy"])`'
    # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
    #     inputs=i_say, inputs_show_user=inputs_show_user,
    #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
    #     sys_prompt= r"You are a programmer."
    # )
    # # # Step 3 (alternative, also disabled)
    # i_say = "Show me how to use `pip` to install packages to run the code above. "
    # i_say += 'For instance. `pip install -r opencv-python scipy numpy`'
    # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
    #     inputs=i_say, inputs_show_user=i_say,
    #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
    #     sys_prompt= r"You are a programmer."
    # )
    installation_advance = ""

    return code_to_return, installation_advance, txt, file_type, llm_kwargs, chatbot, history
93
+
94
def make_module(code):
    """Persist generated code as a timestamped module inside the log folder.

    Parameters
    ----------
    code : str
        Python source that must contain a class definition (typically
        ``class TerminalFunction``).

    Returns
    -------
    str
        A locator of the form ``"<dotted.module.path>-><ClassName>"``,
        consumed by ``init_module_instance``.

    Raises
    ------
    RuntimeError
        If no class definition can be found in *code*.
    """
    module_file = 'gpt_fn_' + gen_time_str().replace('-', '_')
    with open(f'{get_log_folder()}/{module_file}.py', 'w', encoding='utf8') as f:
        f.write(code)

    def get_class_name(class_string):
        import re
        # Accept both `class Name(Base):` and `class Name:` forms.
        # BUGFIX: the previous pattern required an opening parenthesis and
        # crashed with an opaque AttributeError when it was absent.
        match = re.search(r'class\s+(\w+)\s*[:(]', class_string)
        if match is None:
            raise RuntimeError("No class definition found in generated code.")
        return match.group(1)

    class_name = get_class_name(code)
    # Convert the log-folder path into a dotted module path; handle both
    # POSIX and Windows path separators.
    dotted = get_log_folder().replace('\\', '.').replace('/', '.')
    return f"{dotted}.{module_file}->{class_name}"
107
+
108
def init_module_instance(module):
    """Instantiate the class described by a ``"module.path->ClassName"`` locator."""
    import importlib
    module_path, class_name = module.split('->')
    loaded_module = importlib.import_module(module_path)
    factory = getattr(loaded_module, class_name)
    return factory()
113
+
114
def for_immediate_show_off_when_possible(file_type, fp, chatbot):
    """Append an inline preview entry to the chat when the produced file is an image."""
    if file_type not in ('png', 'jpg'):
        return chatbot
    image_path = os.path.abspath(fp)
    address_line = f'本地文件地址: <br/>`{image_path}`<br/>'
    preview_line = f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
    chatbot.append(['这是一张图片, 展示如下:', address_line + preview_line])
    return chatbot
122
+
123
def subprocess_worker(instance, file_path, return_dict):
    # Runs the generated TerminalFunction in a child process; the result is
    # stashed in the shared dict so the parent can read it after join().
    result = instance.run(file_path)
    return_dict['result'] = result
125
+
126
def have_any_recent_upload_files(chatbot):
    """Return True if a file upload was recorded within the last five minutes."""
    recency_window = 5 * 60  # seconds
    if not chatbot:
        return False  # no chatbot handle available
    record = chatbot._cookies.get("most_recent_uploaded", None)
    if not record:
        return False  # nothing has been uploaded yet
    # Recent enough only if the upload happened inside the window.
    return (time.time() - record["time"]) < recency_window
133
+
134
def get_recent_file_prompt_support(chatbot):
    """Return the path of the most recently uploaded file recorded in the cookies."""
    record = chatbot._cookies.get("most_recent_uploaded", None)
    return record['path']
138
+
139
@CatchException
def 虚空终端CodeInterpreter(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Open-source CodeInterpreter entry point (currently disabled).

    txt            Text typed in the input box, e.g. a request describing how
                   to process the most recently uploaded file.
    llm_kwargs     GPT model parameters such as temperature and top_p;
                   normally passed through unchanged.
    plugin_kwargs  Plugin parameters (unused for now).
    chatbot        Handle of the chat display box, used to show output.
    history        Chat history / context.
    system_prompt  Silent system prompt for GPT.
    web_port       Port the app is currently running on.

    NOTE(review): the `raise NotImplementedError` below deliberately disables
    this plugin; everything after it is unreachable draft code.  The draft
    also calls `clear_file_downloadzone` and `is_the_upload_folder`, which are
    not imported in this file — they must be imported before enabling.
    """
    raise NotImplementedError

    # Clear history to avoid input overflow
    history = []; clear_file_downloadzone(chatbot)

    # Basic info: feature description and status
    chatbot.append([
        "函数插件功能?",
        "CodeInterpreter开源版, 此插件处于开发阶段, 建议暂时不要使用, 插件初始化中 ..."
    ])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    if have_any_recent_upload_files(chatbot):
        file_path = get_recent_file_prompt_support(chatbot)
    else:
        chatbot.append(["文件检索", "没有发现任何近期上传的文件。"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # Read the uploaded file
    if ("recently_uploaded_files" in plugin_kwargs) and (plugin_kwargs["recently_uploaded_files"] == ""): plugin_kwargs.pop("recently_uploaded_files")
    recently_uploaded_files = plugin_kwargs.get("recently_uploaded_files", None)
    file_path = recently_uploaded_files[-1]
    file_type = file_path.split('.')[-1]

    # Guard against careless input (user pasted the upload folder path)
    if is_the_upload_folder(txt):
        chatbot.append([
            "...",
            f"请在输入框内填写需求,然后再次点击该插件(文件路径 {file_path} 已经被记忆)"
        ])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # Start the real work
    for j in range(5):  # retry at most 5 times
        try:
            code, installation_advance, txt, file_type, llm_kwargs, chatbot, history = \
                yield from gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history)
            code = get_code_block(code)
            res = make_module(code)
            instance = init_module_instance(res)
            break
        except Exception as e:
            chatbot.append([f"第{j}次代码生成尝试,失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
            yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

    # Code generation finished — execute it in a child process
    try:
        import multiprocessing
        manager = multiprocessing.Manager()
        return_dict = manager.dict()

        p = multiprocessing.Process(target=subprocess_worker, args=(instance, file_path, return_dict))
        # only has 10 seconds to run
        p.start(); p.join(timeout=10)
        if p.is_alive(): p.terminate(); p.join()
        p.close()
        res = return_dict['result']
        # res = instance.run(file_path)
    except Exception as e:
        chatbot.append(["执行失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
        # chatbot.append(["如果是缺乏依赖,请参考以下建议", installation_advance])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    # Finished successfully — wrap up and show the result
    res = str(res)
    if os.path.exists(res):
        chatbot.append(["执行成功了,结果是一个有效文件", "结果:" + res])
        new_file_path = promote_file_to_downloadzone(res, chatbot=chatbot)
        chatbot = for_immediate_show_off_when_possible(file_type, new_file_path, chatbot)
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
    else:
        chatbot.append(["执行成功了,结果是一个字符串", "结果:" + res])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
225
+
226
+ """
227
+ 测试:
228
+ 裁剪图像,保留下半部分
229
+ 交换图像的蓝色通道和红色通道
230
+ 将图像转为灰度图像
231
+ 将csv文件转excel表格
232
+ """
crazy_functions/__init__.py ADDED
File without changes
crazy_functions/agent_fns/auto_agent.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, ProxyNetworkActivate
2
+ from toolbox import report_exception, get_log_folder, update_ui_lastest_msg, Singleton
3
+ from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
4
+ from crazy_functions.agent_fns.general import AutoGenGeneral
5
+
6
+
7
+
8
class AutoGenMath(AutoGenGeneral):
    """Math-oriented AutoGen setup: one LLM assistant plus a human-driven user proxy."""

    def define_agents(self):
        from autogen import AssistantAgent, UserProxyAgent
        assistant_spec = {
            "name": "assistant",  # name of the agent.
            "cls": AssistantAgent,  # class of the agent.
        }
        user_proxy_spec = {
            "name": "user_proxy",  # name of the agent.
            "cls": UserProxyAgent,  # class of the agent.
            "human_input_mode": "ALWAYS",  # always ask for human input.
            "llm_config": False,  # disables llm-based auto reply.
        }
        return [assistant_spec, user_proxy_spec]
crazy_functions/agent_fns/echo_agent.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
2
+
3
class EchoDemo(PluginMultiprocessManager):
    """Minimal demo plugin: echoes every user input back through the pipe."""

    def subprocess_worker(self, child_conn):
        # ⭐⭐ runs in the child process
        self.child_conn = child_conn
        keep_running = True
        while keep_running:
            msg = self.child_conn.recv()  # a PipeCom message
            if msg.cmd == "terminate":
                self.child_conn.send(PipeCom("done", ""))
                keep_running = False
            elif msg.cmd == "user_input":
                # echo the input back, then block for further user feedback
                self.child_conn.send(PipeCom("show", msg.content))
                if not self.subprocess_worker_wait_user_feedback(wait_msg="我准备好处理下一个问题了."):
                    # waiting timed out -> terminate this subprocess_worker
                    keep_running = False
        print('[debug] subprocess_worker terminated')
crazy_functions/agent_fns/general.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import trimmed_format_exc, get_conf, ProxyNetworkActivate
2
+ from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
3
+ from request_llms.bridge_all import predict_no_ui_long_connection
4
+ import time
5
+
6
def gpt_academic_generate_oai_reply(
    self,
    messages,
    sender,
    config,
):
    """Drop-in replacement for autogen's `generate_oai_reply` that routes the
    request through GPT-Academic's `predict_no_ui_long_connection` instead of
    the OpenAI client.

    Returns (handled, reply): (False, None) when the llm_config is disabled,
    otherwise (True, <model reply string>).
    """
    llm_config = self.llm_config if config is None else config
    if llm_config is False:
        return False, None
    if messages is None:
        messages = self._oai_messages[sender]

    # The last message is the prompt; everything before it becomes history.
    inputs = messages[-1]['content']
    history = [message['content'] for message in messages[:-1]]
    context = messages[-1].pop("context", None)
    assert context is None, "预留参数 context 未实现"

    reply = predict_no_ui_long_connection(
        inputs=inputs,
        llm_kwargs=llm_config,
        history=history,
        sys_prompt=self._oai_system_message[0]['content'],
        console_slience=True
    )
    # NOTE(review): the original computed
    # `assumed_done = reply.endswith('\nTERMINATE')` but never used it;
    # removed as dead code.
    return True, reply
34
+
35
class AutoGenGeneral(PluginMultiprocessManager):
    """Generic two-agent (assistant + user proxy) AutoGen plugin runner.

    Subclasses implement `define_agents`; the machinery here forwards agent
    output and human-input requests over the subprocess pipe.
    """

    def gpt_academic_print_override(self, user_proxy, message, sender):
        # ⭐⭐ run in subprocess: forward an agent message to the main-process UI.
        self.child_conn.send(PipeCom("show", sender.name + "\n\n---\n\n" + message["content"]))

    def gpt_academic_get_human_input(self, user_proxy, message):
        # ⭐⭐ run in subprocess: ask the main process for user feedback and
        # block (up to `patience` seconds) until it arrives.
        patience = 300
        begin_waiting_time = time.time()
        self.child_conn.send(PipeCom("interact", message))
        while True:
            time.sleep(0.5)
            if self.child_conn.poll():
                wait_success = True
                break
            if time.time() - begin_waiting_time > patience:
                self.child_conn.send(PipeCom("done", ""))
                wait_success = False
                break
        if wait_success:
            return self.child_conn.recv().content
        else:
            raise TimeoutError("等待用户输入超时")

    def define_agents(self):
        """Return a list of agent-spec dicts ({'name', 'cls', ...}); subclass hook."""
        raise NotImplementedError

    def exe_autogen(self, input):
        # ⭐⭐ run in subprocess
        input = input.content
        with ProxyNetworkActivate("AutoGen"):
            code_execution_config = {"work_dir": self.autogen_work_dir, "use_docker": self.use_docker}
            agents = self.define_agents()
            user_proxy = None
            assistant = None
            for agent_kwargs in agents:
                agent_cls = agent_kwargs.pop('cls')
                kwargs = {
                    'llm_config': self.llm_kwargs,
                    'code_execution_config': code_execution_config
                }
                kwargs.update(agent_kwargs)
                agent_handle = agent_cls(**kwargs)
                # BUGFIX: bind loop variables as lambda defaults — the previous
                # late-binding closures made every agent see the value from the
                # final loop iteration.
                agent_handle._print_received_message = lambda a, b, ak=agent_kwargs: self.gpt_academic_print_override(ak, a, b)
                for d in agent_handle._reply_func_list:
                    if hasattr(d['reply_func'], '__name__') and d['reply_func'].__name__ == 'generate_oai_reply':
                        # Route LLM calls through GPT-Academic instead of the OpenAI client.
                        d['reply_func'] = gpt_academic_generate_oai_reply
                if agent_kwargs['name'] == 'user_proxy':
                    agent_handle.get_human_input = lambda a, h=agent_handle: self.gpt_academic_get_human_input(h, a)
                    user_proxy = agent_handle
                if agent_kwargs['name'] == 'assistant': assistant = agent_handle
            try:
                if user_proxy is None or assistant is None: raise Exception("用户代理或助理代理未定义")
                user_proxy.initiate_chat(assistant, message=input)
            except Exception as e:
                tb_str = '```\n' + trimmed_format_exc() + '```'
                self.child_conn.send(PipeCom("done", "AutoGen 执行失败: \n\n" + tb_str))

    def subprocess_worker(self, child_conn):
        # ⭐⭐ run in subprocess: serve pipe messages until the parent
        # terminates this process (loop never exits on its own by design).
        self.child_conn = child_conn
        while True:
            msg = self.child_conn.recv()  # PipeCom
            self.exe_autogen(msg)
99
+
100
+
101
class AutoGenGroupChat(AutoGenGeneral):
    """AutoGen plugin runner for multi-agent group chats."""

    def exe_autogen(self, input):
        # ⭐⭐ run in subprocess
        import autogen

        input = input.content
        with ProxyNetworkActivate("AutoGen"):
            code_execution_config = {"work_dir": self.autogen_work_dir, "use_docker": self.use_docker}
            agents = self.define_agents()
            agents_instances = []
            # BUGFIX: initialize before the loop — previously, if no agent was
            # named "user_proxy", the `user_proxy is None` check below raised
            # UnboundLocalError instead of the intended Exception.
            user_proxy = None
            for agent_kwargs in agents:
                agent_cls = agent_kwargs.pop("cls")
                kwargs = {"code_execution_config": code_execution_config}
                kwargs.update(agent_kwargs)
                agent_handle = agent_cls(**kwargs)
                # BUGFIX: bind the loop variable as a default argument; a plain
                # closure would make every agent report with the last agent's kwargs.
                agent_handle._print_received_message = lambda a, b, ak=agent_kwargs: self.gpt_academic_print_override(ak, a, b)
                agents_instances.append(agent_handle)
                if agent_kwargs["name"] == "user_proxy":
                    user_proxy = agent_handle
                    user_proxy.get_human_input = lambda a, h=agent_handle: self.gpt_academic_get_human_input(h, a)
            try:
                groupchat = autogen.GroupChat(agents=agents_instances, messages=[], max_round=50)
                manager = autogen.GroupChatManager(groupchat=groupchat, **self.define_group_chat_manager_config())
                manager._print_received_message = lambda a, b: self.gpt_academic_print_override(agent_kwargs, a, b)
                manager.get_human_input = lambda a: self.gpt_academic_get_human_input(manager, a)
                if user_proxy is None:
                    raise Exception("user_proxy is not defined")
                user_proxy.initiate_chat(manager, message=input)
            except Exception:
                tb_str = "```\n" + trimmed_format_exc() + "```"
                self.child_conn.send(PipeCom("done", "AutoGen exe failed: \n\n" + tb_str))

    def define_group_chat_manager_config(self):
        """Return kwargs for autogen.GroupChatManager; subclass hook."""
        raise NotImplementedError
crazy_functions/agent_fns/persistent.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import Singleton
2
@Singleton
class GradioMultiuserManagerForPersistentClasses():
    """Process-wide registry mapping user keys to long-lived plugin instances."""

    def __init__(self):
        self.mapping = {}

    def already_alive(self, key):
        # "Alive" means: registered AND the instance's is_alive() reports True.
        if key not in self.mapping:
            return False
        return self.mapping[key].is_alive()

    def set(self, key, x):
        self.mapping[key] = x
        return x

    def get(self, key):
        return self.mapping[key]
16
+
crazy_functions/agent_fns/pipe.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import get_log_folder, update_ui, gen_time_str, get_conf, promote_file_to_downloadzone
2
+ from crazy_functions.agent_fns.watchdog import WatchDog
3
+ import time, os
4
+
5
class PipeCom:
    """Tiny message envelope sent across the parent/child process pipe.

    `cmd` is a short command string (the package uses "user_input", "show",
    "interact" and "done"); `content` is the command payload.
    """

    def __init__(self, cmd, content) -> None:
        self.cmd, self.content = cmd, content
9
+
10
+
11
class PluginMultiprocessManager:
    """Run a plugin workload in a child process while keeping the Gradio UI alive.

    The parent (main) process owns the chatbot UI and one end of a Pipe; the
    child process runs `subprocess_worker` (implemented by subclasses) and
    reports back with `PipeCom` messages ("show", "interact", "done").  A
    heartbeat watchdog terminates the whole instance if the UI stops polling.
    """

    def __init__(self, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
        # ⭐ run in main process
        self.autogen_work_dir = os.path.join(get_log_folder("autogen"), gen_time_str())
        self.previous_work_dir_files = {}  # file path -> last known mtime, for change detection
        self.llm_kwargs = llm_kwargs
        self.plugin_kwargs = plugin_kwargs
        self.chatbot = chatbot
        self.history = history
        self.system_prompt = system_prompt
        # self.web_port = web_port
        self.alive = True
        self.use_docker = get_conf("AUTOGEN_USE_DOCKER")
        self.last_user_input = ""
        # create a thread to monitor self.heartbeat, terminate the instance if no heartbeat for a long time
        timeout_seconds = 5 * 60
        self.heartbeat_watchdog = WatchDog(timeout=timeout_seconds, bark_fn=self.terminate, interval=5)
        self.heartbeat_watchdog.begin_watch()

    def feed_heartbeat_watchdog(self):
        # feed this `dog`, so the dog will not `bark` (bark_fn will terminate the instance)
        self.heartbeat_watchdog.feed()

    def is_alive(self):
        # True until terminate() has been called (by the UI or the watchdog).
        return self.alive

    def launch_subprocess_with_pipe(self):
        # ⭐ run in main process: spawn the worker child and return our end of the pipe.
        from multiprocessing import Process, Pipe

        parent_conn, child_conn = Pipe()
        self.p = Process(target=self.subprocess_worker, args=(child_conn,))
        self.p.daemon = True
        self.p.start()
        return parent_conn

    def terminate(self):
        # Kill the child process and mark this instance as dead.
        self.p.terminate()
        self.alive = False
        print("[debug] instance terminated")

    def subprocess_worker(self, child_conn):
        # ⭐⭐ run in subprocess; must be implemented by subclasses.
        raise NotImplementedError

    def send_command(self, cmd):
        # ⭐ run in main process: forward user input to the child, suppressing
        # an input identical to the previous one (sent as "" instead).
        repeated = False
        if cmd == self.last_user_input:
            repeated = True
            cmd = ""
        else:
            self.last_user_input = cmd
        self.parent_conn.send(PipeCom("user_input", cmd))
        return repeated, cmd

    def immediate_showoff_when_possible(self, fp):
        # ⭐ main process
        # get the file extension of fp
        file_type = fp.split('.')[-1]
        # images get an inline preview appended to the chat
        if file_type.lower() in ['png', 'jpg']:
            image_path = os.path.abspath(fp)
            self.chatbot.append([
                '检测到新生图像:',
                f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
            ])
        yield from update_ui(chatbot=self.chatbot, history=self.history)

    def overwatch_workdir_file_change(self):
        # ⭐ main process — monitor the work dir mounted outside Docker.
        path_to_overwatch = self.autogen_work_dir
        change_list = []
        # Scan every file under the path and compare with the records in
        # self.previous_work_dir_files; if a new file appears or a file's
        # mtime changed, update the record and add the path to change_list.
        for root, dirs, files in os.walk(path_to_overwatch):
            for file in files:
                file_path = os.path.join(root, file)
                if file_path not in self.previous_work_dir_files.keys():
                    last_modified_time = os.stat(file_path).st_mtime
                    self.previous_work_dir_files.update({file_path: last_modified_time})
                    change_list.append(file_path)
                else:
                    last_modified_time = os.stat(file_path).st_mtime
                    if last_modified_time != self.previous_work_dir_files[file_path]:
                        self.previous_work_dir_files[file_path] = last_modified_time
                        change_list.append(file_path)
        if len(change_list) > 0:
            # Promote every new/changed file to the download zone and preview images.
            file_links = ""
            for f in change_list:
                res = promote_file_to_downloadzone(f)
                file_links += f'<br/><a href="file={res}" target="_blank">{res}</a>'
                yield from self.immediate_showoff_when_possible(f)

            self.chatbot.append(['检测到新生文档.', f'文档清单如下: {file_links}'])
            yield from update_ui(chatbot=self.chatbot, history=self.history)
        return change_list


    def main_process_ui_control(self, txt, create_or_resume) -> str:
        # ⭐ main process — drive the UI loop; returns "terminate" or "wait_feedback".
        if create_or_resume == 'create':
            self.cnt = 1
            self.parent_conn = self.launch_subprocess_with_pipe()  # ⭐⭐⭐
        repeated, cmd_to_autogen = self.send_command(txt)
        if txt == 'exit':
            self.chatbot.append([f"结束", "结束信号已明确,终止AutoGen程序。"])
            yield from update_ui(chatbot=self.chatbot, history=self.history)
            self.terminate()
            return "terminate"

        # patience = 10

        while True:
            time.sleep(0.5)
            if not self.alive:
                # the heartbeat watchdog might have it killed
                self.terminate()
                return "terminate"
            if self.parent_conn.poll():
                self.feed_heartbeat_watchdog()
                # Pop up to three stale status lines from the end of the chat
                # before showing the new message.
                if "[GPT-Academic] 等待中" in self.chatbot[-1][-1]:
                    self.chatbot.pop(-1)  # remove the last line
                if "等待您的进一步指令" in self.chatbot[-1][-1]:
                    self.chatbot.pop(-1)  # remove the last line
                if '[GPT-Academic] 等待中' in self.chatbot[-1][-1]:
                    self.chatbot.pop(-1)  # remove the last line
                msg = self.parent_conn.recv()  # PipeCom
                if msg.cmd == "done":
                    self.chatbot.append([f"结束", msg.content])
                    self.cnt += 1
                    yield from update_ui(chatbot=self.chatbot, history=self.history)
                    self.terminate()
                    break
                if msg.cmd == "show":
                    yield from self.overwatch_workdir_file_change()
                    notice = ""
                    if repeated: notice = "(自动忽略重复的输入)"
                    self.chatbot.append([f"运行阶段-{self.cnt}(上次用户反馈输入为: 「{cmd_to_autogen}」{notice}", msg.content])
                    self.cnt += 1
                    yield from update_ui(chatbot=self.chatbot, history=self.history)
                if msg.cmd == "interact":
                    yield from self.overwatch_workdir_file_change()
                    self.chatbot.append([f"程序抵达用户反馈节点.", msg.content +
                        "\n\n等待您的进一步指令." +
                        "\n\n(1) 一般情况下您不需要说什么, 清空输入区, 然后直接点击“提交”以继续. " +
                        "\n\n(2) 如果您需要补充些什么, 输入要反馈的内容, 直接点击“提交”以继续. " +
                        "\n\n(3) 如果您想终止程序, 输入exit, 直接点击“提交”以终止AutoGen并解锁. "
                    ])
                    yield from update_ui(chatbot=self.chatbot, history=self.history)
                    # do not terminate here, leave the subprocess_worker instance alive
                    return "wait_feedback"
            else:
                self.feed_heartbeat_watchdog()
                if '[GPT-Academic] 等待中' not in self.chatbot[-1][-1]:
                    # begin_waiting_time = time.time()
                    self.chatbot.append(["[GPT-Academic] 等待AutoGen执行结果 ...", "[GPT-Academic] 等待中"])
                # Append a dot to the waiting line as a lightweight progress indicator.
                self.chatbot[-1] = [self.chatbot[-1][0], self.chatbot[-1][1].replace("[GPT-Academic] 等待中", "[GPT-Academic] 等待中.")]
                yield from update_ui(chatbot=self.chatbot, history=self.history)
                # if time.time() - begin_waiting_time > patience:
                #     self.chatbot.append([f"结束", "等待超时, 终止AutoGen程序。"])
                #     yield from update_ui(chatbot=self.chatbot, history=self.history)
                #     self.terminate()
                #     return "terminate"

        self.terminate()
        return "terminate"

    def subprocess_worker_wait_user_feedback(self, wait_msg="wait user feedback"):
        # ⭐⭐ run in subprocess: request user feedback from the main process and
        # wait up to `patience` seconds for it; returns True on success.
        patience = 5 * 60
        begin_waiting_time = time.time()
        self.child_conn.send(PipeCom("interact", wait_msg))
        while True:
            time.sleep(0.5)
            if self.child_conn.poll():
                wait_success = True
                break
            if time.time() - begin_waiting_time > patience:
                self.child_conn.send(PipeCom("done", ""))
                wait_success = False
                break
        return wait_success
crazy_functions/agent_fns/watchdog.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import threading, time
2
+
3
class WatchDog():
    """Heartbeat watchdog: calls `bark_fn` once if `feed()` is not invoked
    within `timeout` seconds of the last feed."""

    def __init__(self, timeout, bark_fn, interval=3, msg="") -> None:
        self.last_feed = None      # epoch seconds of the most recent feed
        self.timeout = timeout     # seconds of silence tolerated before barking
        self.bark_fn = bark_fn     # callback fired on timeout
        self.interval = interval   # polling period in seconds
        self.msg = msg             # optional message printed when barking
        self.kill_dog = False      # set True externally to stop the watcher

    def watch(self):
        # Poll until explicitly killed or the heartbeat goes stale.
        while not self.kill_dog:
            elapsed = time.time() - self.last_feed
            if elapsed > self.timeout:
                if self.msg:
                    print(self.msg)
                self.bark_fn()
                return
            time.sleep(self.interval)

    def begin_watch(self):
        # Start watching on a daemon thread so it never blocks interpreter exit.
        self.last_feed = time.time()
        watcher = threading.Thread(target=self.watch, daemon=True)
        watcher.start()

    def feed(self):
        # Refresh the heartbeat timestamp.
        self.last_feed = time.time()
crazy_functions/chatglm微调工具.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import CatchException, update_ui, promote_file_to_downloadzone
2
+ from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
3
+ import datetime, json
4
+
5
def fetch_items(list_of_items, batch_size):
    """Yield consecutive slices of `list_of_items`, each at most `batch_size` long."""
    start = 0
    while start < len(list_of_items):
        yield list_of_items[start:start + batch_size]
        start += batch_size
8
+
9
def string_to_options(arguments):
    """Parse a shell-style argument string into an argparse Namespace."""
    import argparse
    import shlex

    # Create an argparse.ArgumentParser instance
    parser = argparse.ArgumentParser()

    # (flag, value type, help text, default) — the exact option set as before.
    option_table = [
        ("--llm_to_learn", str, "LLM model to learn", "gpt-3.5-turbo"),
        ("--prompt_prefix", str, "Prompt prefix", ''),
        ("--system_prompt", str, "System prompt", ''),
        ("--batch", int, "System prompt", 50),
        ("--pre_seq_len", int, "pre_seq_len", 50),
        ("--learning_rate", float, "learning_rate", 2e-2),
        ("--num_gpus", int, "num_gpus", 1),
        ("--json_dataset", str, "json_dataset", ""),
        ("--ptuning_directory", str, "ptuning_directory", ""),
    ]
    for flag, value_type, help_text, default in option_table:
        parser.add_argument(flag, type=value_type, help=help_text, default=default)

    # Split like a POSIX shell, then parse.
    return parser.parse_args(shlex.split(arguments))
33
+
34
@CatchException
def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Generate a ChatGLM fine-tuning dataset by batch-querying an LLM.

    txt            Text typed in the input box — here, the path of a JSONL
                   file whose lines contain a "content" field.
    llm_kwargs     GPT model parameters such as temperature and top_p;
                   normally passed through unchanged.
    plugin_kwargs  Plugin parameters; "advanced_arg" carries the CLI-style
                   options parsed by string_to_options.
    chatbot        Handle of the chat display box, used to show output.
    history        Chat history / context.
    system_prompt  Silent system prompt for GPT.
    web_port       Port the app is currently running on.
    """
    history = []  # clear history to avoid input overflow
    chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成"))
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    args = plugin_kwargs.get("advanced_arg", None)
    if args is None:
        chatbot.append(("没给定指令", "退出"))
        yield from update_ui(chatbot=chatbot, history=history); return
    else:
        arguments = string_to_options(arguments=args)

    # Load the "content" field of every JSONL line in the input file.
    dat = []
    with open(txt, 'r', encoding='utf8') as f:
        for line in f.readlines():
            json_dat = json.loads(line)
            dat.append(json_dat["content"])

    llm_kwargs['llm_model'] = arguments.llm_to_learn
    for batch in fetch_items(dat, arguments.batch):
        res = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
            inputs_array=[f"{arguments.prompt_prefix}\n\n{b}" for b in (batch)],
            inputs_show_user_array=[f"Show Nothing" for _ in (batch)],
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history_array=[[] for _ in (batch)],
            sys_prompt_array=[arguments.system_prompt for _ in (batch)],
            max_workers=10  # maximum parallelism allowed by OpenAI
        )

        # Append each (content, summary) pair to <input>.generated.json.
        # NOTE(review): res[1::2] presumably selects replies from an
        # alternating [input, reply, ...] list — confirm against the helper.
        with open(txt+'.generated.json', 'a+', encoding='utf8') as f:
            for b, r in zip(batch, res[1::2]):
                f.write(json.dumps({"content":b, "summary":r}, ensure_ascii=False)+'\n')

    promote_file_to_downloadzone(txt+'.generated.json', rename_file='generated.json', chatbot=chatbot)
    return
79
+
80
+
81
+
82
@CatchException
def 启动微调(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Launch a ChatGLM2-6B P-Tuning run via torchrun in a subprocess.

    txt            Text typed in the input box (unused here).
    llm_kwargs     GPT model parameters; normally passed through unchanged.
    plugin_kwargs  Plugin parameters; "advanced_arg" carries the CLI-style
                   options parsed by string_to_options.
    chatbot        Handle of the chat display box, used to show output.
    history        Chat history / context.
    system_prompt  Silent system prompt for GPT.
    web_port       Port the app is currently running on.
    """
    import subprocess
    history = []  # clear history to avoid input overflow
    chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成"))
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    args = plugin_kwargs.get("advanced_arg", None)
    if args is None:
        chatbot.append(("没给定指令", "退出"))
        yield from update_ui(chatbot=chatbot, history=history); return
    else:
        arguments = string_to_options(arguments=args)



    pre_seq_len = arguments.pre_seq_len                # e.g. 128
    learning_rate = arguments.learning_rate            # e.g. 2e-2
    num_gpus = arguments.num_gpus                      # e.g. 1
    json_dataset = arguments.json_dataset              # e.g. 't_code.json'
    ptuning_directory = arguments.ptuning_directory    # e.g. '/home/hmp/ChatGLM2-6B/ptuning'

    command = f"torchrun --standalone --nnodes=1 --nproc-per-node={num_gpus} main.py \
        --do_train \
        --train_file AdvertiseGen/{json_dataset} \
        --validation_file AdvertiseGen/{json_dataset} \
        --preprocessing_num_workers 20 \
        --prompt_column content \
        --response_column summary \
        --overwrite_cache \
        --model_name_or_path THUDM/chatglm2-6b \
        --output_dir output/clothgen-chatglm2-6b-pt-{pre_seq_len}-{learning_rate} \
        --overwrite_output_dir \
        --max_source_length 256 \
        --max_target_length 256 \
        --per_device_train_batch_size 1 \
        --per_device_eval_batch_size 1 \
        --gradient_accumulation_steps 16 \
        --predict_with_generate \
        --max_steps 100 \
        --logging_steps 10 \
        --save_steps 20 \
        --learning_rate {learning_rate} \
        --pre_seq_len {pre_seq_len} \
        --quantization_bit 4"

    # NOTE(review): shell=True with an interpolated command string; the
    # arguments come from the plugin's advanced_arg input — be careful about
    # who can reach this plugin.
    process = subprocess.Popen(command, shell=True, cwd=ptuning_directory)
    try:
        # Wait up to 24 hours for training to finish, then kill the run.
        process.communicate(timeout=3600*24)
    except subprocess.TimeoutExpired:
        process.kill()
    return
crazy_functions/crazy_utils.py ADDED
@@ -0,0 +1,609 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import update_ui, get_conf, trimmed_format_exc, get_max_token, Singleton
2
+ import threading
3
+ import os
4
+ import logging
5
+
6
def input_clipping(inputs, history, max_token_limit):
    """
    Clip `inputs` and/or `history` so their combined token count fits within
    `max_token_limit`.

    If the input alone uses less than half of the budget, only the history is
    clipped; otherwise input and history are clipped together. Each round of
    clipping removes tokens from the currently longest segment.

    Args:
        inputs (str): current user input
        history (list[str]): previous dialogue turns
        max_token_limit (int): total token budget

    Returns:
        (inputs, history): clipped input string and clipped history list
    """
    import numpy as np
    from request_llms.bridge_all import model_info
    enc = model_info["gpt-3.5-turbo"]['tokenizer']
    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

    mode = 'input-and-history'
    # when the input occupies less than half of the budget, clip only the history
    input_token_num = get_token_num(inputs)
    if input_token_num < max_token_limit//2:
        mode = 'only-history'
        max_token_limit = max_token_limit - input_token_num

    everything = [inputs] if mode == 'input-and-history' else ['']
    everything.extend(history)
    n_token = get_token_num('\n'.join(everything))
    everything_token = [get_token_num(e) for e in everything]
    # truncation granularity; clamp to at least 1 token per round, otherwise
    # short segments would give delta == 0 and the loop below could never
    # make progress (infinite loop)
    delta = max(max(everything_token) // 16, 1)

    while n_token > max_token_limit:
        where = np.argmax(everything_token)
        encoded = enc.encode(everything[where], disallowed_special=())
        clipped_encoded = encoded[:len(encoded)-delta]
        everything[where] = enc.decode(clipped_encoded)[:-1]    # -1 to remove the possibly half-cut char
        everything_token[where] = get_token_num(everything[where])
        n_token = get_token_num('\n'.join(everything))

    if mode == 'input-and-history':
        inputs = everything[0]
    history = everything[1:]
    return inputs, history
39
+
40
def request_gpt_model_in_new_thread_with_ui_alive(
        inputs, inputs_show_user, llm_kwargs,
        chatbot, history, sys_prompt, refresh_interval=0.2,
        handle_token_exceed=True,
        retry_times_at_unknown_error=2,
        ):
    """
    Request a GPT model in a worker thread while keeping the UI responsive.

    Args:
        inputs (str): the prompt actually sent to the model
        inputs_show_user (str): the prompt shown to the user in the report
            (lets verbose real inputs stay hidden to keep the report readable)
        llm_kwargs: LLM configuration
        chatbot: chat display handle, used to stream data to the UI
        history (list): chat history
        sys_prompt (str): system prompt handed to GPT
        refresh_interval (float, optional): UI refresh period; keep it below 1,
            never above 3 (purely cosmetic)
        handle_token_exceed (bool): if True (default), truncate the input
            aggressively on token overflow instead of failing
        retry_times_at_unknown_error (int): number of retries on unknown errors

    Returns:
        str: the final GPT response
    """
    import time
    from concurrent.futures import ThreadPoolExecutor
    from request_llms.bridge_all import predict_no_ui_long_connection
    # user feedback
    chatbot.append([inputs_show_user, ""])
    yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI
    executor = ThreadPoolExecutor(max_workers=16)
    # shared cross-thread state: [streamed text so far, watchdog feed time, status]
    mutable = ["", time.time(), ""]
    # watchdog patience (seconds)
    watch_dog_patience = 5
    # the request task executed in the worker thread
    def _req_gpt(inputs, history, sys_prompt):
        retry_op = retry_times_at_unknown_error
        exceeded_cnt = 0
        while True:
            # watchdog error: the UI loop stopped feeding us -> abort
            if len(mutable) >= 2 and (time.time()-mutable[1]) > watch_dog_patience:
                raise RuntimeError("检测到程序终止。")
            try:
                # [case 1]: completed successfully
                result = predict_no_ui_long_connection(
                    inputs=inputs, llm_kwargs=llm_kwargs,
                    history=history, sys_prompt=sys_prompt, observe_window=mutable)
                return result
            except ConnectionAbortedError as token_exceeded_error:
                # [case 2]: token overflow
                if handle_token_exceed:
                    exceeded_cnt += 1
                    # [chosen: handle] estimate a keep-ratio and retain as much text as possible
                    from toolbox import get_reduce_token_percent
                    p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
                    MAX_TOKEN = get_max_token(llm_kwargs)
                    EXCEED_ALLO = 512 + 512 * exceeded_cnt  # widen the allowance each time we overflow again
                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
                    mutable[0] += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
                    continue # retry
                else:
                    # [chosen: give up]
                    tb_str = '```\n' + trimmed_format_exc() + '```'
                    mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                    return mutable[0] # give up
            except:
                # [case 3]: other errors -> retry a few times
                tb_str = '```\n' + trimmed_format_exc() + '```'
                print(tb_str)
                mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                if retry_op > 0:
                    retry_op -= 1
                    mutable[0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}:\n\n"
                    if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
                        # back off longer when we hit a rate limit
                        time.sleep(30)
                    time.sleep(5)
                    continue # retry
                else:
                    time.sleep(5)
                    return mutable[0] # give up
    # submit the task
    future = executor.submit(_req_gpt, inputs, history, sys_prompt)
    while True:
        # yield once per interval to refresh the frontend page
        time.sleep(refresh_interval)
        # "feed" the watchdog
        mutable[1] = time.time()
        if future.done():
            break
        chatbot[-1] = [chatbot[-1][0], mutable[0]]
        yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI
    final_result = future.result()
    chatbot[-1] = [chatbot[-1][0], final_result]
    yield from update_ui(chatbot=chatbot, history=[]) # on success, any interim error text is replaced
    return final_result
137
+
138
def can_multi_process(llm):
    """Return True if the given LLM backend name tolerates concurrent requests."""
    # remote API backends can be queried from several threads at once;
    # everything else (e.g. local chatglm) must stay single-threaded
    concurrent_prefixes = ('gpt-', 'api2d-', 'azure-', 'spark', 'zhipuai')
    return llm.startswith(concurrent_prefixes)
145
+
146
def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array, inputs_show_user_array, llm_kwargs,
        chatbot, history_array, sys_prompt_array,
        refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
        handle_token_exceed=True, show_user_at_complete=False,
        retry_times_at_unknown_error=2,
        ):
    """
    Request GPT model using multiple threads with UI and high efficiency.
    The multi-threaded version of request_gpt_model. Features:
        streams remote data to the UI in real time
        uses a thread pool whose size can be tuned to avoid openai rate-limit errors
        handles mid-run interruption
        on network trouble, folds the traceback and any partial data into the output

    Args (variables ending in _array are lists; list length == number of subtasks;
    the lists are unpacked, one element per worker thread):
        inputs_array (list): inputs for each subtask
        inputs_show_user_array (list): inputs shown in the report for each subtask
            (lets verbose real inputs stay hidden to keep the report readable)
        llm_kwargs: LLM configuration
        chatbot: chat display handle, used to stream data to the UI
        history_array (list): chat histories (outer list: subtasks; inner list: dialogue turns)
        sys_prompt_array (list): system prompt per subtask
        refresh_interval (float, optional): UI refresh period; keep below 1, never above 3 (cosmetic)
        max_workers (int, optional): max thread count (default read from config.py);
            caps the request rate to openai when there are many subtasks
        scroller_max_len (int, optional): how many trailing characters of each stream to show (cosmetic)
        handle_token_exceed (bool, optional): if True (default), truncate aggressively on token overflow
        show_user_at_complete (bool, optional): show full input/output pairs in the chat on completion
        retry_times_at_unknown_error: retries per subtask on failure

    Returns:
        list: flattened [input_shown, response, input_shown, response, ...];
              a failed subtask's response carries the traceback for debugging
    """
    import time, random
    from concurrent.futures import ThreadPoolExecutor
    from request_llms.bridge_all import predict_no_ui_long_connection
    assert len(inputs_array) == len(history_array)
    assert len(inputs_array) == len(sys_prompt_array)
    if max_workers == -1: # read from the config file
        try: max_workers = get_conf('DEFAULT_WORKER_NUM')
        except: max_workers = 8
        if max_workers <= 0: max_workers = 3
    # disable multi-threading for chatglm-like local models, which may stall badly
    if not can_multi_process(llm_kwargs['llm_model']):
        max_workers = 1

    executor = ThreadPoolExecutor(max_workers=max_workers)
    n_frag = len(inputs_array)
    # user feedback
    chatbot.append(["请开始多线程操作。", ""])
    yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI
    # cross-thread state: per subtask [streamed text, watchdog feed time, status label]
    mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]

    # watchdog patience (seconds)
    watch_dog_patience = 5

    # worker-thread task
    def _req_gpt(index, inputs, history, sys_prompt):
        gpt_say = ""
        retry_op = retry_times_at_unknown_error
        exceeded_cnt = 0
        mutable[index][2] = "执行中"
        # watchdog: the UI loop stopped feeding this slot -> abort
        detect_timeout = lambda: len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > watch_dog_patience
        while True:
            # watchdog error
            if detect_timeout(): raise RuntimeError("检测到程序终止。")
            try:
                # [case 1]: completed successfully
                gpt_say = predict_no_ui_long_connection(
                    inputs=inputs, llm_kwargs=llm_kwargs, history=history,
                    sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
                )
                mutable[index][2] = "已成功"
                return gpt_say
            except ConnectionAbortedError as token_exceeded_error:
                # [case 2]: token overflow
                if handle_token_exceed:
                    exceeded_cnt += 1
                    # [chosen: handle] estimate a keep-ratio and retain as much text as possible
                    from toolbox import get_reduce_token_percent
                    p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
                    MAX_TOKEN = get_max_token(llm_kwargs)
                    EXCEED_ALLO = 512 + 512 * exceeded_cnt  # widen the allowance each time we overflow again
                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
                    gpt_say += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
                    mutable[index][2] = f"截断重试"
                    continue # retry
                else:
                    # [chosen: give up]
                    tb_str = '```\n' + trimmed_format_exc() + '```'
                    gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                    if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
                    mutable[index][2] = "输入过长已放弃"
                    return gpt_say # give up
            except:
                # [case 3]: other errors
                if detect_timeout(): raise RuntimeError("检测到程序终止。")
                tb_str = '```\n' + trimmed_format_exc() + '```'
                print(tb_str)
                gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
                if retry_op > 0:
                    retry_op -= 1
                    wait = random.randint(5, 20)  # jitter to avoid thundering-herd retries
                    if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
                        wait = wait * 3
                        fail_info = "OpenAI绑定信用卡可解除频率限制 "
                    else:
                        fail_info = ""
                    # things may improve after waiting ten-odd seconds
                    for i in range(wait):
                        mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1)
                    # start retrying
                    if detect_timeout(): raise RuntimeError("检测到程序终止。")
                    mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
                    continue # retry
                else:
                    mutable[index][2] = "已失败"
                    wait = 5
                    time.sleep(5)
                    return gpt_say # give up

    # launch the asynchronous tasks
    futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
        range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
    cnt = 0
    while True:
        # yield once per interval to refresh the frontend page
        time.sleep(refresh_interval)
        cnt += 1
        worker_done = [h.done() for h in futures]
        # nicer UI visual effect
        observe_win = []
        # every thread slot must be "fed" (watchdog)
        for thread_index, _ in enumerate(worker_done):
            mutable[thread_index][1] = time.time()
        # print something fun on the frontend
        for thread_index, _ in enumerate(worker_done):
            print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
                replace('\n', '').replace('`', '.').replace(
                    ' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
            observe_win.append(print_something_really_funny)
        # print something fun on the frontend
        stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
                            if not done else f'`{mutable[thread_index][2]}`\n\n'
                            for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
        # print something fun on the frontend
        chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
        yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI
        if all(worker_done):
            executor.shutdown()
            break

    # all asynchronous tasks finished
    gpt_response_collection = []
    for inputs_show_user, f in zip(inputs_show_user_array, futures):
        gpt_res = f.result()
        gpt_response_collection.extend([inputs_show_user, gpt_res])

    # optionally show the complete results in the chat at the end
    if show_user_at_complete:
        for inputs_show_user, f in zip(inputs_show_user_array, futures):
            gpt_res = f.result()
            chatbot.append([inputs_show_user, gpt_res])
            yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI
            time.sleep(0.5)
    return gpt_response_collection
315
+
316
+
317
+
318
def read_and_clean_pdf_text(fp):
    """
    Split a PDF into cleaned text. Uses many heuristics; the logic is messy,
    but it works remarkably well.

    **Input**
    - `fp`: path of the pdf file to read and clean

    **Output**
    - `meta_txt`: cleaned text content (one string)
    - `page_one_meta`: cleaned text content of the first page (list of blocks)

    **What it does**
    Reads the pdf and cleans its text content. The cleaning rules include:
    - extract the text of every block element and merge them into one string
    - drop short blocks (fewer than 100 characters), replacing them with a newline
    - remove redundant empty lines
    - merge paragraph blocks that start with a lowercase letter, joining with a space
    - remove duplicated newlines
    - replace each newline with two newlines so paragraphs are separated by a blank line
    """
    import fitz, copy
    import re
    import numpy as np
    from colorful import print亮黄, print亮绿
    fc = 0  # Index 0: text
    fs = 1  # Index 1: font size
    fb = 2  # Index 2: bounding box
    REMOVE_FOOT_NOTE = True # whether to drop non-body content (smaller font than the body: references, footnotes, captions, ...)
    REMOVE_FOOT_FFSIZE_PERCENT = 0.95 # treated as non-body when smaller than body-font * this ratio (body font size is not always perfectly uniform)
    def primary_ffsize(l):
        """
        Extract the dominant font size of a text line (weighted by character count).
        """
        fsize_statiscs = {}
        for wtf in l['spans']:
            if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0
            fsize_statiscs[wtf['size']] += len(wtf['text'])
        return max(fsize_statiscs, key=fsize_statiscs.get)

    def ffsize_same(a,b):
        """
        Whether two font sizes are approximately equal (within 2%).
        """
        return abs((a-b)/max(a,b)) < 0.02

    with fitz.open(fp) as doc:
        meta_txt = []
        meta_font = []

        meta_line = []
        meta_span = []
        ############################## <Step 1: gather initial information> ##################################
        for index, page in enumerate(doc):
            # file_content += page.get_text()
            text_areas = page.get_text("dict")  # get the text info on this page
            for t in text_areas['blocks']:
                if 'lines' in t:
                    pf = 998
                    for l in t['lines']:
                        txt_line = "".join([wtf['text'] for wtf in l['spans']])
                        if len(txt_line) == 0: continue
                        pf = primary_ffsize(l)
                        meta_line.append([txt_line, pf, l['bbox'], l])
                        for wtf in l['spans']: # for l in t['lines']:
                            meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])])
                    # meta_line.append(["NEW_BLOCK", pf])
            # block extraction: for each word segment within a line, for each line (cross-line words), for each block
            meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
                '- ', '') for t in text_areas['blocks'] if 'lines' in t])
            meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']])
                             for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t])
            if index == 0:
                page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
                    '- ', '') for t in text_areas['blocks'] if 'lines' in t]

        ############################## <Step 2: determine the main body font> ##################################
        try:
            fsize_statiscs = {}
            for span in meta_span:
                if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0
                fsize_statiscs[span[1]] += span[2]
            main_fsize = max(fsize_statiscs, key=fsize_statiscs.get)
            if REMOVE_FOOT_NOTE:
                give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT
        except:
            raise RuntimeError(f'抱歉, 我们暂时无法解析此PDF文档: {fp}。')
        ############################## <Step 3: split and regroup> ##################################
        mega_sec = []
        sec = []
        for index, line in enumerate(meta_line):
            if index == 0:
                sec.append(line[fc])
                continue
            if REMOVE_FOOT_NOTE:
                # skip lines whose font is clearly smaller than the body text
                if meta_line[index][fs] <= give_up_fize_threshold:
                    continue
            if ffsize_same(meta_line[index][fs], meta_line[index-1][fs]):
                # try to detect a paragraph boundary: line ends with '.' and is much shorter than the previous one
                if meta_line[index][fc].endswith('.') and\
                        (meta_line[index-1][fc] != 'NEW_BLOCK') and \
                        (meta_line[index][fb][2] - meta_line[index][fb][0]) < (meta_line[index-1][fb][2] - meta_line[index-1][fb][0]) * 0.7:
                    sec[-1] += line[fc]
                    sec[-1] += "\n\n"
                else:
                    sec[-1] += " "
                    sec[-1] += line[fc]
            else:
                if (index+1 < len(meta_line)) and \
                        meta_line[index][fs] > main_fsize:
                    # single line + larger font: treat as a section title
                    mega_sec.append(copy.deepcopy(sec))
                    sec = []
                    sec.append("# " + line[fc])
                else:
                    # try to detect a section boundary
                    if meta_line[index-1][fs] > meta_line[index][fs]:
                        sec.append("\n" + line[fc])
                    else:
                        sec.append(line[fc])
        mega_sec.append(copy.deepcopy(sec))

        finals = []
        for ms in mega_sec:
            final = " ".join(ms)
            final = final.replace('- ', ' ')
            finals.append(final)
        meta_txt = finals

        ############################## <Step 4: miscellaneous post-processing> ##################################
        def 把字符太少的块清除为回车(meta_txt):
            # replace blocks with fewer than 100 characters by a bare newline
            for index, block_txt in enumerate(meta_txt):
                if len(block_txt) < 100:
                    meta_txt[index] = '\n'
            return meta_txt
        meta_txt = 把字符太少的块清除为回车(meta_txt)

        def 清理多余的空行(meta_txt):
            # collapse runs of consecutive empty lines (iterate backwards so pops are safe)
            for index in reversed(range(1, len(meta_txt))):
                if meta_txt[index] == '\n' and meta_txt[index-1] == '\n':
                    meta_txt.pop(index)
            return meta_txt
        meta_txt = 清理多余的空行(meta_txt)

        def 合并小写开头的段落块(meta_txt):
            # merge paragraph blocks that start with a lowercase word into the previous block
            def starts_with_lowercase_word(s):
                pattern = r"^[a-z]+"
                match = re.match(pattern, s)
                if match:
                    return True
                else:
                    return False
            # some PDFs start the very first paragraph with a lowercase letter;
            # capitalize it to avoid an index error at index-1 below
            if starts_with_lowercase_word(meta_txt[0]):
                meta_txt[0] = meta_txt[0].capitalize()
            for _ in range(100):
                for index, block_txt in enumerate(meta_txt):
                    if starts_with_lowercase_word(block_txt):
                        if meta_txt[index-1] != '\n':
                            meta_txt[index-1] += ' '
                        else:
                            meta_txt[index-1] = ''
                        meta_txt[index-1] += meta_txt[index]
                        meta_txt[index] = '\n'
            return meta_txt
        meta_txt = 合并小写开头的段落块(meta_txt)
        meta_txt = 清理多余的空行(meta_txt)

        meta_txt = '\n'.join(meta_txt)
        # remove duplicated newlines
        for _ in range(5):
            meta_txt = meta_txt.replace('\n\n', '\n')

        # newline -> double newline (blank line between paragraphs)
        meta_txt = meta_txt.replace('\n', '\n\n')

        ############################## <Step 5: show the split result> ##################################
        # for f in finals:
        #     print亮黄(f)
        #     print亮绿('***************************')

    return meta_txt, page_one_meta
499
+
500
+
501
def get_files_from_everything(txt, type): # type='.md'
    """
    Collect all files of the given extension from a path, a single file, or a URL.

    Args:
        txt: a local file path, a local directory, or an http(s) URL.
        type: the file extension to look for, e.g. '.md'.

    Returns:
        (success, file_manifest, project_folder):
        success (bool) - whether at least one matching file was found;
        file_manifest (list[str]) - the matching file paths;
        project_folder (str | None) - the folder containing them (a temp log
        folder for downloaded URLs, None when nothing matched).
    """
    import glob, os

    if txt.startswith('http'):
        # remote file on the network: download it into the log folder
        import requests
        from toolbox import get_conf
        from toolbox import get_log_folder, gen_time_str
        proxies = get_conf('proxies')
        try:
            resp = requests.get(txt, proxies=proxies)
        except:
            raise ConnectionRefusedError(f"无法下载资源{txt},请检查。")
        download_dir = get_log_folder(plugin_name='web_download')
        download_path = os.path.join(download_dir, gen_time_str()+type)
        with open(download_path, 'wb+') as f:
            f.write(resp.content)
        return True, [download_path], download_dir

    if txt.endswith(type):
        # a single file was given directly
        return True, [txt], os.path.dirname(txt)

    if os.path.exists(txt):
        # a local directory: search it recursively
        matches = glob.glob(f'{txt}/**/*'+type, recursive=True)
        return bool(matches), matches, txt

    # nothing usable
    return False, [], None
547
+
548
+
549
+
550
@Singleton
class nougat_interface():
    """Singleton wrapper around the `nougat` CLI for PDF -> Markdown parsing."""

    def __init__(self):
        # nougat is heavyweight; a lock serializes parses so only one runs at a time
        self.threadLock = threading.Lock()

    def nougat_with_timeout(self, command, cwd, timeout=3600):
        """
        Run a nougat shell command with a hard timeout.

        Args:
            command (str): the shell command to execute.
            cwd (str): working directory for the command.
            timeout (int): maximum seconds to wait before killing the process.

        Returns:
            bool: True on normal completion, False if the command timed out.
        """
        import subprocess
        from toolbox import ProxyNetworkActivate
        logging.info(f'正在执行命令 {command}')
        with ProxyNetworkActivate("Nougat_Download"):
            process = subprocess.Popen(command, shell=True, cwd=cwd, env=os.environ)
            try:
                stdout, stderr = process.communicate(timeout=timeout)
            except subprocess.TimeoutExpired:
                process.kill()
                stdout, stderr = process.communicate()
                print("Process timed out!")
                return False
        return True

    def NOUGAT_parse_pdf(self, fp, chatbot, history):
        """
        Parse the PDF at `fp` with nougat and return the path of the generated
        .mmd file. This is a generator: it yields UI updates while queuing and
        parsing. Raises RuntimeError when nougat produces no output.
        """
        from toolbox import update_ui_lastest_msg

        yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在排队, 等待线程锁...",
                                         chatbot=chatbot, history=history, delay=0)
        self.threadLock.acquire()
        # fix: release the lock via try/finally; previously any exception raised
        # after acquire() (e.g. from os.makedirs or the nougat run itself) leaked
        # the lock and deadlocked every later parse
        try:
            import glob, threading, os
            from toolbox import get_log_folder, gen_time_str
            dst = os.path.join(get_log_folder(plugin_name='nougat'), gen_time_str())
            os.makedirs(dst)

            yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)",
                                             chatbot=chatbot, history=history, delay=0)
            self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd(), timeout=3600)
            res = glob.glob(os.path.join(dst,'*.mmd'))
            if len(res) == 0:
                raise RuntimeError("Nougat解析论文失败。")
            return res[0]
        finally:
            self.threadLock.release()
591
+
592
+
593
+
594
+
595
def try_install_deps(deps, reload_m=None):
    """
    Install the given pip packages for the current user, then reload `site`
    (so freshly installed packages become importable in this process) and
    reload any modules listed in `reload_m`.

    Args:
        deps (list[str]): package specifiers passed to `pip install --user`.
        reload_m (list[str] | None): module names to reload after installing.
    """
    import subprocess, sys, importlib
    if reload_m is None:
        # fix: avoid the shared-state pitfall of a mutable default argument
        reload_m = []
    for dep in deps:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep])
    import site
    importlib.reload(site)
    for m in reload_m:
        importlib.reload(__import__(m))
603
+
604
+
605
def get_plugin_arg(plugin_kwargs, key, default):
    """Fetch a plugin argument, treating an empty-string value as "not provided"."""
    value = plugin_kwargs.get(key, default)
    if value == "":
        # drop the empty entry so later lookups also fall back to their defaults
        plugin_kwargs.pop(key, None)
        return default
    # normal case: the key holds a real value, or was absent (default returned)
    return value
crazy_functions/gen_fns/gen_fns_shared.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import importlib
3
+ from toolbox import trimmed_format_exc, gen_time_str, get_log_folder
4
+ from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, is_the_upload_folder
5
+ from toolbox import promote_file_to_downloadzone, get_log_folder, update_ui_lastest_msg
6
+ import multiprocessing
7
+
8
def get_class_name(class_string):
    """
    Extract the name of the first class defined in a Python source string.

    Args:
        class_string (str): source text containing a `class` definition.

    Returns:
        str: the class name.

    Raises:
        AttributeError: if the text contains no class definition.
    """
    import re
    # Use regex to extract the class name.
    # fix/generalize: also match classes declared without a base list
    # ("class Foo:"), which the old pattern `class (\w+)\(` missed.
    class_name = re.search(r'class (\w+)[\(:]', class_string).group(1)
    return class_name
13
+
14
def try_make_module(code, chatbot):
    """
    Persist generated plugin code to disk and verify, in a sandbox subprocess,
    that it defines an instantiable class.

    Args:
        code (str): Python source text expected to contain a class definition.
        chatbot: chat display handle; the saved file is promoted to the
            download zone so the user can inspect it.

    Returns:
        (success, traceback): success (bool) - whether the class could be
        imported and instantiated within the time limit; traceback (str) -
        error details when it could not.
    """
    module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
    fn_path = f'{get_log_folder(plugin_name="gen_plugin_verify")}/{module_file}.py'
    with open(fn_path, 'w', encoding='utf8') as f: f.write(code)
    promote_file_to_downloadzone(fn_path, chatbot=chatbot)
    class_name = get_class_name(code)
    # a Manager dict lets the sandbox subprocess report its result back
    manager = multiprocessing.Manager()
    return_dict = manager.dict()
    p = multiprocessing.Process(target=is_function_successfully_generated, args=(fn_path, class_name, return_dict))
    # only has 10 seconds to run
    p.start(); p.join(timeout=10)
    if p.is_alive(): p.terminate(); p.join()
    p.close()
    return return_dict["success"], return_dict['traceback']
28
+
29
# check is_function_successfully_generated
def is_function_successfully_generated(fn_path, class_name, return_dict):
    """
    Import the module at `fn_path` and check that `class_name` can be
    instantiated, reporting the outcome through `return_dict`.

    Intended to run inside a sandbox subprocess (see try_make_module), so it
    never raises on its own: failures land in return_dict['traceback'].

    Args:
        fn_path (str): path of the generated .py file.
        class_name (str): class expected inside that module.
        return_dict: dict-like channel receiving 'success' and 'traceback'.
    """
    # fix: explicitly import the submodule; a bare `import importlib` does not
    # guarantee that `importlib.util` is bound
    import importlib.util
    return_dict['success'] = False
    return_dict['traceback'] = ""
    try:
        # Create a spec for the module
        module_spec = importlib.util.spec_from_file_location('example_module', fn_path)
        # Load the module
        example_module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(example_module)
        # Now you can use the module
        some_class = getattr(example_module, class_name)
        # Now you can create an instance of the class
        instance = some_class()
        return_dict['success'] = True
        return
    except:
        return_dict['traceback'] = trimmed_format_exc()
        return
48
+
49
def subprocess_worker(code, file_path, return_dict):
    """
    Sandbox worker: write generated plugin `code` to disk, import it, and run
    its class's `.run(file_path)`, reporting the outcome through `return_dict`.

    Args:
        code (str): Python source text containing the plugin class.
        file_path: argument forwarded to the plugin instance's run() method.
        return_dict: dict-like channel (e.g. a multiprocessing Manager dict)
            receiving 'result', 'success' and 'traceback'.
    """
    # fix: explicitly import the submodule; a bare `import importlib` does not
    # guarantee that `importlib.util` is bound
    import importlib.util
    return_dict['result'] = None
    return_dict['success'] = False
    return_dict['traceback'] = ""
    try:
        module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
        fn_path = f'{get_log_folder(plugin_name="gen_plugin_run")}/{module_file}.py'
        with open(fn_path, 'w', encoding='utf8') as f: f.write(code)
        class_name = get_class_name(code)
        # Create a spec for the module
        module_spec = importlib.util.spec_from_file_location('example_module', fn_path)
        # Load the module
        example_module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(example_module)
        # Now you can use the module
        some_class = getattr(example_module, class_name)
        # Now you can create an instance of the class
        instance = some_class()
        return_dict['result'] = instance.run(file_path)
        return_dict['success'] = True
    except:
        return_dict['traceback'] = trimmed_format_exc()
crazy_functions/ipc_fns/mp.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import platform
2
+ import pickle
3
+ import multiprocessing
4
+
5
def run_in_subprocess_wrapper_func(v_args):
    """
    Subprocess entry point: unpickle a call spec, execute it, and report back.

    `v_args` is a pickled tuple (func, args, kwargs, return_dict,
    exception_dict); the result lands in return_dict['result'], any exception's
    exc_info triple in exception_dict['exception'] so the parent can re-raise.
    """
    import sys
    func, args, kwargs, return_dict, exception_dict = pickle.loads(v_args)
    try:
        return_dict['result'] = func(*args, **kwargs)
    except Exception:
        # ship the full exc_info triple back so the parent can re-raise it
        exception_dict['exception'] = sys.exc_info()
15
def run_in_subprocess_with_timeout(func, timeout=60):
    """
    Decorator factory: run `func` in a separate process with a hard timeout.

    On Linux the returned wrapper executes each call in a child process,
    raising TimeoutError if it exceeds `timeout` seconds and re-raising any
    child exception in the parent. On other platforms `func` is returned
    unchanged (no timeout protection is applied there).
    """
    if platform.system() == 'Linux':
        def wrapper(*args, **kwargs):
            # Manager dicts pickle as proxies, so the child process can write
            # results/exceptions that remain visible to this parent process
            return_dict = multiprocessing.Manager().dict()
            exception_dict = multiprocessing.Manager().dict()
            v_args = pickle.dumps((func, args, kwargs, return_dict, exception_dict))
            process = multiprocessing.Process(target=run_in_subprocess_wrapper_func, args=(v_args,))
            process.start()
            process.join(timeout)
            if process.is_alive():
                process.terminate()
                raise TimeoutError(f'功能单元{str(func)}未能在规定时间内完成任务')
            process.close()
            if 'exception' in exception_dict:
                # ooops, the subprocess ran into an exception
                exc_info = exception_dict['exception']
                raise exc_info[1].with_traceback(exc_info[2])
            if 'result' in return_dict.keys():
                # If the subprocess ran successfully, return the result
                return return_dict['result']
        return wrapper
    else:
        return func
crazy_functions/json_fns/pydantic_io.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ https://github.com/langchain-ai/langchain/blob/master/docs/extras/modules/model_io/output_parsers/pydantic.ipynb
3
+
4
+ Example 1.
5
+
6
+ # Define your desired data structure.
7
+ class Joke(BaseModel):
8
+ setup: str = Field(description="question to set up a joke")
9
+ punchline: str = Field(description="answer to resolve the joke")
10
+
11
+ # You can add custom validation logic easily with Pydantic.
12
+ @validator("setup")
13
+ def question_ends_with_question_mark(cls, field):
14
+ if field[-1] != "?":
15
+ raise ValueError("Badly formed question!")
16
+ return field
17
+
18
+
19
+ Example 2.
20
+
21
+ # Here's another example, but with a compound typed field.
22
+ class Actor(BaseModel):
23
+ name: str = Field(description="name of an actor")
24
+ film_names: List[str] = Field(description="list of names of films they starred in")
25
+ """
26
+
27
+ import json, re, logging
28
+
29
+
30
# Prompt template asking the model to emit JSON matching a schema, with a worked example.
PYDANTIC_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.

Here is the output schema:
```
{schema}
```"""


# Minimal variant of the instructions, without the worked example.
PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE = """The output should be formatted as a JSON instance that conforms to the JSON schema below.
```
{schema}
```"""

class JsonStringError(Exception): ...

class GptJsonIO():
    """
    Exchange structured JSON with an LLM, driven by a pydantic-style schema.

    Builds format instructions from the schema, extracts the first JSON
    candidate from a model response, and can ask the model to repair a broken
    JSON string once before giving up with JsonStringError.
    """

    def __init__(self, schema, example_instruction=True):
        # schema: a pydantic model class (must provide .schema() and .parse_obj())
        self.pydantic_object = schema
        self.example_instruction = example_instruction
        self.format_instructions = self.generate_format_instructions()

    def generate_format_instructions(self):
        """Build the instruction prompt embedding the (reduced) JSON schema."""
        schema = self.pydantic_object.schema()

        # Remove extraneous fields.
        reduced_schema = schema
        if "title" in reduced_schema:
            del reduced_schema["title"]
        if "type" in reduced_schema:
            del reduced_schema["type"]
        # Ensure json in context is well-formed with double quotes.
        # fix: compute schema_str unconditionally; it was previously assigned
        # only inside the example_instruction branch, so the simple branch
        # raised NameError whenever example_instruction=False
        schema_str = json.dumps(reduced_schema)
        if self.example_instruction:
            return PYDANTIC_FORMAT_INSTRUCTIONS.format(schema=schema_str)
        else:
            return PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE.format(schema=schema_str)

    def generate_output(self, text):
        """Extract the first {...} candidate from `text` and parse it via the schema."""
        # Greedy search for 1st json candidate.
        match = re.search(
            r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL
        )
        json_str = ""
        if match: json_str = match.group()
        json_object = json.loads(json_str, strict=False)
        final_object = self.pydantic_object.parse_obj(json_object)
        return final_object

    def generate_repair_prompt(self, broken_json, error):
        """Compose a prompt asking the model to repair a broken json string."""
        prompt = "Fix a broken json string.\n\n" + \
                 "(1) The broken json string need to fix is: \n\n" + \
                 "```" + "\n" + \
                 broken_json + "\n" + \
                 "```" + "\n\n" + \
                 "(2) The error message is: \n\n" + \
                 error + "\n\n" + \
                 "Now, fix this json string. \n\n"
        return prompt

    def generate_output_auto_repair(self, response, gpt_gen_fn):
        """
        response: string containing canidate json
        gpt_gen_fn: gpt_gen_fn(inputs, sys_prompt)
        """
        try:
            result = self.generate_output(response)
        except Exception as e:
            try:
                logging.info(f'Repairing json:{response}')
                repair_prompt = self.generate_repair_prompt(broken_json = response, error=repr(e))
                result = self.generate_output(gpt_gen_fn(repair_prompt, self.format_instructions))
                logging.info('Repaire json success.')
            except Exception as e:
                # out of options, give up
                logging.info('Repaire json fail.')
                raise JsonStringError('Cannot repair json.', str(e))
        return result
+
crazy_functions/live_audio/aliyunASR.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time, logging, json, sys, struct
2
+ import numpy as np
3
+ from scipy.io.wavfile import WAVE_FORMAT
4
+
5
def write_numpy_to_wave(filename, rate, data, add_header=False):
    """Serialize a NumPy array as WAV-style audio bytes.

    filename: output path, or any object with a ``write`` method (stream)
    rate: sample rate in Hz
    data: int / float / uint8 NumPy array; 1-D = mono, 2-D = (frames, channels)
    add_header: when True, emit the RIFF/fmt (and fact) chunks and patch the
                final RIFF size; when False, only the raw 'data' chunk is
                written (useful for streaming raw PCM).

    When writing to a stream, the stream is rewound to position 0 afterwards;
    when writing to a path, the file is closed.
    """
    target_is_stream = hasattr(filename, 'write')
    stream = filename if target_is_stream else open(filename, 'wb')

    try:
        kind = data.dtype.kind
        sample_bytes = data.dtype.itemsize
        # Only signed ints, floats, and 8-bit unsigned samples are valid WAV payloads.
        if kind not in ('i', 'f') and not (kind == 'u' and sample_bytes == 1):
            raise ValueError("Unsupported data type '%s'" % data.dtype)

        is_pcm = kind in ('i', 'u')  # floats use the IEEE_FLOAT format tag
        n_channels = 1 if data.ndim == 1 else data.shape[1]
        bits = sample_bytes * 8
        frame_size = n_channels * (bits // 8)           # block align
        byte_rate = rate * (bits // 8) * n_channels

        fmt_tag = WAVE_FORMAT.PCM if is_pcm else WAVE_FORMAT.IEEE_FLOAT
        fmt_payload = struct.pack('<HHIIHH', fmt_tag, n_channels, rate,
                                  byte_rate, frame_size, bits)
        if not is_pcm:
            fmt_payload += b'\x00\x00'  # cbSize field required for non-PCM

        # RIFF size is a placeholder, patched after the data chunk is written.
        header = b'RIFF' + b'\x00\x00\x00\x00' + b'WAVE'
        header += b'fmt ' + struct.pack('<I', len(fmt_payload)) + fmt_payload
        if not is_pcm:
            # 'fact' chunk is mandatory for non-PCM formats.
            header += b'fact' + struct.pack('<II', 4, data.shape[0])

        # WAV sizes are 32-bit; reject anything that would overflow.
        if (len(header) - 8) + (8 + data.nbytes) > 0xFFFFFFFF:
            raise ValueError("Data exceeds wave file size limit")

        if add_header:
            stream.write(header)
        # data chunk (always written)
        stream.write(b'data')
        stream.write(struct.pack('<I', data.nbytes))
        # WAV payloads are little-endian; swap big-endian sample buffers.
        if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and
                                           sys.byteorder == 'big'):
            data = data.byteswap()
        stream.write(data.ravel().view('b').data)

        if add_header:
            # Patch the RIFF chunk size now that the payload length is known.
            end_pos = stream.tell()
            stream.seek(4)
            stream.write(struct.pack('<I', end_pos - 8))
    finally:
        if target_is_stream:
            stream.seek(0)
        else:
            stream.close()
85
+
86
def is_speaker_speaking(vad, data, sample_rate):
    """Run WebRTC-style VAD over 16-bit mono PCM bytes.

    The VAD only accepts 8000/16000/32000/48000 Hz audio in frames of
    10, 20 or 30 ms. Returns (speech_detected, info) where *info* is a
    short '^'/'.' string (at most 10 chars) visualizing per-frame results.
    """
    frame_duration_ms = 30
    # Bytes per frame: samples-per-frame x 2 (16-bit samples are 2 bytes).
    frame_bytes = int(sample_rate * frame_duration_ms / 1000) * 2
    flags = [
        vad.is_speech(data[end - frame_bytes:end], sample_rate)
        for end in range(frame_bytes, len(data), frame_bytes)
    ]
    info = ''.join('^' if f else '.' for f in flags)[:10]
    return any(flags), info
104
+
105
+
106
class AliyunASR():
    """
    Realtime speech recognition via the Aliyun NLS (speech transcriber) service.

    Designed as a mixin: the hosting object is expected to provide
    `self.stop`, `self.stop_msg`, `self.parsed_text`, `self.parsed_sentence`,
    `self.audio_shape`, and the threading events `self.event_on_result_chg`
    and `self.event_on_entence_end` (sic -- name kept for compatibility).
    """

    # --------------------- NLS SDK callbacks ---------------------

    def test_on_sentence_begin(self, message, *args):
        # Server signalled the start of a new sentence; nothing to do.
        pass

    def test_on_sentence_end(self, message, *args):
        # A complete sentence was recognized: store it and wake the consumer.
        message = json.loads(message)
        self.parsed_sentence = message['payload']['result']
        self.event_on_entence_end.set()

    def test_on_start(self, message, *args):
        # Transcription session established; nothing to do.
        pass

    def test_on_error(self, message, *args):
        logging.error("on_error args=>{}".format(args))
        pass

    def test_on_close(self, *args):
        # Connection closed by the server -- mark the service as unavailable.
        self.aliyun_service_ok = False
        pass

    def test_on_result_chg(self, message, *args):
        # Intermediate (partial) recognition result changed.
        message = json.loads(message)
        self.parsed_text = message['payload']['result']
        self.event_on_result_chg.set()

    def test_on_completed(self, message, *args):
        # Transcription request fully completed; nothing to do.
        pass

    # --------------------- capture / streaming loop ---------------------

    def audio_convertion_thread(self, uuid):
        """Worker thread: pull captured audio, gate it with VAD, and stream
        voiced frames to the Aliyun transcriber until `self.stop` is set."""
        import nls # pip install git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
        import tempfile
        from toolbox import get_conf
        from .audio_io import change_sample_rate
        from .audio_io import RealtimeAudioDistribution
        NEW_SAMPLERATE = 16000  # NLS expects 16 kHz 16-bit mono PCM
        rad = RealtimeAudioDistribution()
        rad.clean_up()
        temp_folder = tempfile.gettempdir()
        TOKEN, APPKEY = get_conf('ALIYUN_TOKEN', 'ALIYUN_APPKEY')
        if len(TOKEN) == 0:
            TOKEN = self.get_token()
        self.aliyun_service_ok = True
        URL="wss://nls-gateway.aliyuncs.com/ws/v1"
        sr = nls.NlsSpeechTranscriber(
                    url=URL,
                    token=TOKEN,
                    appkey=APPKEY,
                    on_sentence_begin=self.test_on_sentence_begin,
                    on_sentence_end=self.test_on_sentence_end,
                    on_start=self.test_on_start,
                    on_result_changed=self.test_on_result_chg,
                    on_completed=self.test_on_completed,
                    on_error=self.test_on_error,
                    on_close=self.test_on_close,
                    callback_args=[uuid.hex]
                )
        timeout_limit_second = 20
        r = sr.start(aformat="pcm",
                timeout=timeout_limit_second,
                enable_intermediate_result=True,
                enable_punctuation_prediction=True,
                enable_inverse_text_normalization=True)

        import webrtcvad
        vad = webrtcvad.Vad()
        vad.set_mode(1)

        is_previous_frame_transmitted = False   # did the previous chunk contain speech?
        previous_frame_data = None
        echo_cnt = 0        # after speech stops, keep sending this many more chunks
        echo_cnt_max = 4
        keep_alive_last_send_time = time.time()
        while not self.stop:
            audio = rad.read(uuid.hex)
            if audio is not None:
                # Downsample the 48 kHz capture to 16 kHz and dump raw PCM.
                temp_file = f'{temp_folder}/{uuid.hex}.pcm'
                dsdata = change_sample_rate(audio, rad.rate, NEW_SAMPLERATE) # 48000 --> 16000
                write_numpy_to_wave(temp_file, NEW_SAMPLERATE, dsdata)
                # read pcm binary
                with open(temp_file, "rb") as f: data = f.read()
                is_speaking, info = is_speaker_speaking(vad, data, NEW_SAMPLERATE)

                if is_speaking or echo_cnt > 0:
                    # Microphone active, or still inside the echo tail-off phase.
                    echo_cnt -= 1
                    if not is_previous_frame_transmitted:
                        # Previous chunk was silent: prepend it for context.
                        if previous_frame_data is not None: data = previous_frame_data + data
                    if is_speaking:
                        echo_cnt = echo_cnt_max
                    slices = zip(*(iter(data),) * 640)  # send in 640-byte groups
                    for i in slices: sr.send_audio(bytes(i))
                    keep_alive_last_send_time = time.time()
                    is_previous_frame_transmitted = True
                else:
                    is_previous_frame_transmitted = False
                    echo_cnt = 0
                    # Keep the websocket alive: even in silence, periodically
                    # send audio so the server does not time the session out.
                    if time.time() - keep_alive_last_send_time > timeout_limit_second/2:
                        slices = zip(*(iter(data),) * 640)
                        for i in slices: sr.send_audio(bytes(i))
                        keep_alive_last_send_time = time.time()
                        is_previous_frame_transmitted = True
                self.audio_shape = info
            else:
                time.sleep(0.1)

        if not self.aliyun_service_ok:
            self.stop = True
            self.stop_msg = 'Aliyun音频服务异常,请检查ALIYUN_TOKEN和ALIYUN_APPKEY是否过期。'
        r = sr.stop()

    def get_token(self):
        """Request a temporary NLS access token via the Aliyun OpenAPI.

        Raises RuntimeError when no token could be obtained (previously this
        path crashed with an unrelated NameError because `token` was unbound).
        """
        from toolbox import get_conf
        import json
        from aliyunsdkcore.request import CommonRequest
        from aliyunsdkcore.client import AcsClient
        AccessKey_ID, AccessKey_secret = get_conf('ALIYUN_ACCESSKEY', 'ALIYUN_SECRET')

        # Create the AcsClient instance.
        client = AcsClient(
            AccessKey_ID,
            AccessKey_secret,
            "cn-shanghai"
        )

        # Build the CreateToken request.
        request = CommonRequest()
        request.set_method('POST')
        request.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
        request.set_version('2019-02-28')
        request.set_action_name('CreateToken')

        token = None  # fix: was unbound when the request failed, masking the real error
        try:
            response = client.do_action_with_exception(request)
            print(response)
            jss = json.loads(response)
            if 'Token' in jss and 'Id' in jss['Token']:
                token = jss['Token']['Id']
                expireTime = jss['Token']['ExpireTime']
                print("token = " + token)
                print("expireTime = " + str(expireTime))
        except Exception as e:
            print(e)

        if token is None:
            raise RuntimeError("Failed to obtain Aliyun NLS token; check ALIYUN_ACCESSKEY / ALIYUN_SECRET.")
        return token
crazy_functions/live_audio/audio_io.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from scipy import interpolate
3
+
4
def Singleton(cls):
    """Class decorator: at most one instance of *cls* is ever created.

    The first call's constructor arguments win; later calls return the
    cached instance regardless of their arguments.
    """
    _instances = {}

    def _get_instance(*args, **kwargs):
        try:
            return _instances[cls]
        except KeyError:
            instance = cls(*args, **kwargs)
            _instances[cls] = instance
            return instance

    return _get_instance
13
+
14
+
15
@Singleton
class RealtimeAudioDistribution():
    # Process-wide (singleton) buffer that distributes realtime audio chunks,
    # keyed by session uuid. Producers call feed(); the ASR thread calls read().
    def __init__(self) -> None:
        self.data = {}              # uuid -> np.ndarray of buffered samples
        self.max_len = 1024*1024    # cap on buffered samples kept per uuid
        self.rate = 48000           # samples per second; nominally read-only, but feed() updates it from the producer

    def clean_up(self):
        # Drop all buffered audio for every session.
        self.data = {}

    def feed(self, uuid, audio):
        # `audio` is a (sample_rate, samples_ndarray) tuple from the audio frontend.
        self.rate, audio_ = audio
        if uuid not in self.data:
            self.data[uuid] = audio_
        else:
            # Append new samples, keeping only the newest `max_len` of them.
            new_arr = np.concatenate((self.data[uuid], audio_))
            if len(new_arr) > self.max_len: new_arr = new_arr[-self.max_len:]
            self.data[uuid] = new_arr

    def read(self, uuid):
        # Pop and return everything buffered for `uuid`, or None when empty.
        if uuid in self.data:
            res = self.data.pop(uuid)
        else:
            res = None
        return res
42
+
43
def change_sample_rate(audio, old_sr, new_sr):
    """Resample *audio* from *old_sr* to *new_sr* Hz via linear interpolation.

    audio: NumPy array of samples (last axis handled per-channel via .T)
    Returns an int16 array of length int(len(audio) * new_sr / old_sr).
    """
    n_samples = audio.shape[0]
    duration = n_samples / old_sr
    # Sample instants of the original and the resampled signals.
    src_times = np.linspace(0, duration, n_samples)
    dst_times = np.linspace(0, duration, int(n_samples * new_sr / old_sr))
    resampled = interpolate.interp1d(src_times, audio.T)(dst_times).T
    return resampled.astype(np.int16)
crazy_functions/multi_stage/multi_stage_utils.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List
3
+ from toolbox import update_ui_lastest_msg, disable_auto_promotion
4
+ from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder
5
+ from request_llms.bridge_all import predict_no_ui_long_connection
6
+ from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
7
+ import time
8
+ import pickle
9
+
10
def have_any_recent_upload_files(chatbot):
    """Return True iff the chatbot cookies record a file upload made within
    the last five minutes."""
    recent_window_sec = 5 * 60
    if not chatbot:
        return False  # no chatbot context available
    upload_record = chatbot._cookies.get("most_recent_uploaded", None)
    if not upload_record:
        return False  # nothing was ever uploaded
    return (time.time() - upload_record["time"]) < recent_window_sec
17
+
18
class GptAcademicState():
    """Picklable base class for plugin state persisted in the chatbot cookies
    (under the 'plugin_state' key). Subclasses override reset() to define
    their fields."""

    def __init__(self):
        self.reset()

    def reset(self):
        # Subclasses override to (re)initialize their fields.
        pass

    def dump_state(self, chatbot):
        # Persist this state object into the chatbot cookies.
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    def set_state(self, chatbot, key, value):
        # Update one attribute and persist immediately.
        setattr(self, key, value)
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    @staticmethod
    def get_state(chatbot, cls=None):
        """Restore the pickled state from the cookies, or create a fresh one
        (of `cls` when given, else GptAcademicState). Attaches `chatbot`.

        Fix: declared as @staticmethod -- previously this was an undeclared
        instance method, so `instance.get_state(...)` silently bound the
        chatbot argument to `self`. Class-level calls behave as before.
        """
        state = chatbot._cookies.get('plugin_state', None)
        if state is not None: state = pickle.loads(state)
        elif cls is not None: state = cls()
        else: state = GptAcademicState()
        state.chatbot = chatbot
        return state
39
+
40
+
41
class GptAcademicGameBaseState():
    """
    Base class for stateful "game" plugins.

    Lifecycle:
    1. First call: __init__ -> init_game -> lock_plugin (routes all follow-up
       user input back to this plugin via the 'lock_plugin' cookie).
    2. Subsequent calls: sync_state restores the pickled instance from the
       chatbot cookies, then continue_game advances one step (subclasses
       implement `step`).
    """
    def init_game(self, chatbot, lock_plugin):
        # One-time initialization of the per-game bookkeeping fields.
        self.plugin_name = None
        self.callback_fn = None
        self.delete_game = False
        self.step_cnt = 0

    def lock_plugin(self, chatbot):
        # Route subsequent user input to self.callback_fn and persist state.
        if self.callback_fn is None:
            raise ValueError("callback_fn is None")
        chatbot._cookies['lock_plugin'] = self.callback_fn
        self.dump_state(chatbot)

    def get_plugin_name(self):
        if self.plugin_name is None:
            raise ValueError("plugin_name is None")
        return self.plugin_name

    def dump_state(self, chatbot):
        # Persist this instance (pickled) under a per-plugin cookie key.
        chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self)

    def set_state(self, chatbot, key, value):
        # Update one attribute and persist immediately.
        setattr(self, key, value)
        chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self)

    @staticmethod
    def sync_state(chatbot, llm_kwargs, cls, plugin_name, callback_fn, lock_plugin=True):
        # Restore a pickled game state from the cookies, or create a fresh
        # `cls` instance, then refresh the per-call references on it.
        state = chatbot._cookies.get(f'plugin_state/{plugin_name}', None)
        if state is not None:
            state = pickle.loads(state)
        else:
            state = cls()
            state.init_game(chatbot, lock_plugin)
        state.plugin_name = plugin_name
        state.llm_kwargs = llm_kwargs
        state.chatbot = chatbot
        state.callback_fn = callback_fn
        return state

    def continue_game(self, prompt, chatbot, history):
        # Advance the game by one step (subclass implements `step`).
        yield from self.step(prompt, chatbot, history)
        self.step_cnt += 1
        # Persist the updated state.
        self.dump_state(chatbot)
        # When the game is over, release the plugin lock and clear the state.
        if self.delete_game:
            chatbot._cookies['lock_plugin'] = None
            chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = None
        yield from update_ui(chatbot=chatbot, history=history)
crazy_functions/pdf_fns/breakdown_txt.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from crazy_functions.ipc_fns.mp import run_in_subprocess_with_timeout
2
+
3
def force_breakdown(txt, limit, get_token_fn):
    """Brute-force split used when no punctuation / blank-line cut point works:
    return the longest prefix under `limit` tokens, plus the remainder."""
    for cut_at in reversed(range(len(txt))):
        head = txt[:cut_at]
        if get_token_fn(head) < limit:
            return head, txt[cut_at:]
    # Should be unreachable for positive limits (empty prefix has 0 tokens).
    return "Tiktoken未知错误", "Tiktoken未知错误"
10
+
11
+
12
def maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage):
    """Keep the working buffer within a bounded window to speed up tokenizing.

    When the working text exceeds the high watermark (1e5 chars), the excess
    is parked in the storage string; when it drops below the low watermark
    (5e4 chars), parked text is pulled back in. Returns the updated pair.
    """
    low_watermark = int(5e4)
    high_watermark = int(1e5)
    # Refill the working buffer from storage when it runs low.
    if remain_txt_to_cut_storage and len(remain_txt_to_cut) < low_watermark:
        remain_txt_to_cut += remain_txt_to_cut_storage
        remain_txt_to_cut_storage = ""
    # Spill any excess beyond the high watermark back into storage.
    overflow = remain_txt_to_cut[high_watermark:]
    if overflow:
        remain_txt_to_cut_storage = overflow + remain_txt_to_cut_storage
        remain_txt_to_cut = remain_txt_to_cut[:high_watermark]
    return remain_txt_to_cut, remain_txt_to_cut_storage
26
+
27
+
28
def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=False):
    """Split `txt_tocut` into fragments whose token count stays within `limit`.

    limit: max tokens per fragment
    get_token_fn: callable str -> int, counts tokens
    must_break_at_empty_line: only cut at blank lines (paragraph boundaries)
    break_anyway: when no clean cut point exists, force a hard cut via
                  force_breakdown instead of raising RuntimeError
    Returns the list of fragments.
    """
    res = []
    total_len = len(txt_tocut)
    fin_len = 0
    remain_txt_to_cut = txt_tocut
    remain_txt_to_cut_storage = ""
    # Speed trick: keep only a bounded window in `remain_txt_to_cut`; overflow
    # text is parked in `remain_txt_to_cut_storage` (see maintain_storage).
    remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)

    while True:
        if get_token_fn(remain_txt_to_cut) <= limit:
            # Remaining text already fits the token budget -- done.
            res.append(remain_txt_to_cut); fin_len+=len(remain_txt_to_cut)
            break
        else:
            # Over budget -- search for a line boundary to cut at.
            lines = remain_txt_to_cut.split('\n')

            # Rough estimate of the cut line, assuming evenly spread tokens.
            estimated_line_cut = limit / get_token_fn(remain_txt_to_cut) * len(lines)
            estimated_line_cut = int(estimated_line_cut)

            # Walk backwards from the estimate to find a feasible cut (cnt).
            cnt = 0
            for cnt in reversed(range(estimated_line_cut)):
                if must_break_at_empty_line:
                    # Only blank lines ("\n\n" boundaries) are acceptable.
                    if lines[cnt] != "":
                        continue
                prev = "\n".join(lines[:cnt])
                post = "\n".join(lines[cnt:])
                if get_token_fn(prev) < limit:
                    break

            if cnt == 0:
                # No usable cut point was found.
                if break_anyway:
                    # Allowed to cut brutally mid-line.
                    prev, post = force_breakdown(remain_txt_to_cut, limit, get_token_fn)
                else:
                    # Not allowed -- report the pathological (over-long) line.
                    raise RuntimeError(f"存在一行极长的文本!{remain_txt_to_cut}")

            # Commit this fragment.
            res.append(prev); fin_len+=len(prev)
            # Prepare the next iteration.
            remain_txt_to_cut = post
            remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)
            process = fin_len/total_len
            print(f'正在文本切分 {int(process*100)}%')
            if len(remain_txt_to_cut.strip()) == 0:
                break
    return res
83
+
84
+
85
def breakdown_text_to_satisfy_token_limit_(txt, limit, llm_model="gpt-3.5-turbo"):
    """Split `txt` into chunks of at most `limit` tokens (per `llm_model`'s
    tokenizer), trying progressively more aggressive cut strategies."""
    from request_llms.bridge_all import model_info
    enc = model_info[llm_model]['tokenizer']
    def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))
    try:
        # Attempt 1: cut only at blank lines (paragraph boundaries).
        return cut(limit, get_token_fn, txt, must_break_at_empty_line=True)
    except RuntimeError:
        try:
            # Attempt 2: cut at any line break.
            return cut(limit, get_token_fn, txt, must_break_at_empty_line=False)
        except RuntimeError:
            try:
                # Attempt 3: cut at English periods. The Chinese full stop is a
                # deliberate sentinel marker (unlikely to appear in the text).
                res = cut(limit, get_token_fn, txt.replace('.', '。\n'), must_break_at_empty_line=False)
                return [r.replace('。\n', '.') for r in res]
            except RuntimeError as e:
                try:
                    # Attempt 4: cut at Chinese full stops.
                    res = cut(limit, get_token_fn, txt.replace('。', '。。\n'), must_break_at_empty_line=False)
                    return [r.replace('。。\n', '。') for r in res]
                except RuntimeError as e:
                    # Attempt 5: last resort -- force a hard cut anywhere.
                    return cut(limit, get_token_fn, txt, must_break_at_empty_line=False, break_anyway=True)
111
+
112
# Run the splitter inside a watchdog subprocess so that a pathological input
# cannot hang the UI for more than 60 seconds.
breakdown_text_to_satisfy_token_limit = run_in_subprocess_with_timeout(breakdown_text_to_satisfy_token_limit_, timeout=60)

if __name__ == '__main__':
    # Manual smoke test: split a large PDF text extract.
    from crazy_functions.crazy_utils import read_and_clean_pdf_text
    file_content, page_one = read_and_clean_pdf_text("build/assets/at.pdf")

    from request_llms.bridge_all import model_info
    for i in range(5):
        file_content += file_content  # inflate the payload (x32)

    print(len(file_content))
    TOKEN_LIMIT_PER_FRAGMENT = 2500
    res = breakdown_text_to_satisfy_token_limit(file_content, TOKEN_LIMIT_PER_FRAGMENT)
125
+
crazy_functions/pdf_fns/parse_pdf.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import lru_cache
2
+ from toolbox import gen_time_str
3
+ from toolbox import promote_file_to_downloadzone
4
+ from toolbox import write_history_to_file, promote_file_to_downloadzone
5
+ from toolbox import get_conf
6
+ from toolbox import ProxyNetworkActivate
7
+ from colorful import *
8
+ import requests
9
+ import random
10
+ import copy
11
+ import os
12
+ import math
13
+
14
class GROBID_OFFLINE_EXCEPTION(Exception): pass  # raised when no GROBID endpoint is reachable
15
+
16
def get_avail_grobid_url():
    """Return one reachable GROBID endpoint chosen at random from the
    configured GROBID_URLS, or None when none is configured or alive."""
    GROBID_URLS = get_conf('GROBID_URLS')
    if len(GROBID_URLS) == 0: return None
    try:
        _grobid_url = random.choice(GROBID_URLS) # naive random load balancing over endpoints
        if _grobid_url.endswith('/'): _grobid_url = _grobid_url.rstrip('/')
        with ProxyNetworkActivate('Connect_Grobid'):
            # timeout added so a dead endpoint cannot hang the caller indefinitely
            res = requests.get(_grobid_url+'/api/isalive', timeout=5)
            if res.text=='true': return _grobid_url
            else: return None
    except Exception:
        # Fix: narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit); any probe failure means "unavailable".
        return None
28
+
29
@lru_cache(maxsize=32)
def parse_pdf(pdf_path, grobid_url):
    """Parse a PDF into a structured article dict via a GROBID service.

    Results are memoized on (pdf_path, grobid_url).
    Raises GROBID_OFFLINE_EXCEPTION when the service is down, RuntimeError
    (chained to the original error) when parsing fails.
    """
    import scipdf # pip install scipdf_parser
    if grobid_url.endswith('/'): grobid_url = grobid_url.rstrip('/')
    try:
        with ProxyNetworkActivate('Connect_Grobid'):
            article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
    except GROBID_OFFLINE_EXCEPTION:
        raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用,请修改config中的GROBID_URL,可修改成本地GROBID服务。")
    except Exception as e:
        # Fix: narrowed from a bare `except:` and chained the cause so the
        # real parsing failure is not silently discarded.
        raise RuntimeError("解析PDF失败,请检查PDF是否损坏。") from e
    return article_dict
41
+
42
+
43
def produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chatbot, fp, generated_conclusion_files):
    """Write two markdown reports from the translation results and register
    them in the download zone.

    gpt_response_collection alternates [prompt_0, answer_0, prompt_1, ...].
    (fp is accepted for interface symmetry; it is not used here.)
    Returns the path of the translated-only markdown file.
    """
    # -=-=-=-=-=-=-=-= File 1: original and translation interleaved -=-=-=-=-=-=-=-=
    res_path = write_history_to_file(meta + ["# Meta Translation" , paper_meta_info] + gpt_response_collection, file_basename=f"{gen_time_str()}translated_and_original.md", file_fullname=None)
    promote_file_to_downloadzone(res_path, rename_file=os.path.basename(res_path)+'.md', chatbot=chatbot)
    generated_conclusion_files.append(res_path)

    # -=-=-=-=-=-=-=-= File 2: translated text only -=-=-=-=-=-=-=-=
    translated_res_array = []
    # Track the current top-level section heading:
    last_section_name = ""
    for index, value in enumerate(gpt_response_collection):
        # Odd indices hold the translated fragments (even ones hold the prompts):
        if index % 2 != 0:
            # Extract the English heading of this fragment (strip " Part-N" suffix):
            cur_section_name = gpt_response_collection[index-1].split('\n')[0].split(" Part")[0]
            # Emit the heading only when the section changes:
            if cur_section_name != last_section_name:
                cur_value = cur_section_name + '\n'
                last_section_name = copy.deepcopy(cur_section_name)
            else:
                cur_value = ""
            # Keep the (English) heading, then append the translated body:
            cur_value += value
            translated_res_array.append(cur_value)
    res_path = write_history_to_file(meta + ["# Meta Translation" , paper_meta_info] + translated_res_array,
                                     file_basename = f"{gen_time_str()}-translated_only.md",
                                     file_fullname = None,
                                     auto_caption = False)
    promote_file_to_downloadzone(res_path, rename_file=os.path.basename(res_path)+'.md', chatbot=chatbot)
    generated_conclusion_files.append(res_path)
    return res_path
74
+
75
def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_files, TOKEN_LIMIT_PER_FRAGMENT, DST_LANG):
    """Translate a GROBID-parsed article dict section by section into DST_LANG,
    then write markdown and HTML reports. Generator: yields UI updates.

    article_dict: dict with 'title'/'authors'/'abstract'/'sections' keys
    fp: source PDF path (used for naming the HTML report)
    """
    from crazy_functions.pdf_fns.report_gen_html import construct_html
    from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
    from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency

    prompt = "以下是一篇学术论文的基本信息:\n"
    # title
    title = article_dict.get('title', '无法获取 title'); prompt += f'title:{title}\n\n'
    # authors (truncated to 100 chars)
    authors = article_dict.get('authors', '无法获取 authors')[:100]; prompt += f'authors:{authors}\n\n'
    # abstract
    abstract = article_dict.get('abstract', '无法获取 abstract'); prompt += f'abstract:{abstract}\n\n'
    # command
    prompt += f"请将题目和摘要翻译为{DST_LANG}。"
    meta = [f'# Title:\n\n', title, f'# Abstract:\n\n', abstract ]

    # Single thread: translate the paper meta info (title + abstract) first.
    paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=prompt,
        inputs_show_user=prompt,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot, history=[],
        sys_prompt="You are an academic paper reader。",
    )

    # Multiple threads: translate the body sections.
    inputs_array = []
    inputs_show_user_array = []

    # get_token_num
    from request_llms.bridge_all import model_info
    enc = model_info[llm_kwargs['llm_model']]['tokenizer']
    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

    def break_down(txt):
        # Split one section into fragments below the per-request token budget.
        raw_token_num = get_token_num(txt)
        if raw_token_num <= TOKEN_LIMIT_PER_FRAGMENT:
            return [txt]
        else:
            # raw_token_num > TOKEN_LIMIT_PER_FRAGMENT
            # find a smooth token limit to achieve even seperation
            count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT))
            token_limit_smooth = raw_token_num // count + count
            return breakdown_text_to_satisfy_token_limit(txt, limit=token_limit_smooth, llm_model=llm_kwargs['llm_model'])

    for section in article_dict.get('sections'):
        if len(section['text']) == 0: continue
        section_frags = break_down(section['text'])
        for i, fragment in enumerate(section_frags):
            heading = section['heading']
            if len(section_frags) > 1: heading += f' Part-{i+1}'
            inputs_array.append(
                f"你需要翻译{heading}章节,内容如下: \n\n{fragment}"
            )
            inputs_show_user_array.append(
                f"# {heading}\n\n{fragment}"
            )

    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=inputs_array,
        inputs_show_user_array=inputs_show_user_array,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history_array=[meta for _ in inputs_array],
        sys_prompt_array=[
            "请你作为一个学术翻译,负责把学术论文准确翻译成中文。注意文章中的每一句话都要翻译。" for _ in inputs_array],
    )
    # -=-=-=-=-=-=-=-= Write the markdown reports -=-=-=-=-=-=-=-=
    produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chatbot, fp, generated_conclusion_files)

    # -=-=-=-=-=-=-=-= Write the HTML report -=-=-=-=-=-=-=-=
    ch = construct_html()
    orig = ""
    trans = ""
    gpt_response_collection_html = copy.deepcopy(gpt_response_collection)
    for i,k in enumerate(gpt_response_collection_html):
        if i%2==0:
            # Even entries: replace the raw prompt with the user-facing fragment.
            gpt_response_collection_html[i] = inputs_show_user_array[i//2]
        else:
            # Odd entries: prepend the English section heading to the translation.
            cur_section_name = gpt_response_collection[i-1].split('\n')[0].split(" Part")[0]
            cur_value = cur_section_name + "\n" + gpt_response_collection_html[i]
            gpt_response_collection_html[i] = cur_value

    final = ["", "", "一、论文概况", "", "Abstract", paper_meta_info, "二、论文翻译", ""]
    final.extend(gpt_response_collection_html)
    # Pair up entries as (original, translated) rows for the HTML table.
    for i, k in enumerate(final):
        if i%2==0:
            orig = k
        if i%2==1:
            trans = k
            ch.add_row(a=orig, b=trans)
    create_report_file_name = f"{os.path.basename(fp)}.trans.html"
    html_file = ch.save_file(create_report_file_name)
    generated_conclusion_files.append(html_file)
    promote_file_to_downloadzone(html_file, rename_file=os.path.basename(html_file), chatbot=chatbot)
crazy_functions/pdf_fns/report_gen_html.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import update_ui, get_conf, trimmed_format_exc, get_log_folder
2
+ import os
3
+
4
+
5
+
6
+
7
class construct_html():
    """Accumulate (original, translated) text pairs and render them into a
    self-contained HTML report based on report_template.html."""

    def __init__(self) -> None:
        self.html_string = ""  # accumulated JS object-literal rows for the template

    def add_row(self, a, b):
        """Append one report row: `a` is the primary (original) text, `b` the
        secondary (translated) text. The first line of each is promoted to a
        header when it looks like a short title."""
        from toolbox import markdown_convertion
        # NOTE: the key `secondary_rol` (sic) must match report_template.html;
        # do not "fix" the spelling here without changing the template too.
        template = """
            {
                primary_col: {
                    header: String.raw`__PRIMARY_HEADER__`,
                    msg: String.raw`__PRIMARY_MSG__`,
                },
                secondary_rol: {
                    header: String.raw`__SECONDARY_HEADER__`,
                    msg: String.raw`__SECONDARY_MSG__`,
                }
            },
        """
        def std(text):  # renamed parameter (was `str`, shadowing the builtin)
            # Escape backticks (they would terminate the JS String.raw literal)
            # and pad trailing characters that could break the substitution.
            text = text.replace(r'`', r'&#96;')
            if text.endswith("\\"): text += ' '
            if text.endswith("}"): text += ' '
            if text.endswith("$"): text += ' '
            return text

        template_ = template
        a_lines = a.split('\n')
        b_lines = b.split('\n')

        if len(a_lines) == 1 or len(a_lines[0]) > 50:
            # No usable title line: use a 20-char prefix as header, full text as body.
            template_ = template_.replace("__PRIMARY_HEADER__", std(a[:20]))
            template_ = template_.replace("__PRIMARY_MSG__", std(markdown_convertion(a)))
        else:
            # Short first line: promote it to the header.
            template_ = template_.replace("__PRIMARY_HEADER__", std(a_lines[0]))
            template_ = template_.replace("__PRIMARY_MSG__", std(markdown_convertion('\n'.join(a_lines[1:]))))

        if len(b_lines) == 1 or len(b_lines[0]) > 50:
            template_ = template_.replace("__SECONDARY_HEADER__", std(b[:20]))
            template_ = template_.replace("__SECONDARY_MSG__", std(markdown_convertion(b)))
        else:
            template_ = template_.replace("__SECONDARY_HEADER__", std(b_lines[0]))
            template_ = template_.replace("__SECONDARY_MSG__", std(markdown_convertion('\n'.join(b_lines[1:]))))
        self.html_string += template_

    def save_file(self, file_name):
        """Render the accumulated rows into report_template.html and write the
        result under the log folder. Returns the written file path."""
        from toolbox import get_log_folder
        with open('crazy_functions/pdf_fns/report_template.html', 'r', encoding='utf8') as f:
            html_template = f.read()
        html_template = html_template.replace("__TF_ARR__", self.html_string)
        with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f:
            # Drop characters that cannot survive a utf-8 round-trip.
            f.write(html_template.encode('utf-8', 'ignore').decode())
        return os.path.join(get_log_folder(), file_name)
crazy_functions/pdf_fns/report_template.html ADDED
The diff for this file is too large to render. See raw diff
 
crazy_functions/vt_fns/vt_call_plugin.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List
3
+ from toolbox import update_ui_lastest_msg, disable_auto_promotion
4
+ from request_llms.bridge_all import predict_no_ui_long_connection
5
+ from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
6
+ import copy, json, pickle, os, sys, time
7
+
8
+
9
def read_avail_plugin_enum():
    """Collect all registered plugins that carry an 'Info' description.

    Returns (enum_prompt, dict_by_id, lenient_dict_by_id) where enum_prompt is
    a JSON listing for the LLM, and the dicts map synthetic IDs to plugins.
    """
    from crazy_functional import get_crazy_functions
    plugin_arr = get_crazy_functions()
    # Drop plugins without an 'Info' explanation -- the LLM cannot choose them sensibly.
    plugin_arr = {k:v for k, v in plugin_arr.items() if 'Info' in v}
    # Stable synthetic IDs: F_0001, F_0002, ...
    plugin_arr_info = {"F_{:04d}".format(i):v["Info"] for i, v in enumerate(plugin_arr.values(), start=1)}
    plugin_arr_dict = {"F_{:04d}".format(i):v for i, v in enumerate(plugin_arr.values(), start=1)}
    plugin_arr_dict_parse = {"F_{:04d}".format(i):v for i, v in enumerate(plugin_arr.values(), start=1)}
    # Also accept non-zero-padded IDs (F_1, F_2, ...) when parsing the LLM answer.
    plugin_arr_dict_parse.update({f"F_{i}":v for i, v in enumerate(plugin_arr.values(), start=1)})
    prompt = json.dumps(plugin_arr_info, ensure_ascii=False, indent=2)
    prompt = "\n\nThe defination of PluginEnum:\nPluginEnum=" + prompt  # NOTE: "defination" (sic) is part of the live prompt string; left unchanged.
    return prompt, plugin_arr_dict, plugin_arr_dict_parse
21
+
22
def wrap_code(txt):
    """Wrap *txt* in a fenced code block, stripping any backtick fences the
    text already contains (they would break the outer fence)."""
    sanitized = txt.replace('```', '')
    return "\n```\n" + sanitized + "\n```\n"
25
+
26
def have_any_recent_upload_files(chatbot):
    """True iff the chatbot cookies record an upload made less than five
    minutes ago."""
    five_minutes = 5 * 60
    if not chatbot:
        return False  # no chatbot context at all
    record = chatbot._cookies.get("most_recent_uploaded", None)
    if not record:
        return False  # nothing was ever uploaded
    return time.time() - record["time"] < five_minutes
33
+
34
def get_recent_file_prompt_support(chatbot):
    """Build a prompt snippet telling the LLM about the user's most recently
    uploaded file (path taken from the chatbot cookies)."""
    most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
    path = most_recent_uploaded['path']
    prompt = "\nAdditional Information:\n"
    # Fix: this line previously reassigned `prompt` with `=`, silently
    # discarding the "Additional Information" header built just above.
    prompt += "In case that this plugin requires a path or a file as argument,"
    prompt += f"it is important for you to know that the user has recently uploaded a file, located at: `{path}`"
    prompt += f"Only use it when necessary, otherwise, you can ignore this file."
    return prompt
42
+
43
def get_inputs_show_user(inputs, plugin_arr_enum_prompt):
    """Shorten *inputs* for display by collapsing the bulky plugin-enum prompt."""
    trimmed = inputs.replace(plugin_arr_enum_prompt, "")
    preview = plugin_arr_enum_prompt[:200]
    # Show only the head of the enum prompt followed by an elided tail.
    return trimmed + preview + '...' + '\n...\n' + '...\n' + '...}'
51
+
52
def execute_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
    """Ask the LLM to pick the best-matching plugin for *txt*, extract its argument, then run it.

    Flow: (1) build a JSON-constrained prompt listing all plugins and let the model
    choose one; (2) ask the model again to extract the plugin argument from the
    user request (optionally mentioning a recently uploaded file); (3) invoke the
    chosen plugin. Yields UI updates throughout; returns early with a Chinese
    error message when the model output cannot be parsed or names an unknown plugin.
    """
    plugin_arr_enum_prompt, plugin_arr_dict, plugin_arr_dict_parse = read_avail_plugin_enum()
    class Plugin(BaseModel):
        # Schema the LLM must fill in: which plugin, and why.
        plugin_selection: str = Field(description="The most related plugin from one of the PluginEnum.", default="F_0000")
        reason_of_selection: str = Field(description="The reason why you should select this plugin.", default="This plugin satisfy user requirement most")
    # ⭐ ⭐ ⭐ Step 1: choose the plugin
    yield from update_ui_lastest_msg(lastmsg=f"正在执行任务: {txt}\n\n查找可用插件中...", chatbot=chatbot, history=history, delay=0)
    gpt_json_io = GptJsonIO(Plugin)
    gpt_json_io.format_instructions = "The format of your output should be a json that can be parsed by json.loads.\n"
    gpt_json_io.format_instructions += """Output example: {"plugin_selection":"F_1234", "reason_of_selection":"F_1234 plugin satisfy user requirement most"}\n"""
    gpt_json_io.format_instructions += "The plugins you are authorized to use are listed below:\n"
    gpt_json_io.format_instructions += plugin_arr_enum_prompt
    inputs = "Choose the correct plugin according to user requirements, the user requirement is: \n\n" + \
        ">> " + txt.rstrip('\n').replace('\n','\n>> ') + '\n\n' + gpt_json_io.format_instructions

    run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
        inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
    try:
        gpt_reply = run_gpt_fn(inputs, "")
        plugin_sel = gpt_json_io.generate_output_auto_repair(gpt_reply, run_gpt_fn)
    except JsonStringError:
        # The model reply could not be parsed as JSON, even after auto-repair.
        msg = f"抱歉, {llm_kwargs['llm_model']}无法理解您的需求。"
        msg += "请求的Prompt为:\n" + wrap_code(get_inputs_show_user(inputs, plugin_arr_enum_prompt))
        msg += "语言模型回复为:\n" + wrap_code(gpt_reply)
        msg += "\n但您可以尝试再试一次\n"
        yield from update_ui_lastest_msg(lastmsg=msg, chatbot=chatbot, history=history, delay=2)
        return
    if plugin_sel.plugin_selection not in plugin_arr_dict_parse:
        # The model invented a plugin key that is not in the enum.
        msg = f"抱歉, 找不到合适插件执行该任务, 或者{llm_kwargs['llm_model']}无法理解您的需求。"
        msg += f"语言模型{llm_kwargs['llm_model']}选择了不存在的插件:\n" + wrap_code(gpt_reply)
        msg += "\n但您可以尝试再试一次\n"
        yield from update_ui_lastest_msg(lastmsg=msg, chatbot=chatbot, history=history, delay=2)
        return

    # ⭐ ⭐ ⭐ Step 2: confirm the plugin argument
    if not have_any_recent_upload_files(chatbot):
        appendix_info = ""
    else:
        # Mention the freshly uploaded file so the model can use its path as the argument.
        appendix_info = get_recent_file_prompt_support(chatbot)

    plugin = plugin_arr_dict_parse[plugin_sel.plugin_selection]
    yield from update_ui_lastest_msg(lastmsg=f"正在执行任务: {txt}\n\n提取插件参数...", chatbot=chatbot, history=history, delay=0)
    class PluginExplicit(BaseModel):
        plugin_selection: str = plugin_sel.plugin_selection
        plugin_arg: str = Field(description="The argument of the plugin.", default="")
    gpt_json_io = GptJsonIO(PluginExplicit)
    gpt_json_io.format_instructions += "The information about this plugin is:" + plugin["Info"]
    inputs = f"A plugin named {plugin_sel.plugin_selection} is selected, " + \
        "you should extract plugin_arg from the user requirement, the user requirement is: \n\n" + \
        ">> " + (txt + appendix_info).rstrip('\n').replace('\n','\n>> ') + '\n\n' + \
        gpt_json_io.format_instructions
    run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
        inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
    plugin_sel = gpt_json_io.generate_output_auto_repair(run_gpt_fn(inputs, ""), run_gpt_fn)


    # ⭐ ⭐ ⭐ Step 3: run the plugin
    fn = plugin['Function']
    fn_name = fn.__name__
    msg = f'{llm_kwargs["llm_model"]}为您选择了插件: `{fn_name}`\n\n插件说明:{plugin["Info"]}\n\n插件参数:{plugin_sel.plugin_arg}\n\n假如偏离了您的要求,按停止键终止。'
    yield from update_ui_lastest_msg(lastmsg=msg, chatbot=chatbot, history=history, delay=2)
    # -1 fills the trailing web_port-style parameter seen in the plugin signatures of this package.
    yield from fn(plugin_sel.plugin_arg, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, -1)
    return
crazy_functions/vt_fns/vt_modify_config.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List
3
+ from toolbox import update_ui_lastest_msg, get_conf
4
+ from request_llms.bridge_all import predict_no_ui_long_connection
5
+ from crazy_functions.json_fns.pydantic_io import GptJsonIO
6
+ import copy, json, pickle, os, sys
7
+
8
+
9
def modify_configuration_hot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
    """Hot-apply a config.py change requested in natural language.

    The LLM maps the request onto one of the options discovered in config.py
    (ModifyConfigurationIntention schema); the change is applied via set_conf
    without restarting. Guarded by ALLOW_RESET_CONFIG and by the requirement
    that the option name appears literally in the user's text.
    """
    ALLOW_RESET_CONFIG = get_conf('ALLOW_RESET_CONFIG')
    if not ALLOW_RESET_CONFIG:
        yield from update_ui_lastest_msg(
            lastmsg=f"当前配置不允许被修改!如需激活本功能,请在config.py中设置ALLOW_RESET_CONFIG=True后重启软件。",
            chatbot=chatbot, history=history, delay=2
        )
        return

    # ⭐ ⭐ ⭐ Read the available configuration entries from config.py
    names = {}
    from enum import Enum
    import config
    for k, v in config.__dict__.items():
        if k.startswith('__'): continue
        names.update({k:k})
        # if len(names) > 20: break # cap the option count; too many entries confuse the model

    ConfigOptions = Enum('ConfigOptions', names)
    class ModifyConfigurationIntention(BaseModel):
        # Schema the LLM fills in: which option to change and its new value.
        which_config_to_modify: ConfigOptions = Field(description="the name of the configuration to modify, you must choose from one of the ConfigOptions enum.", default=None)
        new_option_value: str = Field(description="the new value of the option", default=None)

    # ⭐ ⭐ ⭐ Analyze the user's intention
    yield from update_ui_lastest_msg(lastmsg=f"正在执行任务: {txt}\n\n读取新配置中", chatbot=chatbot, history=history, delay=0)
    gpt_json_io = GptJsonIO(ModifyConfigurationIntention)
    inputs = "Analyze how to change configuration according to following user input, answer me with json: \n\n" + \
        ">> " + txt.rstrip('\n').replace('\n','\n>> ') + '\n\n' + \
        gpt_json_io.format_instructions

    run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
        inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
    user_intention = gpt_json_io.generate_output_auto_repair(run_gpt_fn(inputs, ""), run_gpt_fn)

    explicit_conf = user_intention.which_config_to_modify.value

    # Safety gate: only act when the option name literally appears in the user's text.
    ok = (explicit_conf in txt)
    if ok:
        yield from update_ui_lastest_msg(
            lastmsg=f"正在执行任务: {txt}\n\n新配置{explicit_conf}={user_intention.new_option_value}",
            chatbot=chatbot, history=history, delay=1
        )
        yield from update_ui_lastest_msg(
            lastmsg=f"正在执行任务: {txt}\n\n新配置{explicit_conf}={user_intention.new_option_value}\n\n正在修改配置中",
            chatbot=chatbot, history=history, delay=2
        )

        # ⭐ ⭐ ⭐ Apply the configuration immediately
        from toolbox import set_conf
        set_conf(explicit_conf, user_intention.new_option_value)

        yield from update_ui_lastest_msg(
            lastmsg=f"正在执行任务: {txt}\n\n配置修改完成,重新页面即可生效。", chatbot=chatbot, history=history, delay=1
        )
    else:
        yield from update_ui_lastest_msg(
            lastmsg=f"失败,如果需要配置{explicit_conf},您需要明确说明并在指令中提到它。", chatbot=chatbot, history=history, delay=5
        )
67
+
68
def modify_configuration_reboot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
    """Apply a hot configuration change, then restart the whole process."""
    if not get_conf('ALLOW_RESET_CONFIG'):
        yield from update_ui_lastest_msg(
            lastmsg=f"当前配置不允许被修改!如需激活本功能,请在config.py中设置ALLOW_RESET_CONFIG=True后重启软件。",
            chatbot=chatbot, history=history, delay=2
        )
        return

    # Delegate the actual change, announce the restart, then re-exec the interpreter.
    yield from modify_configuration_hot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)
    yield from update_ui_lastest_msg(
        lastmsg=f"正在执行任务: {txt}\n\n配置修改完成,五秒后即将重启!若出现报错请无视即可。", chatbot=chatbot, history=history, delay=5
    )
    os.execl(sys.executable, sys.executable, *sys.argv)
crazy_functions/vt_fns/vt_state.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+
3
class VoidTerminalState():
    """Pickle-able state of the Void Terminal plugin, persisted in chatbot cookies."""

    def __init__(self):
        self.reset_state()

    def reset_state(self):
        # Whether the introductory explanation has already been shown to the user.
        self.has_provided_explaination = False

    def _persist(self, chatbot):
        # Serialize this object into the cookie store.
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    def lock_plugin(self, chatbot):
        # Route every subsequent message back into the Void Terminal plugin.
        chatbot._cookies['lock_plugin'] = 'crazy_functions.虚空终端->虚空终端'
        self._persist(chatbot)

    def unlock_plugin(self, chatbot):
        self.reset_state()
        chatbot._cookies['lock_plugin'] = None
        self._persist(chatbot)

    def set_state(self, chatbot, key, value):
        setattr(self, key, value)
        self._persist(chatbot)

    def get_state(chatbot):
        # NOTE: no `self` — called as VoidTerminalState.get_state(chatbot);
        # restores the pickled state from cookies or creates a fresh one.
        pickled = chatbot._cookies.get('plugin_state', None)
        state = pickle.loads(pickled) if pickled is not None else VoidTerminalState()
        state.chatbot = chatbot
        return state
crazy_functions/命令行助手.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import CatchException, update_ui, gen_time_str
2
+ from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
3
+ from .crazy_utils import input_clipping
4
+ import copy, json
5
+
6
@CatchException
def 命令行助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Turn a natural-language request into a bash command via the LLM.

    txt is the user's description of what the command should do; the remaining
    arguments follow the standard plugin signature (model kwargs, plugin kwargs,
    chat display handle, chat history, silent system prompt, app port).
    """
    # Drop prior context so a long conversation cannot overflow the model input.
    history = []

    prompt = "请写bash命令实现以下功能:" + txt
    # Ask the model in a worker thread while keeping the UI responsive.
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=prompt, inputs_show_user=txt,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
        sys_prompt="你是一个Linux大师级用户。注意,当我要求你写bash命令时,尽可能地仅用一行命令解决我的要求。"
    )
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
29
+
30
+
31
+
crazy_functions/对话历史存档.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import CatchException, update_ui, promote_file_to_downloadzone, get_log_folder, get_user
2
+ import re
3
+
4
+ f_prefix = 'GPT-Academic对话存档'
5
+
6
def write_chat_to_file(chatbot, history=None, file_name=None):
    """Write the chat transcript to an HTML file and promote it to the download zone.

    chatbot   : chat display handle whose (question, answer) pairs are dumped as HTML
    history   : raw context strings appended inside a <code> block; may be None
    file_name : target file name; auto-generated from the current time when omitted
    Returns a human-readable message containing the output path.
    """
    import os
    import time
    if history is None:
        history = []  # BUGFIX: the old code iterated over None when history was omitted
    if file_name is None:
        file_name = f_prefix + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html'
    fp = os.path.join(get_log_folder(get_user(chatbot), plugin_name='chat_history'), file_name)
    with open(fp, 'w', encoding='utf8') as f:
        from themes.theme import advanced_css
        f.write(f'<!DOCTYPE html><head><meta charset="utf-8"><title>对话历史</title><style>{advanced_css}</style></head>')
        for i, contents in enumerate(chatbot):
            for j, content in enumerate(contents):
                try:  # trigger condition unknown; defensively coerce to str (kept from original)
                    if type(content) != str: content = str(content)
                except Exception:
                    continue
                f.write(content)
                if j == 0:
                    # separator between the question and the answer of one round
                    f.write('<hr style="border-top: dotted 3px #ccc;">')
            f.write('<hr color="red"> \n\n')
        f.write('<hr color="blue"> \n\n raw chat context:\n')
        f.write('<code>')
        for h in history:
            f.write("\n>>>" + h)
        f.write('</code>')
    promote_file_to_downloadzone(fp, rename_file=file_name, chatbot=chatbot)
    return '对话历史写入:' + fp
35
+
36
def gen_file_preview(file_name):
    """Return the first 100 chars of the earliest history entry stored in an archive HTML file.

    Returns "" for unreadable files or files that do not match the archive layout.
    """
    try:
        with open(file_name, 'r', encoding='utf8') as f:
            file_content = f.read()
        # Drop the <head>...</head> block (title + CSS).
        pattern = re.compile(r'<head>.*?</head>', flags=re.DOTALL)
        file_content = re.sub(pattern, '', file_content)
        html, history = file_content.split('<hr color="blue"> \n\n raw chat context:\n')
        # BUGFIX: str.strip('<code>') strips a *character set* and could also eat
        # real content ending in c/o/d/e/< /> characters; remove the exact
        # <code>...</code> wrapper instead.
        if history.startswith('<code>'): history = history[len('<code>'):]
        if history.endswith('</code>'): history = history[:-len('</code>')]
        history = history.split("\n>>>")
        return list(filter(lambda x: x != "", history))[0][:100]
    except Exception:
        return ""
50
+
51
def read_file_to_chat(chatbot, history, file_name):
    """Load a chat archive HTML file back into the chatbot display and history.

    Replaces the current chatbot content; returns the updated (chatbot, history).
    Raises on files that do not match the archive layout (caller handles it).
    """
    with open(file_name, 'r', encoding='utf8') as f:
        file_content = f.read()
    # Drop the <head>...</head> block (title + CSS).
    pattern = re.compile(r'<head>.*?</head>', flags=re.DOTALL)
    file_content = re.sub(pattern, '', file_content)
    html, history = file_content.split('<hr color="blue"> \n\n raw chat context:\n')
    # BUGFIX: str.strip('<code>') strips a character set (and could truncate
    # entries ending in c/o/d/e/< /> characters); remove the exact wrapper tags.
    if history.startswith('<code>'): history = history[len('<code>'):]
    if history.endswith('</code>'): history = history[:-len('</code>')]
    history = history.split("\n>>>")
    history = list(filter(lambda x: x != "", history))
    html = html.split('<hr color="red"> \n\n')
    html = list(filter(lambda x: x != "", html))
    chatbot.clear()
    for i, h in enumerate(html):
        i_say, gpt_say = h.split('<hr style="border-top: dotted 3px #ccc;">')
        chatbot.append([i_say, gpt_say])
    chatbot.append([f"存档文件详情?", f"[Local Message] 载入对话{len(html)}条,上下文{len(history)}条。"])
    return chatbot, history
70
+
71
@CatchException
def 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Save the current conversation to an HTML archive file.

    Arguments follow the standard plugin signature; only chatbot and history
    are actually used here.
    """
    saved_msg = write_chat_to_file(chatbot, history)
    chatbot.append(("保存当前对话",
                    f"[Local Message] {saved_msg},您可以调用下拉菜单中的“载入对话历史存档”还原当下的对话。"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI promptly
86
+
87
def hide_cwd(path_str):
    """Replace the current working directory prefix in *path_str* with '.'.

    Parameter renamed from `str`, which shadowed the builtin (callers use it
    positionally, so this is interface-compatible).
    """
    import os
    current_path = os.getcwd()
    return path_str.replace(current_path, ".")
92
+
93
@CatchException
def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Reload a saved chat archive (.html) into the UI.

    txt locates the archive (path or keyword); when no html file is found, the
    locally stored archives are listed so the user can paste one of the paths
    and retry. Arguments follow the standard plugin signature.
    """
    from .crazy_utils import get_files_from_everything
    success, file_manifest, _ = get_files_from_everything(txt, type='.html')

    if not success:
        if txt == "": txt = '空空如也的输入栏'
        import glob
        # Build a readable list of existing archives, each with a short preview.
        local_history = "<br/>".join([
            "`"+hide_cwd(f)+f" ({gen_file_preview(f)})"+"`"
            for f in glob.glob(
                f'{get_log_folder(get_user(chatbot), plugin_name="chat_history")}/**/{f_prefix}*.html',
                recursive=True
            )])
        chatbot.append([f"正在查找对话历史文件(html格式): {txt}", f"找不到任何html文件: {txt}。但本地存储了以下历史文件,您可以将任意一个文件路径粘贴到输入区,然后重试:<br/>{local_history}"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return

    try:
        chatbot, history = read_file_to_chat(chatbot, history, file_manifest[0])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
    except:
        # Malformed or non-archive html file.
        chatbot.append([f"载入对话历史文件", f"对话历史文件损坏!"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
127
+
128
@CatchException
def 删除所有本地对话历史记录(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Delete every locally stored chat-archive html file.

    Arguments follow the standard plugin signature; only chatbot/history are used.
    """
    import glob, os
    archive_glob = f'{get_log_folder(get_user(chatbot), plugin_name="chat_history")}/**/{f_prefix}*.html'
    matched = glob.glob(archive_glob, recursive=True)
    # Record what is being removed before actually deleting the files.
    local_history = "<br/>".join("`" + hide_cwd(f) + "`" for f in matched)
    for f in matched:
        os.remove(f)
    chatbot.append([f"删除所有历史对话文件", f"已删除<br/>{local_history}"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
    return
151
+
152
+
crazy_functions/生成函数注释.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import update_ui
2
+ from toolbox import CatchException, report_exception
3
+ from toolbox import write_history_to_file, promote_file_to_downloadzone
4
+ from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
5
fast_debug = False  # when True, skip the LLM call and delays (development aid)

def 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
    """For each source file, ask the LLM for an overview plus per-function comments (markdown table).

    Results accumulate into `history` and are finally written to a report file
    that is promoted to the download zone.
    """
    import time, os
    print('begin analysis on:', file_manifest)
    for index, fp in enumerate(file_manifest):
        with open(fp, 'r', encoding='utf-8', errors='replace') as f:
            file_content = f.read()

        i_say = f'请对下面的程序文件做一个概述,并对文件中的所有函数生成注释,使用markdown表格输出结果,文件名是{os.path.relpath(fp, project_folder)},文件内容是 ```{file_content}```'
        i_say_show_user = f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述,并对文件中的所有函数生成注释: {os.path.abspath(fp)}'
        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

        if not fast_debug:
            msg = '正常'
            # ** gpt request ** (threaded, with a timeout countdown shown in the UI)
            gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
                i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt)

            chatbot[-1] = (i_say_show_user, gpt_say)
            history.append(i_say_show_user); history.append(gpt_say)
            yield from update_ui(chatbot=chatbot, history=history, msg=msg)  # refresh the UI
        if not fast_debug: time.sleep(2)

    if not fast_debug:
        # NOTE(review): `msg` is only bound inside the loop; an empty file_manifest
        # would raise NameError here — callers currently guarantee a non-empty list.
        res = write_history_to_file(history)
        promote_file_to_downloadzone(res, chatbot=chatbot)
        chatbot.append(("完成了吗?", res))
        yield from update_ui(chatbot=chatbot, history=history, msg=msg)  # refresh the UI
35
+
36
+
37
+
38
@CatchException
def 批量生成函数注释(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Entry point: collect .py/.cpp files under *txt* and generate function comments for them.

    txt is expected to be a local project folder; the remaining arguments follow
    the standard plugin signature.
    """
    history = []    # clear history to avoid input overflow
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.py', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)]

    if len(file_manifest) == 0:
        # BUGFIX: the old message claimed no ".tex" files were found, although
        # this plugin searches for .py/.cpp sources.
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.py或.cpp文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    yield from 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
crazy_functions/联网的ChatGPT.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import CatchException, update_ui
2
+ from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ from request_llms.bridge_all import model_info
6
+
7
def google(query, proxies):
    """Scrape Google search results for *query*; return a list of {'title', 'link'} dicts."""
    url = f"https://www.google.com/search?q={query}"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
    response = requests.get(url, headers=headers, proxies=proxies)
    soup = BeautifulSoup(response.content, 'html.parser')
    results = []
    for hit in soup.find_all('div', class_='g'):
        anchors = hit.find_all('a')
        if not anchors:
            continue
        link = anchors[0]['href']
        # Google sometimes wraps the target in a /url?q= redirect.
        if link.startswith('/url?q='):
            link = link[7:]
        if not link.startswith('http'):
            continue
        results.append({'title': hit.find('h3').text, 'link': link})

    for r in results:
        print(r['link'])
    return results
29
+
30
def scrape_text(url, proxies) -> str:
    """Scrape text from a webpage

    Args:
        url (str): The URL to scrape text from

    Returns:
        str: The scraped visible text, or an error note when the page is unreachable
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
        'Content-Type': 'text/plain',
    }
    try:
        response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
        if response.encoding == "ISO-8859-1":
            # Server declared no encoding; fall back to the sniffed one.
            response.encoding = response.apparent_encoding
    except:
        return "无法连接到该网页"
    soup = BeautifulSoup(response.text, "html.parser")
    # Strip scripts and styles before extracting visible text.
    for tag in soup(["script", "style"]):
        tag.extract()
    raw_lines = (ln.strip() for ln in soup.get_text().splitlines())
    phrases = (piece.strip() for ln in raw_lines for piece in ln.split(" "))
    return "\n".join(p for p in phrases if p)
56
+
57
@CatchException
def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Answer *txt* by googling it, scraping the top hits, and letting the LLM synthesize.

    txt             user input (the question to answer)
    llm_kwargs      model parameters (temperature, top_p, ...), passed through
    plugin_kwargs   plugin parameters (unused here)
    chatbot         chat display handle
    history         chat history (cleared here to avoid input overflow)
    system_prompt   silent system prompt
    web_port        port the app is running on
    """
    history = []    # clear history to avoid input overflow
    chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
                    "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。您若希望分享新的功能模组,请不吝PR!"))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI early: the gpt request below takes a while

    # ------------- < Step 1: scrape the search-engine results > -------------
    from toolbox import get_conf
    proxies = get_conf('proxies')
    urls = google(txt, proxies)
    history = []
    if len(urls) == 0:
        chatbot.append((f"结论:{txt}",
                        "[Local Message] 受到google限制,无法从google获取信息!"))
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
        return
    # ------------- < Step 2: visit each result page > -------------
    max_search_result = 5   # cap on how many pages are folded into the context
    for index, url in enumerate(urls[:max_search_result]):
        res = scrape_text(url['link'], proxies)
        history.extend([f"第{index}份搜索结果:", res])
        chatbot.append([f"第{index}份搜索结果:", res[:500]+"......"])
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
    # ------------- < Step 3: let the LLM synthesize > -------------
    i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
    i_say, history = input_clipping(    # clip the longest entries first so the token limit is not exceeded
        inputs=i_say,
        history=history,
        max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
    )
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=i_say,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
        sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
    )
    chatbot[-1] = (i_say, gpt_say)
    history.append(i_say);history.append(gpt_say)
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
106
+
crazy_functions/联网的ChatGPT_bing版.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import CatchException, update_ui
2
+ from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
3
+ import requests
4
+ from bs4 import BeautifulSoup
5
+ from request_llms.bridge_all import model_info
6
+
7
+
8
def bing_search(query, proxies=None):
    """Scrape cn.bing.com results for *query*; return a list of {'title', 'link'} dicts."""
    url = f"https://cn.bing.com/search?q={query}"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
    response = requests.get(url, headers=headers, proxies=proxies)
    soup = BeautifulSoup(response.content, 'html.parser')
    results = []
    for entry in soup.find_all('li', class_='b_algo'):
        anchors = entry.find_all('a')
        if not anchors:
            continue
        link = anchors[0]['href']
        if not link.startswith('http'):
            continue
        results.append({'title': entry.find('h2').text, 'link': link})

    for r in results:
        print(r['link'])
    return results
28
+
29
+
30
def scrape_text(url, proxies) -> str:
    """Scrape text from a webpage

    Args:
        url (str): The URL to scrape text from

    Returns:
        str: The scraped visible text, or an error note when unreachable
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
        'Content-Type': 'text/plain',
    }
    try:
        response = requests.get(url, headers=request_headers, proxies=proxies, timeout=8)
        if response.encoding == "ISO-8859-1":
            response.encoding = response.apparent_encoding  # use the sniffed encoding instead of the HTTP default
    except:
        return "无法连接到该网页"
    page = BeautifulSoup(response.text, "html.parser")
    # Remove non-visible content before extracting text.
    for non_text in page(["script", "style"]):
        non_text.extract()
    stripped_lines = (line.strip() for line in page.get_text().splitlines())
    fragments = (frag.strip() for line in stripped_lines for frag in line.split(" "))
    return "\n".join(fragment for fragment in fragments if fragment)
56
+
57
@CatchException
def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Answer *txt* by searching Bing, scraping the top hits, and letting the LLM synthesize.

    txt             user input (the question to answer)
    llm_kwargs      model parameters (temperature, top_p, ...), passed through
    plugin_kwargs   plugin parameters (unused here)
    chatbot         chat display handle
    history         chat history (cleared here to avoid input overflow)
    system_prompt   silent system prompt
    web_port        port the app is running on
    """
    history = []    # clear history to avoid input overflow
    chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
                    "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。您若希望分享新的功能模组,请不吝PR!"))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI early: the gpt request below takes a while

    # ------------- < Step 1: scrape the search-engine results > -------------
    from toolbox import get_conf
    proxies = get_conf('proxies')
    urls = bing_search(txt, proxies)
    history = []
    if len(urls) == 0:
        chatbot.append((f"结论:{txt}",
                        "[Local Message] 受到bing限制,无法从bing获取信息!"))
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
        return
    # ------------- < Step 2: visit each result page > -------------
    max_search_result = 8   # cap on how many pages are folded into the context
    for index, url in enumerate(urls[:max_search_result]):
        res = scrape_text(url['link'], proxies)
        history.extend([f"第{index}份搜索结果:", res])
        chatbot.append([f"第{index}份搜索结果:", res[:500]+"......"])
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
    # ------------- < Step 3: let the LLM synthesize > -------------
    i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
    i_say, history = input_clipping(    # clip the longest entries first so the token limit is not exceeded
        inputs=i_say,
        history=history,
        max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
    )
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=i_say,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
        # BUGFIX: the original sys_prompt contained mojibake ("进行���结");
        # restored to the intact wording used by the google variant of this plugin.
        sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
    )
    chatbot[-1] = (i_say, gpt_say)
    history.append(i_say);history.append(gpt_say)
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
106
+
crazy_functions/虚空终端.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Explanation of the Void Terminal Plugin:
3
+
4
+ Please describe in natural language what you want to do.
5
+
6
+ 1. You can open the plugin's dropdown menu to explore various capabilities of this project, and then describe your needs in natural language, for example:
7
+ - "Please call the plugin to translate a PDF paper for me. I just uploaded the paper to the upload area."
8
+ - "Please use the plugin to translate a PDF paper, with the address being https://www.nature.com/articles/s41586-019-1724-z.pdf."
9
+ - "Generate an image with blooming flowers and lush green grass using the plugin."
10
+ - "Translate the README using the plugin. The GitHub URL is https://github.com/facebookresearch/co-tracker."
11
+ - "Translate an Arxiv paper for me. The Arxiv ID is 1812.10695. Remember to use the plugin and don't do it manually!"
12
+ - "I don't like the current interface color. Modify the configuration and change the theme to THEME="High-Contrast"."
13
+ - "Could you please explain the structure of the Transformer network?"
14
+
15
+ 2. If you use keywords like "call the plugin xxx", "modify the configuration xxx", "please", etc., your intention can be recognized more accurately.
16
+
17
+ 3. Your intention can be recognized more accurately when using powerful models like GPT4. This plugin is relatively new, so please feel free to provide feedback on GitHub.
18
+
19
+ 4. Now, if you need to process a file, please upload the file (drag the file to the file upload area) or describe the path to the file.
20
+
21
+ 5. If you don't need to upload a file, you can simply repeat your command again.
22
+ """
23
+ explain_msg = """
24
+ ## 虚空终端插件说明:
25
+
26
+ 1. 请用**自然语言**描述您需要做什么。例如:
27
+ - 「请调用插件,为我翻译PDF论文,论文我刚刚放到上传区了」
28
+ - 「请调用插件翻译PDF论文,地址为https://openreview.net/pdf?id=rJl0r3R9KX」
29
+ - 「把Arxiv论文翻译成中文PDF,arxiv论文的ID是1812.10695,记得用插件!」
30
+ - 「生成一张图片,图中鲜花怒放,绿草如茵,用插件实现」
31
+ - 「用插件翻译README,Github网址是https://github.com/facebookresearch/co-tracker」
32
+ - 「我不喜欢当前的界面颜色,修改配置,把主题THEME更换为THEME="High-Contrast"」
33
+ - 「请调用插件,解析python源代码项目,代码我刚刚打包拖到上传区了」
34
+ - 「请问Transformer网络的结构是怎样的?」
35
+
36
+ 2. 您可以打开插件下拉菜单以了解本项目的各种能力。
37
+
38
+ 3. 如果您使用「调用插件xxx」、「修改配置xxx」、「请问」等关键词,您的意图可以被识别的更准确。
39
+
40
+ 4. 建议使用 GPT3.5 或更强的模型,弱模型可能无法理解您的想法。该插件诞生时间不长,欢迎您前往Github反馈问题。
41
+
42
+ 5. 现在,如果需要处理文件,请您上传文件(将文件拖动到文件上传区),或者描述文件所在的路径。
43
+
44
+ 6. 如果不需要上传文件,现在您只需要再次重复一次您的指令即可。
45
+ """
46
+
47
+ from pydantic import BaseModel, Field
48
+ from typing import List
49
+ from toolbox import CatchException, update_ui, is_the_upload_folder
50
+ from toolbox import update_ui_lastest_msg, disable_auto_promotion
51
+ from request_llms.bridge_all import predict_no_ui_long_connection
52
+ from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
53
+ from crazy_functions.crazy_utils import input_clipping
54
+ from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
55
+ from crazy_functions.vt_fns.vt_state import VoidTerminalState
56
+ from crazy_functions.vt_fns.vt_modify_config import modify_configuration_hot
57
+ from crazy_functions.vt_fns.vt_modify_config import modify_configuration_reboot
58
+ from crazy_functions.vt_fns.vt_call_plugin import execute_plugin
59
+
60
class UserIntention(BaseModel):
    """Structured result of classifying what the user wants the terminal to do.

    Filled either by cheap keyword rules (analyze_intention_with_simple_rules)
    or by asking the LLM to emit JSON matching this schema via GptJsonIO.
    """
    user_prompt: str = Field(description="the content of user input", default="")
    intention_type: str = Field(description="the type of user intention, choose from ['ModifyConfiguration', 'ExecutePlugin', 'Chat']", default="ExecutePlugin")
    user_provide_file: bool = Field(description="whether the user provides a path to a file", default=False)
    user_provide_url: bool = Field(description="whether the user provides a url", default=False)
65
+
66
+
67
def chat(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
    """Handle the 'Chat' intention: forward the raw input to the LLM and
    record the exchange in the chatbot window and the history list."""
    response = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=txt, inputs_show_user=txt,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
        sys_prompt=system_prompt
    )
    chatbot[-1] = [txt, response]
    history.extend([txt, response])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
77
+
78
+
79
# Maps an intention_type value to the Chinese label displayed to the user.
explain_intention_to_user = {
    'Chat': "聊天对话",
    'ExecutePlugin': "调用插件",
    'ModifyConfiguration': "修改配置",
}
84
+
85
+
86
def analyze_intention_with_simple_rules(txt):
    """Cheap keyword-based intent detection.

    Returns (is_certain, intention): is_certain is True only when one of the
    hard-coded keywords below occurs in the input text.
    """
    intention = UserIntention()
    intention.user_prompt = txt
    # Later keywords override earlier ones, preserving the original cascade.
    keyword_to_intent = (
        ('请问', 'Chat'),
        ('用插件', 'ExecutePlugin'),
        ('修改配置', 'ModifyConfiguration'),
    )
    matched = False
    for keyword, intent in keyword_to_intent:
        if keyword in txt:
            matched = True
            intention.intention_type = intent
    return matched, intention
104
+
105
+
106
@CatchException
def 虚空终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Void-terminal plugin entry point.

    On an ambiguous first request it shows usage instructions and locks itself
    to capture the follow-up message; once the intention is clear (or the
    instructions were already shown) it unlocks and forwards the request to
    虚空终端主路由.
    """
    disable_auto_promotion(chatbot=chatbot)
    # Fetch the void-terminal state attached to this chat session.
    state = VoidTerminalState.get_state(chatbot)
    appendix_msg = ""

    # Cheap keyword-based intention detection.
    is_certain, _ = analyze_intention_with_simple_rules(txt)
    if is_the_upload_folder(txt):
        # The user just uploaded files: ask for the actual instruction next.
        state.set_state(chatbot=chatbot, key='has_provided_explaination', value=False)
        appendix_msg = "\n\n**很好,您已经上传了文件**,现在请您描述您的需求。"

    if is_certain or (state.has_provided_explaination):
        # Intention is clear (or help was already shown): skip the help screen.
        state.set_state(chatbot=chatbot, key='has_provided_explaination', value=True)
        state.unlock_plugin(chatbot=chatbot)
        yield from update_ui(chatbot=chatbot, history=history)
        yield from 虚空终端主路由(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
        return
    else:
        # Intention unclear: show instructions, lock, and wait for next message.
        state.set_state(chatbot=chatbot, key='has_provided_explaination', value=True)
        state.lock_plugin(chatbot=chatbot)
        chatbot.append(("虚空终端状态:", explain_msg+appendix_msg))
        yield from update_ui(chatbot=chatbot, history=history)
        return
133
+
134
+
135
+
136
def 虚空终端主路由(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Dispatch the user's request to chat / plugin execution / configuration editing.

    First tries the cheap keyword rules; when inconclusive, asks the LLM to
    classify the request into a UserIntention JSON structure (with one
    auto-repair retry on malformed JSON).
    """
    history = []
    chatbot.append(("虚空终端状态: ", f"正在执行任务: {txt}"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # ⭐ ⭐ ⭐ analyze user intention
    is_certain, user_intention = analyze_intention_with_simple_rules(txt)
    if not is_certain:
        yield from update_ui_lastest_msg(
            lastmsg=f"正在执行任务: {txt}\n\n分析用户意图中", chatbot=chatbot, history=history, delay=0)
        gpt_json_io = GptJsonIO(UserIntention)
        rf_req = "\nchoose from ['ModifyConfiguration', 'ExecutePlugin', 'Chat']"
        inputs = "Analyze the intention of the user according to following user input: \n\n" + \
            ">> " + (txt+rf_req).rstrip('\n').replace('\n','\n>> ') + '\n\n' + gpt_json_io.format_instructions
        run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
            inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
        analyze_res = run_gpt_fn(inputs, "")
        try:
            user_intention = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
            # BUGFIX: removed a leftover statement here that bound an unused
            # one-element tuple to a local `lastmsg`; the update_ui_lastest_msg
            # call below already reports the recognized intention.
        except JsonStringError:
            yield from update_ui_lastest_msg(
                lastmsg=f"正在执行任务: {txt}\n\n用户意图理解: 失败 当前语言模型({llm_kwargs['llm_model']})不能理解您的意图", chatbot=chatbot, history=history, delay=0)
            return

    yield from update_ui_lastest_msg(
        lastmsg=f"正在执行任务: {txt}\n\n用户意图理解: 意图={explain_intention_to_user[user_intention.intention_type]}",
        chatbot=chatbot, history=history, delay=0)

    # user intention: modify this project's configuration
    if user_intention.intention_type == 'ModifyConfiguration':
        yield from modify_configuration_reboot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)

    # user intention: dispatch a plugin
    if user_intention.intention_type == 'ExecutePlugin':
        yield from execute_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)

    # user intention: plain chat
    if user_intention.intention_type == 'Chat':
        yield from chat(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)

    return
180
+
crazy_functions/解析JupyterNotebook.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import update_ui
2
+ from toolbox import CatchException, report_exception
3
+ from toolbox import write_history_to_file, promote_file_to_downloadzone
4
fast_debug = True  # NOTE(review): appears unused anywhere in this module — confirm before removing
5
+
6
+
7
+ class PaperFileGroup():
8
+ def __init__(self):
9
+ self.file_paths = []
10
+ self.file_contents = []
11
+ self.sp_file_contents = []
12
+ self.sp_file_index = []
13
+ self.sp_file_tag = []
14
+
15
+ def run_file_split(self, max_token_limit=1900):
16
+ """
17
+ 将长文本分离开来
18
+ """
19
+ for index, file_content in enumerate(self.file_contents):
20
+ if self.get_token_num(file_content) < max_token_limit:
21
+ self.sp_file_contents.append(file_content)
22
+ self.sp_file_index.append(index)
23
+ self.sp_file_tag.append(self.file_paths[index])
24
+ else:
25
+ from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
26
+ segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
27
+ for j, segment in enumerate(segments):
28
+ self.sp_file_contents.append(segment)
29
+ self.sp_file_index.append(index)
30
+ self.sp_file_tag.append(
31
+ self.file_paths[index] + f".part-{j}.txt")
32
+
33
+
34
+
35
def parseNotebook(filename, enable_markdown=1):
    """Flatten a Jupyter notebook into one annotated text blob.

    Code cells are kept verbatim (blank lines dropped); markdown cells are
    included with a "Markdown:" prefix when enable_markdown is truthy. Each
    retained cell is preceded by a "This is {i}th code block:" header line.
    """
    import json

    with open(filename, 'r', encoding='utf-8', errors='replace') as fh:
        notebook = json.load(fh)

    blocks = []
    for cell in notebook['cells']:
        if not cell['source']:
            continue
        # Drop blank lines so the prompt stays compact.
        lines = [ln for ln in cell['source'] if ln.strip() != '']
        if cell['cell_type'] == 'code':
            blocks.append("".join(lines))
        elif cell['cell_type'] == 'markdown' and enable_markdown:
            blocks.append("Markdown:" + "".join(lines))

    return "".join(
        f"This is {idx+1}th code block: \n" + block + "\n"
        for idx, block in enumerate(blocks)
    )
58
+
59
+
60
def ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
    """Ask the LLM to explain every block of the given .ipynb files, then
    collect the answers into the chat history and a downloadable report.

    `plugin_kwargs` may carry `advanced_arg` ("1"/"0") toggling whether
    markdown cells are included in the analysis (defaults to 1).
    """
    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency

    # Treat an empty advanced_arg as absent so the default below applies.
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    enable_markdown = plugin_kwargs.get("advanced_arg", "1")
    try:
        enable_markdown = int(enable_markdown)
    except ValueError:
        enable_markdown = 1

    pfg = PaperFileGroup()

    for fp in file_manifest:
        file_content = parseNotebook(fp, enable_markdown=enable_markdown)
        pfg.file_paths.append(fp)
        pfg.file_contents.append(file_content)

    # <-------- split overly long notebooks ---------->
    pfg.run_file_split(max_token_limit=1024)
    n_split = len(pfg.sp_file_contents)

    # BUGFIX: the prompt previously read "a markdown block in ipynbipynb".
    inputs_array = [r"This is a Jupyter Notebook file, tell me about Each Block in Chinese. Focus Just On Code." +
                    r"If a block starts with `Markdown` which means it's a markdown block in ipynb. " +
                    r"Start a new line for a block and block num use Chinese." +
                    f"\n\n{frag}" for frag in pfg.sp_file_contents]
    inputs_show_user_array = [f"{f}的分析如下" for f in pfg.sp_file_tag]
    sys_prompt_array = ["You are a professional programmer."] * n_split

    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=inputs_array,
        inputs_show_user_array=inputs_show_user_array,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history_array=[[""] for _ in range(n_split)],
        sys_prompt_array=sys_prompt_array,
        # max_workers=5, # cap on parallel OpenAI requests
        scroller_max_len=80
    )

    # <-------- assemble results ---------->
    block_result = " \n".join(gpt_response_collection)
    chatbot.append(("解析的结果如下", block_result))
    history.extend(["解析的结果如下", block_result])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # <-------- write the report file and expose it for download ---------->
    res = write_history_to_file(history)
    promote_file_to_downloadzone(res, chatbot=chatbot)
    chatbot.append(("完成了吗?", res))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
110
+
111
@CatchException
def 解析ipynb文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry: resolve `txt` to one .ipynb file or a folder of them,
    then delegate to ipynb解释 for the actual analysis."""
    chatbot.append([
        "函数插件功能?",
        "对IPynb文件进行解析。Contributor: codycjy."])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    history = []  # clear history to avoid overflowing the model input
    import glob
    import os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "":
            txt = '空空如也的输入栏'
        report_exception(chatbot, history,
                         a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    # A direct .ipynb path is used as-is; otherwise search the folder recursively.
    if txt.endswith('.ipynb'):
        file_manifest = [txt]
    else:
        file_manifest = [f for f in glob.glob(
            f'{project_folder}/**/*.ipynb', recursive=True)]
    if len(file_manifest) == 0:
        report_exception(chatbot, history,
                         a=f"解析项目: {txt}", b=f"找不到任何.ipynb文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, )
crazy_functions/解析项目源代码.py ADDED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import update_ui, promote_file_to_downloadzone, disable_auto_promotion
2
+ from toolbox import CatchException, report_exception, write_history_to_file
3
+ from .crazy_utils import input_clipping
4
+
5
def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
    """Core source-analysis pipeline shared by all 解析一个X项目 entry points.

    Step 1 sends every file to the LLM in parallel for a per-file summary;
    step 2 folds those summaries into project-level overviews in batches,
    carrying a one-sentence running summary between batches. Results are
    written to a downloadable report file.
    """
    import os, copy
    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
    from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
    disable_auto_promotion(chatbot=chatbot)

    # When True, each summary batch only looks at its own files (not all seen so far).
    summary_batch_isolation = True
    inputs_array = []
    inputs_show_user_array = []
    history_array = []
    sys_prompt_array = []
    report_part_1 = []

    # NOTE(review): the limit is 2048 but the message says 512 — inconsistent.
    assert len(file_manifest) <= 2048, "源文件太多(超过512个), 请缩减输入文件的数量。或者,您也可以选择删除此行警告,并修改代码拆分file_manifest列表,从而实现分批次处理。"
    ############################## <Step 1: per-file analysis, multi-threaded> ##################################
    for index, fp in enumerate(file_manifest):
        # Read the file.
        with open(fp, 'r', encoding='utf-8', errors='replace') as f:
            file_content = f.read()
        prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
        i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)},文件代码是 ```{file_content}```'
        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {fp}'
        # Queue the request.
        inputs_array.append(i_say)
        inputs_show_user_array.append(i_say_show_user)
        history_array.append([])
        sys_prompt_array.append("你是一个程序架构分析师,正在分析一个源代码项目。你的回答必须简单明了。")

    # All files read: fan the requests out to the LLM, one thread per file.
    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array = inputs_array,
        inputs_show_user_array = inputs_show_user_array,
        history_array = history_array,
        sys_prompt_array = sys_prompt_array,
        llm_kwargs = llm_kwargs,
        chatbot = chatbot,
        show_user_at_complete = True
    )

    # Per-file analysis done: persist it, then start the project-level summary.
    report_part_1 = copy.deepcopy(gpt_response_collection)
    history_to_return = report_part_1
    res = write_history_to_file(report_part_1)
    promote_file_to_downloadzone(res, chatbot=chatbot)
    chatbot.append(("完成?", "逐个文件分析已完成。" + res + "\n\n正在开始汇总。"))
    yield from update_ui(chatbot=chatbot, history=history_to_return) # refresh UI

    ############################## <Step 2: aggregation, single-threaded, batched + iterative> ##################################
    batchsize = 16 # files summarized per batch
    report_part_2 = []
    previous_iteration_files = []
    last_iteration_result = ""
    while True:
        if len(file_manifest) == 0: break
        this_iteration_file_manifest = file_manifest[:batchsize]
        # gpt_response_collection alternates [prompt, answer, prompt, answer, ...],
        # hence the *2 slicing and the index%2 handling below.
        this_iteration_gpt_response_collection = gpt_response_collection[:batchsize*2]
        file_rel_path = [os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)]
        # Replace each verbose prompt with just the file name to save tokens.
        for index, content in enumerate(this_iteration_gpt_response_collection):
            if index%2==0: this_iteration_gpt_response_collection[index] = f"{file_rel_path[index//2]}" # keep only the file name to save tokens
        this_iteration_files = [os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)]
        previous_iteration_files.extend(this_iteration_files)
        previous_iteration_files_string = ', '.join(previous_iteration_files)
        current_iteration_focus = ', '.join(this_iteration_files)
        if summary_batch_isolation: focus = current_iteration_focus
        else: focus = previous_iteration_files_string
        i_say = f'用一张Markdown表格简要描述以下文件的功能:{focus}。根据以上分析,用一句话概括程序的整体功能。'
        if last_iteration_result != "":
            sys_prompt_additional = "已知某些代码的局部作用是:" + last_iteration_result + "\n请继续分析其他源代码,从而更全面地理解项目的整体功能。"
        else:
            sys_prompt_additional = ""
        inputs_show_user = f'根据以上分析,对程序的整体功能和构架重新做出概括,由于输入长度限制,可能需要分组处理,本组文件为 {current_iteration_focus} + 已经汇总的文件组。'
        this_iteration_history = copy.deepcopy(this_iteration_gpt_response_collection)
        this_iteration_history.append(last_iteration_result)
        # Clip the input to fit the context window.
        inputs, this_iteration_history_feed = input_clipping(inputs=i_say, history=this_iteration_history, max_token_limit=2560)
        result = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=inputs, inputs_show_user=inputs_show_user, llm_kwargs=llm_kwargs, chatbot=chatbot,
            history=this_iteration_history_feed, # analysis from previous iterations
            sys_prompt="你是一个程序架构分析师,正在分析一个项目的源代码。" + sys_prompt_additional)

        summary = "请用一句话概括这些文件的整体功能"
        summary_result = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=summary,
            inputs_show_user=summary,
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history=[i_say, result], # analysis from previous iterations
            sys_prompt="你是一个程序架构分析师,正在分析一个项目的源代码。" + sys_prompt_additional)

        report_part_2.extend([i_say, result])
        # Carry the one-sentence summary into the next batch, then advance.
        last_iteration_result = summary_result
        file_manifest = file_manifest[batchsize:]
        gpt_response_collection = gpt_response_collection[batchsize*2:]

    ############################## <END> ##################################
    history_to_return.extend(report_part_2)
    res = write_history_to_file(history_to_return)
    promote_file_to_downloadzone(res, chatbot=chatbot)
    chatbot.append(("完成了吗?", res))
    yield from update_ui(chatbot=chatbot, history=history_to_return) # refresh UI
106
+
107
+
108
@CatchException
def 解析项目本身(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Analyze this project's own Python sources (top level and one level deep)."""
    history = []  # clear history to avoid overflowing the model input
    import glob
    project_folder = './'
    file_manifest = glob.glob('./*.py') + glob.glob('./*/*.py')
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
120
+
121
@CatchException
def 解析一个Python项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Collect every *.py under the folder given in `txt` and run the analysis pipeline."""
    import glob, os
    history = []  # clear history to avoid overflowing the model input
    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    project_folder = txt
    file_manifest = glob.glob(f'{project_folder}/**/*.py', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
138
+
139
@CatchException
def 解析一个Matlab项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry: analyze all *.m files under the folder given in `txt`."""
    history = []  # clear history to avoid overflowing the model input
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析Matlab项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.m', recursive=True)]
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a = f"解析Matlab项目: {txt}", b = f"找不到任何`.m`源文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
156
+
157
@CatchException
def 解析一个C项目的头文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry: analyze only the headers (*.h, *.hpp) of a C/C++ project."""
    history = []  # clear history to avoid overflowing the model input
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.h', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.hpp', recursive=True)] #+ \
                    # [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
176
+
177
@CatchException
def 解析一个C项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry: analyze a full C/C++ project (*.h, *.cpp, *.hpp, *.c)."""
    history = []  # clear history to avoid overflowing the model input
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.h', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.hpp', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
    if len(file_manifest) == 0:
        # BUGFIX: the message previously said ".h头文件" although this entry
        # searches all C/C++ sources, not only headers.
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何C/C++源文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
197
+
198
+
199
@CatchException
def 解析一个Java项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry: analyze a Java project (*.java, *.jar, *.xml, *.sh)."""
    history = []  # clear history to avoid overflowing the model input
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.java', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.jar', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.xml', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.sh', recursive=True)]
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何java文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
219
+
220
+
221
@CatchException
def 解析一个前端项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry: analyze a front-end project (TS/JS/Vue/styles/mini-program files)."""
    history = []  # clear history to avoid overflowing the model input
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.ts', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.tsx', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.json', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.js', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.vue', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.less', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.sass', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.wxml', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.wxss', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.css', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.jsx', recursive=True)]
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何前端相关文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
248
+
249
+
250
@CatchException
def 解析一个Golang项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry: analyze a Go project (*.go plus go.mod/go.sum/go.work)."""
    history = []  # clear history to avoid overflowing the model input
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.go', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/go.mod', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/go.sum', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/go.work', recursive=True)]
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何golang文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
270
+
271
@CatchException
def 解析一个Rust项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry: analyze a Rust project (*.rs, *.toml, *.lock)."""
    history = []  # clear history to avoid overflowing the model input
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.rs', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.toml', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.lock', recursive=True)]
    if len(file_manifest) == 0:
        # BUGFIX: the message previously said "golang文件" — copy-paste residue
        # from the Go variant of this function.
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何Rust文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
290
+
291
@CatchException
def 解析一个Lua项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry: analyze a Lua project (*.lua, *.xml, *.json, *.toml)."""
    history = []  # clear history to avoid overflowing the model input
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.lua', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.xml', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.json', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.toml', recursive=True)]
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何lua文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
311
+
312
+
313
@CatchException
def 解析一个CSharp项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry: analyze a C# project (*.cs, *.csproj)."""
    history = []  # clear history to avoid overflowing the model input
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.cs', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.csproj', recursive=True)]
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何CSharp文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
331
+
332
+
333
@CatchException
def 解析任意code项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry: analyze an arbitrary code project with user-supplied patterns.

    `plugin_kwargs["advanced_arg"]` holds comma-separated patterns, e.g.
    "*.c, *.cpp, ^*.md, ^README.md": plain patterns are included, "^*.xxx"
    excludes a file suffix, "^name" excludes a specific file name.
    """
    txt_pattern = plugin_kwargs.get("advanced_arg")
    txt_pattern = txt_pattern.replace(",", ",")  # normalize full-width commas
    # glob patterns to include (e.g. *.c, *.cpp, *.py, config.toml)
    pattern_include = [_.lstrip(" ,").rstrip(" ,") for _ in txt_pattern.split(",") if _ != "" and not _.strip().startswith("^")]
    if not pattern_include: pattern_include = ["*"]  # empty input matches everything
    # file suffixes to exclude (e.g. ^*.c, ^*.cpp, ^*.py)
    pattern_except_suffix = [_.lstrip(" ^*.,").rstrip(" ,") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^*.")]
    pattern_except_suffix += ['zip', 'rar', '7z', 'tar', 'gz']  # never parse archives
    # file names to exclude (e.g. ^README.md)
    # BUGFIX: "\." inside a non-raw string is an invalid escape sequence
    # (SyntaxWarning on Python >= 3.12); regex fragments now use raw strings.
    pattern_except_name = [_.lstrip(" ^*,").rstrip(" ,").replace(".", r"\.") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^") and not _.strip().startswith("^*.")]
    # build the exclusion regex (the conditional expression covers the whole concatenation)
    pattern_except = r'/[^/]+\.(' + "|".join(pattern_except_suffix) + r')$'
    pattern_except += r'|/(' + "|".join(pattern_except_name) + r')$' if pattern_except_name != [] else ''

    history.clear()
    import glob, os, re
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    # If an archive was uploaded, prefer its extracted folder so the archive itself is skipped.
    maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)]
    if len(maybe_dir)>0 and maybe_dir[0].endswith('.extract'):
        extract_folder_path = maybe_dir[0]
    else:
        extract_folder_path = project_folder
    # Match files per the include patterns, honoring the exclusion regex
    # (an explicit include pattern for a suffix wins over a suffix exclusion).
    file_manifest = [f for pattern in pattern_include for f in glob.glob(f'{extract_folder_path}/**/{pattern}', recursive=True) if "" != extract_folder_path and \
                     os.path.isfile(f) and (not re.search(pattern_except, f) or pattern.endswith('.' + re.search(pattern_except, f).group().split('.')[-1]))]
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
crazy_functions/谷歌检索小助手.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
2
+ from toolbox import CatchException, report_exception, promote_file_to_downloadzone
3
+ from toolbox import update_ui, update_ui_lastest_msg, disable_auto_promotion, write_history_to_file
4
+ import logging
5
+ import requests
6
+ import time
7
+ import random
8
+
9
+ ENABLE_ALL_VERSION_SEARCH = True
10
+
11
+ def get_meta_information(url, chatbot, history):
12
+ import arxiv
13
+ import difflib
14
+ import re
15
+ from bs4 import BeautifulSoup
16
+ from toolbox import get_conf
17
+ from urllib.parse import urlparse
18
+ session = requests.session()
19
+
20
+ proxies = get_conf('proxies')
21
+ headers = {
22
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
23
+ 'Accept-Encoding': 'gzip, deflate, br',
24
+ 'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
25
+ 'Cache-Control':'max-age=0',
26
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
27
+ 'Connection': 'keep-alive'
28
+ }
29
+ try:
30
+ session.proxies.update(proxies)
31
+ except:
32
+ report_exception(chatbot, history,
33
+ a=f"获取代理失败 无代理状态下很可能无法访问OpenAI家族的模型及谷歌学术 建议:检查USE_PROXY选项是否修改。",
34
+ b=f"尝试直接连接")
35
+ yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
36
+ session.headers.update(headers)
37
+
38
+ response = session.get(url)
39
+ # 解析网页内容
40
+ soup = BeautifulSoup(response.text, "html.parser")
41
+
42
+ def string_similar(s1, s2):
43
+ return difflib.SequenceMatcher(None, s1, s2).quick_ratio()
44
+
45
+ if ENABLE_ALL_VERSION_SEARCH:
46
+ def search_all_version(url):
47
+ time.sleep(random.randint(1,5)) # 睡一会防止触发google反爬虫
48
+ response = session.get(url)
49
+ soup = BeautifulSoup(response.text, "html.parser")
50
+
51
+ for result in soup.select(".gs_ri"):
52
+ try:
53
+ url = result.select_one(".gs_rt").a['href']
54
+ except:
55
+ continue
56
+ arxiv_id = extract_arxiv_id(url)
57
+ if not arxiv_id:
58
+ continue
59
+ search = arxiv.Search(
60
+ id_list=[arxiv_id],
61
+ max_results=1,
62
+ sort_by=arxiv.SortCriterion.Relevance,
63
+ )
64
+ try: paper = next(search.results())
65
+ except: paper = None
66
+ return paper
67
+
68
+ return None
69
+
70
+ def extract_arxiv_id(url):
71
+ # 返回给定的url解析出的arxiv_id,如url未成功匹配返回None
72
+ pattern = r'arxiv.org/abs/([^/]+)'
73
+ match = re.search(pattern, url)
74
+ if match:
75
+ return match.group(1)
76
+ else:
77
+ return None
78
+
79
+ profile = []
80
+ # 获取所有文章的标题和作者
81
+ for result in soup.select(".gs_ri"):
82
+ title = result.a.text.replace('\n', ' ').replace(' ', ' ')
83
+ author = result.select_one(".gs_a").text
84
+ try:
85
+ citation = result.select_one(".gs_fl > a[href*='cites']").text # 引用次数是链接中的文本,直接取出来
86
+ except:
87
+ citation = 'cited by 0'
88
+ abstract = result.select_one(".gs_rs").text.strip() # 摘要在 .gs_rs 中的文本,需要清除首尾空格
89
+
90
+ # 首先在arxiv上搜索,获取文章摘要
91
+ search = arxiv.Search(
92
+ query = title,
93
+ max_results = 1,
94
+ sort_by = arxiv.SortCriterion.Relevance,
95
+ )
96
+ try: paper = next(search.results())
97
+ except: paper = None
98
+
99
+ is_match = paper is not None and string_similar(title, paper.title) > 0.90
100
+
101
+ # 如果在Arxiv上匹配失败,检索文章的历史版本的题目
102
+ if not is_match and ENABLE_ALL_VERSION_SEARCH:
103
+ other_versions_page_url = [tag['href'] for tag in result.select_one('.gs_flb').select('.gs_nph') if 'cluster' in tag['href']]
104
+ if len(other_versions_page_url) > 0:
105
+ other_versions_page_url = other_versions_page_url[0]
106
+ paper = search_all_version('http://' + urlparse(url).netloc + other_versions_page_url)
107
+ is_match = paper is not None and string_similar(title, paper.title) > 0.90
108
+
109
+ if is_match:
110
+ # same paper
111
+ abstract = paper.summary.replace('\n', ' ')
112
+ is_paper_in_arxiv = True
113
+ else:
114
+ # different paper
115
+ abstract = abstract
116
+ is_paper_in_arxiv = False
117
+
118
+ logging.info('[title]:' + title)
119
+ logging.info('[author]:' + author)
120
+ logging.info('[citation]:' + citation)
121
+
122
+ profile.append({
123
+ 'title': title,
124
+ 'author': author,
125
+ 'citation': citation,
126
+ 'abstract': abstract,
127
+ 'is_paper_in_arxiv': is_paper_in_arxiv,
128
+ })
129
+
130
+ chatbot[-1] = [chatbot[-1][0], title + f'\n\n是否在arxiv中(不在arxiv中无法获取完整摘要):{is_paper_in_arxiv}\n\n' + abstract]
131
+ yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
132
+ return profile
133
+
134
+ @CatchException
135
+ def 谷歌检索小助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
136
+ disable_auto_promotion(chatbot=chatbot)
137
+ # 基本信息:功能、贡献者
138
+ chatbot.append([
139
+ "函数插件功能?",
140
+ "分析用户提供的谷歌学术(google scholar)搜索页面中,出现的所有文章: binary-husky,插件初始化中..."])
141
+ yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
142
+
143
+ # 尝试导入依赖,如果缺少依赖,则给出安装建议
144
+ try:
145
+ import arxiv
146
+ import math
147
+ from bs4 import BeautifulSoup
148
+ except:
149
+ report_exception(chatbot, history,
150
+ a = f"解析项目: {txt}",
151
+ b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4 arxiv```。")
152
+ yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
153
+ return
154
+
155
+ # 清空历史,以免输入溢出
156
+ history = []
157
+ meta_paper_info_list = yield from get_meta_information(txt, chatbot, history)
158
+ if len(meta_paper_info_list) == 0:
159
+ yield from update_ui_lastest_msg(lastmsg='获取文献失败,可能触发了google反爬虫机制。',chatbot=chatbot, history=history, delay=0)
160
+ return
161
+ batchsize = 5
162
+ for batch in range(math.ceil(len(meta_paper_info_list)/batchsize)):
163
+ if len(meta_paper_info_list[:batchsize]) > 0:
164
+ i_say = "下面是一些学术文献的数据,提取出以下内容:" + \
165
+ "1、英文题目;2、中文题目翻译;3、作者;4、arxiv公开(is_paper_in_arxiv);4、引用数量(cite);5、中文摘要翻译。" + \
166
+ f"以下是信息源:{str(meta_paper_info_list[:batchsize])}"
167
+
168
+ inputs_show_user = f"请分析此页面中出现的所有文章:{txt},这是第{batch+1}批"
169
+ gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
170
+ inputs=i_say, inputs_show_user=inputs_show_user,
171
+ llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
172
+ sys_prompt="你是一个学术翻译,请从数据中提取信息。你必须使用Markdown表格。你必须逐个文献进行处理。"
173
+ )
174
+
175
+ history.extend([ f"第{batch+1}批", gpt_say ])
176
+ meta_paper_info_list = meta_paper_info_list[batchsize:]
177
+
178
+ chatbot.append(["状态?",
179
+ "已经全部完成,您可以试试让AI写一个Related Works,例如您可以继续输入Write a \"Related Works\" section about \"你搜索的研究领域\" for me."])
180
+ msg = '正常'
181
+ yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
182
+ path = write_history_to_file(history)
183
+ promote_file_to_downloadzone(path, chatbot=chatbot)
184
+ chatbot.append(("完成了吗?", path));
185
+ yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
crazy_functions/辅助功能.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # encoding: utf-8
2
+ # @Time : 2023/4/19
3
+ # @Author : Spike
4
+ # @Descr :
5
+ from toolbox import update_ui, get_conf, get_user
6
+ from toolbox import CatchException
7
+ from toolbox import default_user_name
8
+ from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
9
+ import shutil
10
+ import os
11
+
12
+
13
+ @CatchException
14
+ def 猜你想问(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
15
+ if txt:
16
+ show_say = txt
17
+ prompt = txt+'\n回答完问题后,再列出用户可能提出的三个问题。'
18
+ else:
19
+ prompt = history[-1]+"\n分析上述回答,再列出用户可能提出的三个问题。"
20
+ show_say = '分析上述回答,再列出用户可能提出的三个问题。'
21
+ gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
22
+ inputs=prompt,
23
+ inputs_show_user=show_say,
24
+ llm_kwargs=llm_kwargs,
25
+ chatbot=chatbot,
26
+ history=history,
27
+ sys_prompt=system_prompt
28
+ )
29
+ chatbot[-1] = (show_say, gpt_say)
30
+ history.extend([show_say, gpt_say])
31
+ yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
32
+
33
+
34
+ @CatchException
35
+ def 清除缓存(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
36
+ chatbot.append(['清除本地缓存数据', '执行中. 删除数据'])
37
+ yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
38
+
39
+ def _get_log_folder(user=default_user_name):
40
+ PATH_LOGGING = get_conf('PATH_LOGGING')
41
+ _dir = os.path.join(PATH_LOGGING, user)
42
+ if not os.path.exists(_dir): os.makedirs(_dir)
43
+ return _dir
44
+
45
+ def _get_upload_folder(user=default_user_name):
46
+ PATH_PRIVATE_UPLOAD = get_conf('PATH_PRIVATE_UPLOAD')
47
+ _dir = os.path.join(PATH_PRIVATE_UPLOAD, user)
48
+ return _dir
49
+
50
+ shutil.rmtree(_get_log_folder(get_user(chatbot)), ignore_errors=True)
51
+ shutil.rmtree(_get_upload_folder(get_user(chatbot)), ignore_errors=True)
52
+
53
+ chatbot.append(['清除本地缓存数据', '执行完成'])
54
+ yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
crazy_functions/高级功能函数模板.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import CatchException, update_ui
2
+ from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
3
+ import datetime
4
+ @CatchException
5
+ def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
6
+ """
7
+ txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
8
+ llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行
9
+ plugin_kwargs 插件模型的参数,用于灵活调整复杂功能的各种参数
10
+ chatbot 聊天显示框的句柄,用于显示给用户
11
+ history 聊天历史,前情提要
12
+ system_prompt 给gpt的静默提醒
13
+ web_port 当前软件运行的端口号
14
+ """
15
+ history = [] # 清空历史,以免输入溢出
16
+ chatbot.append(("这是什么功能?", "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板(该函数只有20多行代码)。此外我们也提供可同步处理大量文件的多线程Demo供您参考。您若希望分享新的功能模组,请不吝PR!"))
17
+ yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
18
+ for i in range(5):
19
+ currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month
20
+ currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day
21
+ i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。'
22
+ gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
23
+ inputs=i_say, inputs_show_user=i_say,
24
+ llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
25
+ sys_prompt="当你想发送一张照片时,请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。"
26
+ )
27
+ chatbot[-1] = (i_say, gpt_say)
28
+ history.append(i_say);history.append(gpt_say)
29
+ yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新