Spaces:
Sleeping
Sleeping
Upload 34 files
Browse files- crazy_functions/CodeInterpreter.py +232 -0
- crazy_functions/__init__.py +0 -0
- crazy_functions/agent_fns/auto_agent.py +23 -0
- crazy_functions/agent_fns/echo_agent.py +19 -0
- crazy_functions/agent_fns/general.py +134 -0
- crazy_functions/agent_fns/persistent.py +16 -0
- crazy_functions/agent_fns/pipe.py +194 -0
- crazy_functions/agent_fns/watchdog.py +28 -0
- crazy_functions/chatglm微调工具.py +141 -0
- crazy_functions/crazy_utils.py +609 -0
- crazy_functions/gen_fns/gen_fns_shared.py +70 -0
- crazy_functions/ipc_fns/mp.py +37 -0
- crazy_functions/json_fns/pydantic_io.py +111 -0
- crazy_functions/live_audio/aliyunASR.py +261 -0
- crazy_functions/live_audio/audio_io.py +51 -0
- crazy_functions/multi_stage/multi_stage_utils.py +93 -0
- crazy_functions/pdf_fns/breakdown_txt.py +125 -0
- crazy_functions/pdf_fns/parse_pdf.py +171 -0
- crazy_functions/pdf_fns/report_gen_html.py +58 -0
- crazy_functions/pdf_fns/report_template.html +0 -0
- crazy_functions/vt_fns/vt_call_plugin.py +114 -0
- crazy_functions/vt_fns/vt_modify_config.py +81 -0
- crazy_functions/vt_fns/vt_state.py +28 -0
- crazy_functions/命令行助手.py +31 -0
- crazy_functions/对话历史存档.py +152 -0
- crazy_functions/生成函数注释.py +56 -0
- crazy_functions/联网的ChatGPT.py +106 -0
- crazy_functions/联网的ChatGPT_bing版.py +106 -0
- crazy_functions/虚空终端.py +180 -0
- crazy_functions/解析JupyterNotebook.py +140 -0
- crazy_functions/解析项目源代码.py +371 -0
- crazy_functions/谷歌检索小助手.py +185 -0
- crazy_functions/辅助功能.py +54 -0
- crazy_functions/高级功能函数模板.py +29 -0
crazy_functions/CodeInterpreter.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections.abc import Callable, Iterable, Mapping
|
| 2 |
+
from typing import Any
|
| 3 |
+
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc
|
| 4 |
+
from toolbox import promote_file_to_downloadzone, get_log_folder
|
| 5 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
| 6 |
+
from .crazy_utils import input_clipping, try_install_deps
|
| 7 |
+
from multiprocessing import Process, Pipe
|
| 8 |
+
import os
|
| 9 |
+
import time
|
| 10 |
+
|
| 11 |
+
templete = """
|
| 12 |
+
```python
|
| 13 |
+
import ... # Put dependencies here, e.g. import numpy as np
|
| 14 |
+
|
| 15 |
+
class TerminalFunction(object): # Do not change the name of the class, The name of the class must be `TerminalFunction`
|
| 16 |
+
|
| 17 |
+
def run(self, path): # The name of the function must be `run`, it takes only a positional argument.
|
| 18 |
+
# rewrite the function you have just written here
|
| 19 |
+
...
|
| 20 |
+
return generated_file_path
|
| 21 |
+
```
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
def inspect_dependency(chatbot, history):
    """Placeholder dependency check: refresh the UI and unconditionally report success.

    Real dependency inspection is not implemented yet; callers always get True.
    """
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the interface
    return True
|
| 27 |
+
|
| 28 |
+
def get_code_block(reply):
    """Extract one fenced code block from an LLM reply.

    If exactly one block is present it is returned; with several blocks, the
    first one containing ``class TerminalFunction`` wins.

    Args:
        reply: Full text of the model reply.
    Returns:
        The code inside the fence, with a leading "python" language tag removed.
    Raises:
        RuntimeError: when no suitable code block is found.
    """
    import re
    pattern = r"```([\s\S]*?)```"  # regex pattern to match fenced code blocks
    matches = re.findall(pattern, reply)  # find all code blocks in text

    def _drop_lang_tag(block):
        # BUGFIX: the previous implementation used block.strip('python'), which
        # strips *any* of the characters p/y/t/h/o/n from both ends of the text
        # and can silently delete real code characters. We only want to remove
        # the leading language tag after the opening fence.
        return block.removeprefix('python')

    if len(matches) == 1:
        return _drop_lang_tag(matches[0])
    for match in matches:
        if 'class TerminalFunction' in match:
            return _drop_lang_tag(match)
    raise RuntimeError("GPT is not generating proper code.")
|
| 38 |
+
|
| 39 |
+
def gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history):
    """Two-step LLM interaction that produces a `TerminalFunction` implementation.

    Step 1 asks for a single Python function performing the user's task on a
    `file_type` file; step 2 asks the model to rewrite it to fit `templete`.

    Args:
        txt: User's task description.
        file_type: Extension of the uploaded file (e.g. "csv", "png").
        llm_kwargs / chatbot / history: Standard plugin plumbing.
    Returns:
        (code_to_return, installation_advance, txt, file_type, llm_kwargs, chatbot, history)
        — installation_advance is currently always "" (the advisory step is disabled).
    """
    # Compose the first prompt
    prompt_compose = [
        f'Your job:\n'
        f'1. write a single Python function, which takes a path of a `{file_type}` file as the only argument and returns a `string` containing the result of analysis or the path of generated files. \n',
        f"2. You should write this function to perform following task: " + txt + "\n",
        f"3. Wrap the output python function with markdown codeblock."
    ]
    i_say = "".join(prompt_compose)
    demo = []

    # Step 1: ask for the raw function (empty history so the prompt stands alone)
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=i_say,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=demo,
        sys_prompt= r"You are a programmer."
    )
    history.extend([i_say, gpt_say])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # Step 2: ask the model to rewrite the function to satisfy the template
    prompt_compose = [
        "If previous stage is successful, rewrite the function you have just written to satisfy following templete: \n",
        templete
    ]
    i_say = "".join(prompt_compose); inputs_show_user = "If previous stage is successful, rewrite the function you have just written to satisfy executable templete. "
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=inputs_show_user,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
        sys_prompt= r"You are a programmer."
    )
    code_to_return = gpt_say
    history.extend([i_say, gpt_say])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # Step 3 (disabled): earlier experiments asked the model for dependency
    # installation advice (`try_install_deps` / `pip install`); the advisory is
    # currently a constant empty string.
    installation_advance = ""

    return code_to_return, installation_advance, txt, file_type, llm_kwargs, chatbot, history
|
| 93 |
+
|
| 94 |
+
def make_module(code):
    """Persist generated code into the log folder and return an import spec.

    The code is written to `<log_folder>/gpt_fn_<timestamp>.py`; the returned
    string has the form "<package.path>.<module>-><ClassName>" and is consumed
    by `init_module_instance`.
    """
    module_file = 'gpt_fn_' + gen_time_str().replace('-', '_')
    with open(f'{get_log_folder()}/{module_file}.py', 'w', encoding='utf8') as f:
        f.write(code)

    def get_class_name(class_string):
        import re
        # Pull the class name out of a "class Foo(Bar):" header.
        return re.search(r'class (\w+)\(', class_string).group(1)

    class_name = get_class_name(code)
    # The log folder path doubles as the package path for importlib.
    return f"{get_log_folder().replace('/', '.')}.{module_file}->{class_name}"
|
| 107 |
+
|
| 108 |
+
def init_module_instance(module):
    """Import a module spec of the form "pkg.mod->ClassName" and instantiate the class."""
    import importlib
    module_path, class_name = module.split('->')
    cls = getattr(importlib.import_module(module_path), class_name)
    return cls()
|
| 113 |
+
|
| 114 |
+
def for_immediate_show_off_when_possible(file_type, fp, chatbot):
    """Append an inline image preview to the chat when the result file is an image.

    Non-image file types leave the chatbot untouched; the chatbot is returned
    either way.
    """
    previewable = ('png', 'jpg')
    if file_type in previewable:
        image_path = os.path.abspath(fp)
        preview_html = (
            f'本地文件地址: <br/>`{image_path}`<br/>' +
            f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
        )
        chatbot.append(['这是一张图片, 展示如下:', preview_html])
    return chatbot
|
| 122 |
+
|
| 123 |
+
def subprocess_worker(instance, file_path, return_dict):
    """Child-process entry point: run the generated class and stash its result.

    `return_dict` is a multiprocessing manager dict shared with the parent
    process, which reads back `return_dict['result']`.
    """
    result = instance.run(file_path)
    return_dict['result'] = result
|
| 125 |
+
|
| 126 |
+
def have_any_recent_upload_files(chatbot):
    """Return True if a file was uploaded within the last five minutes.

    Reads the "most_recent_uploaded" record from the chatbot cookies; a
    missing chatbot handle or record counts as "no recent upload".
    """
    recent_window_seconds = 5 * 60
    if not chatbot:
        return False  # no chatbot handle available
    record = chatbot._cookies.get("most_recent_uploaded", None)
    if not record:
        return False  # nothing has been uploaded yet
    # Fresh only when the upload happened inside the window.
    return time.time() - record["time"] < recent_window_seconds
|
| 133 |
+
|
| 134 |
+
def get_recent_file_prompt_support(chatbot):
    """Return the path of the most recently uploaded file recorded in the cookies.

    NOTE(review): assumes the "most_recent_uploaded" record exists — call
    `have_any_recent_upload_files` first, otherwise this raises TypeError.
    """
    record = chatbot._cookies.get("most_recent_uploaded", None)
    return record['path']
|
| 138 |
+
|
| 139 |
+
@CatchException
def 虚空终端CodeInterpreter(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Open-source CodeInterpreter plugin (work in progress, currently disabled).

    txt             Text from the input box, e.g. a task description or a path of files to process
    llm_kwargs      GPT model parameters (temperature, top_p, ...), usually passed through unchanged
    plugin_kwargs   Plugin-specific parameters
    chatbot         Handle of the chat display, used to show output to the user
    history         Chat history (context)
    system_prompt   Silent system prompt for GPT
    web_port        Port the software is currently running on
    """
    # Plugin is explicitly disabled — everything below this raise is unreachable.
    raise NotImplementedError

    # Clear history to avoid input overflow.
    # NOTE(review): `clear_file_downloadzone` and `is_the_upload_folder` (below)
    # are not imported anywhere in this file — they would raise NameError if the
    # raise above were ever removed; confirm their source before enabling.
    history = []; clear_file_downloadzone(chatbot)

    # Basic info: feature description, contributors
    chatbot.append([
        "函数插件功能?",
        "CodeInterpreter开源版, 此插件处于开发阶段, 建议暂时不要使用, 插件初始化中 ..."
    ])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    if have_any_recent_upload_files(chatbot):
        file_path = get_recent_file_prompt_support(chatbot)
    else:
        chatbot.append(["文件检索", "没有发现任何近期上传的文件。"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # Read the uploaded file (empty-string markers are dropped first)
    if ("recently_uploaded_files" in plugin_kwargs) and (plugin_kwargs["recently_uploaded_files"] == ""): plugin_kwargs.pop("recently_uploaded_files")
    recently_uploaded_files = plugin_kwargs.get("recently_uploaded_files", None)
    file_path = recently_uploaded_files[-1]
    file_type = file_path.split('.')[-1]

    # Carelessness check: the user left the upload-folder path in the input box
    if is_the_upload_folder(txt):
        chatbot.append([
            "...",
            f"请在输入框内填写需求,然后再次点击该插件(文件路径 {file_path} 已经被记忆)"
        ])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return

    # Start the real work
    for j in range(5):  # retry at most 5 times
        try:
            code, installation_advance, txt, file_type, llm_kwargs, chatbot, history = \
                yield from gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history)
            code = get_code_block(code)
            res = make_module(code)
            instance = init_module_instance(res)
            break
        except Exception as e:
            chatbot.append([f"第{j}次代码生成尝试,失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
            yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # Code generation finished — execute the generated class in a child process
    try:
        import multiprocessing
        manager = multiprocessing.Manager()
        return_dict = manager.dict()

        p = multiprocessing.Process(target=subprocess_worker, args=(instance, file_path, return_dict))
        # only has 10 seconds to run
        p.start(); p.join(timeout=10)
        if p.is_alive(): p.terminate(); p.join()
        p.close()
        res = return_dict['result']
    except Exception as e:
        chatbot.append(["执行失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return

    # Finished successfully — present the result
    res = str(res)
    if os.path.exists(res):
        chatbot.append(["执行成功了,结果是一个有效文件", "结果:" + res])
        new_file_path = promote_file_to_downloadzone(res, chatbot=chatbot)
        chatbot = for_immediate_show_off_when_possible(file_type, new_file_path, chatbot)
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
    else:
        chatbot.append(["执行成功了,结果是一个字符串", "结果:" + res])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
| 225 |
+
|
| 226 |
+
"""
|
| 227 |
+
测试:
|
| 228 |
+
裁剪图像,保留下半部分
|
| 229 |
+
交换图像的蓝色通道和红色通道
|
| 230 |
+
将图像转为灰度图像
|
| 231 |
+
将csv文件转excel表格
|
| 232 |
+
"""
|
crazy_functions/__init__.py
ADDED
|
File without changes
|
crazy_functions/agent_fns/auto_agent.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, ProxyNetworkActivate
|
| 2 |
+
from toolbox import report_exception, get_log_folder, update_ui_lastest_msg, Singleton
|
| 3 |
+
from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
|
| 4 |
+
from crazy_functions.agent_fns.general import AutoGenGeneral
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class AutoGenMath(AutoGenGeneral):
    """Math-oriented AutoGen plugin: one LLM assistant plus a human-driven user proxy."""

    def define_agents(self):
        """Return the agent specs consumed by AutoGenGeneral.exe_autogen.

        Each dict carries the agent's name, its autogen class under "cls",
        and any extra constructor kwargs.
        """
        from autogen import AssistantAgent, UserProxyAgent
        return [
            {
                "name": "assistant",           # name of the agent
                "cls": AssistantAgent,         # class of the agent
            },
            {
                "name": "user_proxy",          # name of the agent
                "cls": UserProxyAgent,         # class of the agent
                "human_input_mode": "ALWAYS",  # always ask for human input
                "llm_config": False,           # disables llm-based auto reply
            },
        ]
|
crazy_functions/agent_fns/echo_agent.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
|
| 2 |
+
|
| 3 |
+
class EchoDemo(PluginMultiprocessManager):
    """Minimal demo plugin: echoes user input back through the subprocess pipe."""

    def subprocess_worker(self, child_conn):
        # ⭐⭐ runs in the subprocess
        self.child_conn = child_conn
        while True:
            msg = self.child_conn.recv()  # PipeCom
            if msg.cmd == "user_input":
                # Echo the input, then wait for further user input.
                self.child_conn.send(PipeCom("show", msg.content))
                wait_success = self.subprocess_worker_wait_user_feedback(wait_msg="我准备好处理下一个问题了.")
                if not wait_success:
                    # Wait timed out — terminate this subprocess_worker.
                    break
            elif msg.cmd == "terminate":
                self.child_conn.send(PipeCom("done", ""))
                break
        print('[debug] subprocess_worker terminated')
|
crazy_functions/agent_fns/general.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import trimmed_format_exc, get_conf, ProxyNetworkActivate
|
| 2 |
+
from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
|
| 3 |
+
from request_llms.bridge_all import predict_no_ui_long_connection
|
| 4 |
+
import time
|
| 5 |
+
|
| 6 |
+
def gpt_academic_generate_oai_reply(
    self,
    messages,
    sender,
    config,
):
    """Drop-in replacement for autogen's `generate_oai_reply`.

    Routes the agent's LLM call through GPT-Academic's own bridge
    (`predict_no_ui_long_connection`) instead of the OpenAI client.

    Returns:
        (False, None) when llm_config is disabled, otherwise (True, reply).
    """
    llm_config = self.llm_config if config is None else config
    if llm_config is False:
        return False, None
    if messages is None:
        messages = self._oai_messages[sender]

    # Last message is the prompt; everything before it is history.
    inputs = messages[-1]['content']
    history = [message['content'] for message in messages[:-1]]
    context = messages[-1].pop("context", None)
    assert context is None, "预留参数 context 未实现"

    reply = predict_no_ui_long_connection(
        inputs=inputs,
        llm_kwargs=llm_config,
        history=history,
        sys_prompt=self._oai_system_message[0]['content'],
        console_slience=True,
    )
    # NOTE: the original computed `assumed_done = reply.endswith('\nTERMINATE')`
    # but never used it; dropped as dead code.
    return True, reply
|
| 34 |
+
|
| 35 |
+
class AutoGenGeneral(PluginMultiprocessManager):
    """Base class wiring autogen agents to GPT-Academic's UI through the process pipe.

    Subclasses implement `define_agents`; the chat itself runs in a child
    process (`subprocess_worker` / `exe_autogen`).
    """

    def gpt_academic_print_override(self, user_proxy, message, sender):
        # ⭐⭐ run in subprocess: forward agent output to the UI via the pipe.
        self.child_conn.send(PipeCom("show", sender.name + "\n\n---\n\n" + message["content"]))

    def gpt_academic_get_human_input(self, user_proxy, message):
        # ⭐⭐ run in subprocess: block (up to 5 minutes) until the user replies.
        patience = 300
        begin_waiting_time = time.time()
        self.child_conn.send(PipeCom("interact", message))
        while True:
            time.sleep(0.5)
            if self.child_conn.poll():
                wait_success = True
                break
            if time.time() - begin_waiting_time > patience:
                self.child_conn.send(PipeCom("done", ""))
                wait_success = False
                break
        if wait_success:
            return self.child_conn.recv().content
        else:
            raise TimeoutError("等待用户输入超时")

    def define_agents(self):
        """Subclasses return a list of agent spec dicts (see AutoGenMath)."""
        raise NotImplementedError

    def exe_autogen(self, input):
        # ⭐⭐ run in subprocess
        input = input.content
        with ProxyNetworkActivate("AutoGen"):
            code_execution_config = {"work_dir": self.autogen_work_dir, "use_docker": self.use_docker}
            agents = self.define_agents()
            user_proxy = None
            assistant = None
            for agent_kwargs in agents:
                agent_cls = agent_kwargs.pop('cls')
                kwargs = {
                    'llm_config': self.llm_kwargs,
                    'code_execution_config': code_execution_config,
                }
                kwargs.update(agent_kwargs)
                agent_handle = agent_cls(**kwargs)
                # BUGFIX: bind agent_kwargs as a default argument. A plain closure
                # late-binds the loop variable, so every agent would print via the
                # *last* iteration's kwargs dict.
                agent_handle._print_received_message = lambda a, b, ak=agent_kwargs: self.gpt_academic_print_override(ak, a, b)
                for d in agent_handle._reply_func_list:
                    if hasattr(d['reply_func'], '__name__') and d['reply_func'].__name__ == 'generate_oai_reply':
                        d['reply_func'] = gpt_academic_generate_oai_reply
                if agent_kwargs['name'] == 'user_proxy':
                    # BUGFIX: bind the handle itself; the original closed over the
                    # `user_proxy` local, which is still None at lambda creation.
                    agent_handle.get_human_input = lambda a, h=agent_handle: self.gpt_academic_get_human_input(h, a)
                    user_proxy = agent_handle
                if agent_kwargs['name'] == 'assistant': assistant = agent_handle
            try:
                if user_proxy is None or assistant is None: raise Exception("用户代理或助理代理未定义")
                user_proxy.initiate_chat(assistant, message=input)
            except Exception as e:
                tb_str = '```\n' + trimmed_format_exc() + '```'
                self.child_conn.send(PipeCom("done", "AutoGen 执行失败: \n\n" + tb_str))

    def subprocess_worker(self, child_conn):
        # ⭐⭐ run in subprocess: serve pipe messages until the process is terminated.
        self.child_conn = child_conn
        while True:
            msg = self.child_conn.recv()  # PipeCom
            self.exe_autogen(msg)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
class AutoGenGroupChat(AutoGenGeneral):
    """AutoGen variant that runs a multi-agent group chat instead of a 1:1 chat."""

    def exe_autogen(self, input):
        # ⭐⭐ run in subprocess
        import autogen

        input = input.content
        with ProxyNetworkActivate("AutoGen"):
            code_execution_config = {"work_dir": self.autogen_work_dir, "use_docker": self.use_docker}
            agents = self.define_agents()
            agents_instances = []
            # BUGFIX: the original never initialized user_proxy, so a spec list
            # without a "user_proxy" agent raised NameError instead of the
            # intended "user_proxy is not defined" Exception below.
            user_proxy = None
            for agent_kwargs in agents:
                agent_cls = agent_kwargs.pop("cls")
                kwargs = {"code_execution_config": code_execution_config}
                kwargs.update(agent_kwargs)
                agent_handle = agent_cls(**kwargs)
                # BUGFIX: default-bind agent_kwargs (late-binding closure bug,
                # see AutoGenGeneral.exe_autogen).
                agent_handle._print_received_message = lambda a, b, ak=agent_kwargs: self.gpt_academic_print_override(ak, a, b)
                agents_instances.append(agent_handle)
                if agent_kwargs["name"] == "user_proxy":
                    user_proxy = agent_handle
                    user_proxy.get_human_input = lambda a: self.gpt_academic_get_human_input(user_proxy, a)
            try:
                groupchat = autogen.GroupChat(agents=agents_instances, messages=[], max_round=50)
                manager = autogen.GroupChatManager(groupchat=groupchat, **self.define_group_chat_manager_config())
                manager._print_received_message = lambda a, b: self.gpt_academic_print_override(agent_kwargs, a, b)
                manager.get_human_input = lambda a: self.gpt_academic_get_human_input(manager, a)
                if user_proxy is None:
                    raise Exception("user_proxy is not defined")
                user_proxy.initiate_chat(manager, message=input)
            except Exception:
                tb_str = "```\n" + trimmed_format_exc() + "```"
                self.child_conn.send(PipeCom("done", "AutoGen exe failed: \n\n" + tb_str))

    def define_group_chat_manager_config(self):
        """Subclasses return kwargs for autogen.GroupChatManager."""
        raise NotImplementedError
|
crazy_functions/agent_fns/persistent.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import Singleton
|
| 2 |
+
@Singleton
class GradioMultiuserManagerForPersistentClasses():
    """Process-wide registry of live plugin instances, keyed per user/session."""

    def __init__(self):
        # key -> instance; instances are expected to expose is_alive()
        self.mapping = {}

    def already_alive(self, key):
        """True only if `key` is registered and its instance reports alive."""
        if key not in self.mapping:
            return False
        return self.mapping[key].is_alive()

    def set(self, key, x):
        """Register `x` under `key` and return it."""
        self.mapping[key] = x
        return self.mapping[key]

    def get(self, key):
        """Return the instance registered under `key` (KeyError if absent)."""
        return self.mapping[key]
|
| 16 |
+
|
crazy_functions/agent_fns/pipe.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import get_log_folder, update_ui, gen_time_str, get_conf, promote_file_to_downloadzone
|
| 2 |
+
from crazy_functions.agent_fns.watchdog import WatchDog
|
| 3 |
+
import time, os
|
| 4 |
+
|
| 5 |
+
class PipeCom:
    """Tiny message envelope exchanged over the parent/child multiprocessing pipe.

    `cmd` is the command tag ("show", "interact", "done", "user_input", ...)
    and `content` is its payload.
    """

    def __init__(self, cmd, content) -> None:
        self.cmd = cmd          # command tag
        self.content = content  # payload for the command
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class PluginMultiprocessManager:
|
| 12 |
+
def __init__(self, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
| 13 |
+
# ⭐ run in main process
|
| 14 |
+
self.autogen_work_dir = os.path.join(get_log_folder("autogen"), gen_time_str())
|
| 15 |
+
self.previous_work_dir_files = {}
|
| 16 |
+
self.llm_kwargs = llm_kwargs
|
| 17 |
+
self.plugin_kwargs = plugin_kwargs
|
| 18 |
+
self.chatbot = chatbot
|
| 19 |
+
self.history = history
|
| 20 |
+
self.system_prompt = system_prompt
|
| 21 |
+
# self.web_port = web_port
|
| 22 |
+
self.alive = True
|
| 23 |
+
self.use_docker = get_conf("AUTOGEN_USE_DOCKER")
|
| 24 |
+
self.last_user_input = ""
|
| 25 |
+
# create a thread to monitor self.heartbeat, terminate the instance if no heartbeat for a long time
|
| 26 |
+
timeout_seconds = 5 * 60
|
| 27 |
+
self.heartbeat_watchdog = WatchDog(timeout=timeout_seconds, bark_fn=self.terminate, interval=5)
|
| 28 |
+
self.heartbeat_watchdog.begin_watch()
|
| 29 |
+
|
| 30 |
+
def feed_heartbeat_watchdog(self):
|
| 31 |
+
# feed this `dog`, so the dog will not `bark` (bark_fn will terminate the instance)
|
| 32 |
+
self.heartbeat_watchdog.feed()
|
| 33 |
+
|
| 34 |
+
def is_alive(self):
|
| 35 |
+
return self.alive
|
| 36 |
+
|
| 37 |
+
def launch_subprocess_with_pipe(self):
|
| 38 |
+
# ⭐ run in main process
|
| 39 |
+
from multiprocessing import Process, Pipe
|
| 40 |
+
|
| 41 |
+
parent_conn, child_conn = Pipe()
|
| 42 |
+
self.p = Process(target=self.subprocess_worker, args=(child_conn,))
|
| 43 |
+
self.p.daemon = True
|
| 44 |
+
self.p.start()
|
| 45 |
+
return parent_conn
|
| 46 |
+
|
| 47 |
+
def terminate(self):
|
| 48 |
+
self.p.terminate()
|
| 49 |
+
self.alive = False
|
| 50 |
+
print("[debug] instance terminated")
|
| 51 |
+
|
| 52 |
+
def subprocess_worker(self, child_conn):
|
| 53 |
+
# ⭐⭐ run in subprocess
|
| 54 |
+
raise NotImplementedError
|
| 55 |
+
|
| 56 |
+
def send_command(self, cmd):
|
| 57 |
+
# ⭐ run in main process
|
| 58 |
+
repeated = False
|
| 59 |
+
if cmd == self.last_user_input:
|
| 60 |
+
repeated = True
|
| 61 |
+
cmd = ""
|
| 62 |
+
else:
|
| 63 |
+
self.last_user_input = cmd
|
| 64 |
+
self.parent_conn.send(PipeCom("user_input", cmd))
|
| 65 |
+
return repeated, cmd
|
| 66 |
+
|
| 67 |
+
def immediate_showoff_when_possible(self, fp):
|
| 68 |
+
# ⭐ 主进程
|
| 69 |
+
# 获取fp的拓展名
|
| 70 |
+
file_type = fp.split('.')[-1]
|
| 71 |
+
# 如果是文本文件, 则直接显示文本内容
|
| 72 |
+
if file_type.lower() in ['png', 'jpg']:
|
| 73 |
+
image_path = os.path.abspath(fp)
|
| 74 |
+
self.chatbot.append([
|
| 75 |
+
'检测到新生图像:',
|
| 76 |
+
f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
|
| 77 |
+
])
|
| 78 |
+
yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 79 |
+
|
| 80 |
+
def overwatch_workdir_file_change(self):
|
| 81 |
+
# ⭐ 主进程 Docker 外挂文件夹监控
|
| 82 |
+
path_to_overwatch = self.autogen_work_dir
|
| 83 |
+
change_list = []
|
| 84 |
+
# 扫描路径下的所有文件, 并与self.previous_work_dir_files中所记录的文件进行对比,
|
| 85 |
+
# 如果有新文件出现,或者文件的修改时间发生变化,则更新self.previous_work_dir_files中
|
| 86 |
+
# 把新文件和发生变化的文件的路径记录到 change_list 中
|
| 87 |
+
for root, dirs, files in os.walk(path_to_overwatch):
|
| 88 |
+
for file in files:
|
| 89 |
+
file_path = os.path.join(root, file)
|
| 90 |
+
if file_path not in self.previous_work_dir_files.keys():
|
| 91 |
+
last_modified_time = os.stat(file_path).st_mtime
|
| 92 |
+
self.previous_work_dir_files.update({file_path: last_modified_time})
|
| 93 |
+
change_list.append(file_path)
|
| 94 |
+
else:
|
| 95 |
+
last_modified_time = os.stat(file_path).st_mtime
|
| 96 |
+
if last_modified_time != self.previous_work_dir_files[file_path]:
|
| 97 |
+
self.previous_work_dir_files[file_path] = last_modified_time
|
| 98 |
+
change_list.append(file_path)
|
| 99 |
+
if len(change_list) > 0:
|
| 100 |
+
file_links = ""
|
| 101 |
+
for f in change_list:
|
| 102 |
+
res = promote_file_to_downloadzone(f)
|
| 103 |
+
file_links += f'<br/><a href="file={res}" target="_blank">{res}</a>'
|
| 104 |
+
yield from self.immediate_showoff_when_possible(f)
|
| 105 |
+
|
| 106 |
+
self.chatbot.append(['检测到新生文档.', f'文档清单如下: {file_links}'])
|
| 107 |
+
yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 108 |
+
return change_list
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def main_process_ui_control(self, txt, create_or_resume) -> str:
|
| 112 |
+
# ⭐ 主进程
|
| 113 |
+
if create_or_resume == 'create':
|
| 114 |
+
self.cnt = 1
|
| 115 |
+
self.parent_conn = self.launch_subprocess_with_pipe() # ⭐⭐⭐
|
| 116 |
+
repeated, cmd_to_autogen = self.send_command(txt)
|
| 117 |
+
if txt == 'exit':
|
| 118 |
+
self.chatbot.append([f"结束", "结束信号已明确,终止AutoGen程序。"])
|
| 119 |
+
yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 120 |
+
self.terminate()
|
| 121 |
+
return "terminate"
|
| 122 |
+
|
| 123 |
+
# patience = 10
|
| 124 |
+
|
| 125 |
+
while True:
|
| 126 |
+
time.sleep(0.5)
|
| 127 |
+
if not self.alive:
|
| 128 |
+
# the heartbeat watchdog might have it killed
|
| 129 |
+
self.terminate()
|
| 130 |
+
return "terminate"
|
| 131 |
+
if self.parent_conn.poll():
|
| 132 |
+
self.feed_heartbeat_watchdog()
|
| 133 |
+
if "[GPT-Academic] 等待中" in self.chatbot[-1][-1]:
|
| 134 |
+
self.chatbot.pop(-1) # remove the last line
|
| 135 |
+
if "等待您的进一步指令" in self.chatbot[-1][-1]:
|
| 136 |
+
self.chatbot.pop(-1) # remove the last line
|
| 137 |
+
if '[GPT-Academic] 等待中' in self.chatbot[-1][-1]:
|
| 138 |
+
self.chatbot.pop(-1) # remove the last line
|
| 139 |
+
msg = self.parent_conn.recv() # PipeCom
|
| 140 |
+
if msg.cmd == "done":
|
| 141 |
+
self.chatbot.append([f"结束", msg.content])
|
| 142 |
+
self.cnt += 1
|
| 143 |
+
yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 144 |
+
self.terminate()
|
| 145 |
+
break
|
| 146 |
+
if msg.cmd == "show":
|
| 147 |
+
yield from self.overwatch_workdir_file_change()
|
| 148 |
+
notice = ""
|
| 149 |
+
if repeated: notice = "(自动忽略重复的输入)"
|
| 150 |
+
self.chatbot.append([f"运行阶段-{self.cnt}(上次用户反馈输入为: 「{cmd_to_autogen}」{notice}", msg.content])
|
| 151 |
+
self.cnt += 1
|
| 152 |
+
yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 153 |
+
if msg.cmd == "interact":
|
| 154 |
+
yield from self.overwatch_workdir_file_change()
|
| 155 |
+
self.chatbot.append([f"程序抵达用户反馈节点.", msg.content +
|
| 156 |
+
"\n\n等待您的进一步指令." +
|
| 157 |
+
"\n\n(1) 一般情况下您不需要说什么, 清空输入区, 然后直接点击“提交”以继续. " +
|
| 158 |
+
"\n\n(2) 如果您需要补充些什么, 输入要反馈的内容, 直接点击“提交”以继续. " +
|
| 159 |
+
"\n\n(3) 如果您想终止程序, 输入exit, 直接点击“提交”以终止AutoGen并解锁. "
|
| 160 |
+
])
|
| 161 |
+
yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 162 |
+
# do not terminate here, leave the subprocess_worker instance alive
|
| 163 |
+
return "wait_feedback"
|
| 164 |
+
else:
|
| 165 |
+
self.feed_heartbeat_watchdog()
|
| 166 |
+
if '[GPT-Academic] 等待中' not in self.chatbot[-1][-1]:
|
| 167 |
+
# begin_waiting_time = time.time()
|
| 168 |
+
self.chatbot.append(["[GPT-Academic] 等待AutoGen执行结果 ...", "[GPT-Academic] 等待中"])
|
| 169 |
+
self.chatbot[-1] = [self.chatbot[-1][0], self.chatbot[-1][1].replace("[GPT-Academic] 等待中", "[GPT-Academic] 等待中.")]
|
| 170 |
+
yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 171 |
+
# if time.time() - begin_waiting_time > patience:
|
| 172 |
+
# self.chatbot.append([f"结束", "等待超时, 终止AutoGen程序。"])
|
| 173 |
+
# yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 174 |
+
# self.terminate()
|
| 175 |
+
# return "terminate"
|
| 176 |
+
|
| 177 |
+
self.terminate()
|
| 178 |
+
return "terminate"
|
| 179 |
+
|
| 180 |
+
def subprocess_worker_wait_user_feedback(self, wait_msg="wait user feedback"):
    """Ask the parent process for user feedback and block until it arrives.

    Sends an "interact" message through the pipe, then polls every 0.5 s
    for up to five minutes. Returns True when feedback arrived, False when
    the wait timed out (in which case a "done" message is sent to end the
    session).
    """
    # ⭐⭐ run in subprocess
    deadline = time.time() + 5 * 60  # patience: five minutes
    self.child_conn.send(PipeCom("interact", wait_msg))
    got_feedback = False
    while not got_feedback:
        time.sleep(0.5)
        if self.child_conn.poll():
            got_feedback = True
        elif time.time() > deadline:
            # ran out of patience — tell the parent we are done and give up
            self.child_conn.send(PipeCom("done", ""))
            break
    return got_feedback
|
crazy_functions/agent_fns/watchdog.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import threading, time
|
| 2 |
+
|
| 3 |
+
class WatchDog():
    """A software watchdog running in a daemon thread.

    After ``begin_watch()`` is called, the dog checks every ``interval``
    seconds whether more than ``timeout`` seconds have passed since the
    last ``feed()``. If so it optionally prints ``msg``, invokes
    ``bark_fn`` once, and stops. Setting ``kill_dog = True`` stops the
    watcher without barking.
    """

    def __init__(self, timeout, bark_fn, interval=3, msg="") -> None:
        self.last_feed = None      # timestamp of the most recent feed(); set by begin_watch()
        self.timeout = timeout     # seconds of starvation tolerated before barking
        self.bark_fn = bark_fn     # zero-argument callback fired on timeout
        self.interval = interval   # polling period in seconds
        self.msg = msg             # optional message printed right before barking
        self.kill_dog = False      # cooperative stop flag checked each cycle

    def watch(self):
        """Polling loop: exit when killed, or bark once when starved."""
        while not self.kill_dog:
            starved = (time.time() - self.last_feed) > self.timeout
            if starved:
                if self.msg:
                    print(self.msg)
                self.bark_fn()
                return
            time.sleep(self.interval)

    def begin_watch(self):
        """Record the first feed time and start the watcher as a daemon thread."""
        self.last_feed = time.time()
        watcher = threading.Thread(target=self.watch, daemon=True)
        watcher.start()

    def feed(self):
        """Reset the starvation timer (call periodically to keep the dog quiet)."""
        self.last_feed = time.time()
|
crazy_functions/chatglm微调工具.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import CatchException, update_ui, promote_file_to_downloadzone
|
| 2 |
+
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
| 3 |
+
import datetime, json
|
| 4 |
+
|
| 5 |
+
def fetch_items(list_of_items, batch_size):
    """Yield successive ``batch_size``-sized slices of *list_of_items*.

    The final slice may be shorter when the list length is not an exact
    multiple of the batch size. An empty list yields nothing.
    """
    total = len(list_of_items)
    start = 0
    while start < total:
        yield list_of_items[start:start + batch_size]
        start += batch_size
|
| 8 |
+
|
| 9 |
+
def string_to_options(arguments):
    """Parse a shell-style option string into an ``argparse.Namespace``.

    *arguments* is split with ``shlex`` (so quoted values survive) and fed
    to a parser that knows all fine-tuning options with their defaults.
    """
    import argparse
    import shlex

    parser = argparse.ArgumentParser()

    # (flag, type, help text, default) — help texts mirror the original
    # definitions, including the reused "System prompt" label on --batch.
    option_spec = [
        ("--llm_to_learn", str, "LLM model to learn", "gpt-3.5-turbo"),
        ("--prompt_prefix", str, "Prompt prefix", ''),
        ("--system_prompt", str, "System prompt", ''),
        ("--batch", int, "System prompt", 50),
        ("--pre_seq_len", int, "pre_seq_len", 50),
        ("--learning_rate", float, "learning_rate", 2e-2),
        ("--num_gpus", int, "num_gpus", 1),
        ("--json_dataset", str, "json_dataset", ""),
        ("--ptuning_directory", str, "ptuning_directory", ""),
    ]
    for flag, value_type, help_text, default in option_spec:
        parser.add_argument(flag, type=value_type, help=help_text, default=default)

    return parser.parse_args(shlex.split(arguments))
|
| 33 |
+
|
| 34 |
+
@CatchException
def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Generate a fine-tuning dataset: read a JSONL file of "content" records,
    query a teacher LLM batch-by-batch, and write {"content", "summary"}
    pairs to ``<txt>.generated.json`` in the download zone.

    txt             user input from the text box; here, the path of the JSONL input file
    llm_kwargs      GPT model parameters (temperature, top_p, ...), usually passed through
    plugin_kwargs   plugin parameters; "advanced_arg" carries the CLI-style option string
    chatbot         chat display handle, used to show progress to the user
    history         chat history (cleared below to avoid input overflow)
    system_prompt   silent system prompt for GPT
    web_port        port the web app is currently running on
    """
    history = []    # clear history to avoid overflowing the model input
    chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成"))
    # an empty "advanced_arg" counts as absent
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    args = plugin_kwargs.get("advanced_arg", None)
    if args is None:
        chatbot.append(("没给定指令", "退出"))
        yield from update_ui(chatbot=chatbot, history=history); return
    else:
        arguments = string_to_options(arguments=args)

    # collect every "content" field from the JSONL input file
    dat = []
    with open(txt, 'r', encoding='utf8') as f:
        for line in f.readlines():
            json_dat = json.loads(line)
            dat.append(json_dat["content"])

    # query the teacher model one batch at a time
    llm_kwargs['llm_model'] = arguments.llm_to_learn
    for batch in fetch_items(dat, arguments.batch):
        res = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
            inputs_array=[f"{arguments.prompt_prefix}\n\n{b}" for b in (batch)],
            inputs_show_user_array=[f"Show Nothing" for _ in (batch)],
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history_array=[[] for _ in (batch)],
            sys_prompt_array=[arguments.system_prompt for _ in (batch)],
            max_workers=10 # maximum parallelism allowed by OpenAI
        )

        # res interleaves [input, answer, input, answer, ...] — answers sit at odd indices
        with open(txt+'.generated.json', 'a+', encoding='utf8') as f:
            for b, r in zip(batch, res[1::2]):
                f.write(json.dumps({"content":b, "summary":r}, ensure_ascii=False)+'\n')

    promote_file_to_downloadzone(txt+'.generated.json', rename_file='generated.json', chatbot=chatbot)
    return
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
@CatchException
def 启动微调(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Launch ChatGLM2-6B P-Tuning fine-tuning via ``torchrun`` in the
    configured ptuning directory, blocking for up to 24 hours.

    txt             user input from the text box
    llm_kwargs      GPT model parameters (temperature, top_p, ...), usually passed through
    plugin_kwargs   plugin parameters; "advanced_arg" carries the CLI-style option string
    chatbot         chat display handle, used to show progress to the user
    history         chat history (cleared below to avoid input overflow)
    system_prompt   silent system prompt for GPT
    web_port        port the web app is currently running on
    """
    import subprocess
    history = []    # clear history to avoid overflowing the model input
    chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成"))
    # an empty "advanced_arg" counts as absent
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    args = plugin_kwargs.get("advanced_arg", None)
    if args is None:
        chatbot.append(("没给定指令", "退出"))
        yield from update_ui(chatbot=chatbot, history=history); return
    else:
        arguments = string_to_options(arguments=args)

    pre_seq_len = arguments.pre_seq_len              # e.g. 128
    learning_rate = arguments.learning_rate          # e.g. 2e-2
    num_gpus = arguments.num_gpus                    # e.g. 1
    json_dataset = arguments.json_dataset            # e.g. 't_code.json'
    ptuning_directory = arguments.ptuning_directory  # e.g. '/home/hmp/ChatGLM2-6B/ptuning'

    # NOTE(review): the command is interpolated into a shell string and run with
    # shell=True, with values taken from user-supplied plugin arguments — this is a
    # potential shell-injection vector; confirm inputs are trusted before exposing.
    command = f"torchrun --standalone --nnodes=1 --nproc-per-node={num_gpus} main.py \
        --do_train \
        --train_file AdvertiseGen/{json_dataset} \
        --validation_file AdvertiseGen/{json_dataset} \
        --preprocessing_num_workers 20 \
        --prompt_column content \
        --response_column summary \
        --overwrite_cache \
        --model_name_or_path THUDM/chatglm2-6b \
        --output_dir output/clothgen-chatglm2-6b-pt-{pre_seq_len}-{learning_rate} \
        --overwrite_output_dir \
        --max_source_length 256 \
        --max_target_length 256 \
        --per_device_train_batch_size 1 \
        --per_device_eval_batch_size 1 \
        --gradient_accumulation_steps 16 \
        --predict_with_generate \
        --max_steps 100 \
        --logging_steps 10 \
        --save_steps 20 \
        --learning_rate {learning_rate} \
        --pre_seq_len {pre_seq_len} \
        --quantization_bit 4"

    process = subprocess.Popen(command, shell=True, cwd=ptuning_directory)
    try:
        process.communicate(timeout=3600*24)  # wait for completion, up to 24 hours
    except subprocess.TimeoutExpired:
        process.kill()  # training overran the budget — terminate it
    return
|
crazy_functions/crazy_utils.py
ADDED
|
@@ -0,0 +1,609 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import update_ui, get_conf, trimmed_format_exc, get_max_token, Singleton
|
| 2 |
+
import threading
|
| 3 |
+
import os
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
def input_clipping(inputs, history, max_token_limit):
    """
    Clip *inputs* and/or *history* so that their combined token count fits
    within ``max_token_limit``; returns the (possibly shortened)
    ``(inputs, history)`` pair.

    When the input itself uses less than half of the budget, only the
    history is clipped; otherwise input and history are clipped together.
    Token counts use the gpt-3.5-turbo tokenizer regardless of the model
    actually in use (an approximation).
    """
    import numpy as np
    from request_llms.bridge_all import model_info
    enc = model_info["gpt-3.5-turbo"]['tokenizer']
    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

    mode = 'input-and-history'
    # when the input's token share is under half the budget, clip history only
    input_token_num = get_token_num(inputs)
    if input_token_num < max_token_limit//2:
        mode = 'only-history'
        max_token_limit = max_token_limit - input_token_num  # remaining budget for history

    # everything[0] is the input (or a placeholder), the rest is history
    everything = [inputs] if mode == 'input-and-history' else ['']
    everything.extend(history)
    n_token = get_token_num('\n'.join(everything))
    everything_token = [get_token_num(e) for e in everything]
    delta = max(everything_token) // 16 # truncation granularity per round

    while n_token > max_token_limit:
        where = np.argmax(everything_token)  # always trim the longest piece
        encoded = enc.encode(everything[where], disallowed_special=())
        clipped_encoded = encoded[:len(encoded)-delta]
        everything[where] = enc.decode(clipped_encoded)[:-1]    # -1 to remove the may-be illegal char
        everything_token[where] = get_token_num(everything[where])
        n_token = get_token_num('\n'.join(everything))

    if mode == 'input-and-history':
        inputs = everything[0]
    else:
        pass
    history = everything[1:]
    return inputs, history
|
| 39 |
+
|
| 40 |
+
def request_gpt_model_in_new_thread_with_ui_alive(
        inputs, inputs_show_user, llm_kwargs,
        chatbot, history, sys_prompt, refresh_interval=0.2,
        handle_token_exceed=True,
        retry_times_at_unknown_error=2,
        ):
    """
    Request a GPT model in a worker thread while keeping the UI alive.

    Args:
        inputs (string): the real input sent to the model
        inputs_show_user (string): what is shown in the chat window instead of the
            (possibly verbose) real input, to keep the report readable
        llm_kwargs: model parameters (top_p, temperature, ...)
        chatbot: chat window handle, used to stream partial output to the user
        history (list): conversation history
        sys_prompt (string): system prompt (e.g. "you are a translator ...")
        refresh_interval (float, optional): UI refresh period (default 0.2;
            keep below 1, never above 3 — purely cosmetic)
        handle_token_exceed: auto-handle token overflow by hard truncation (default True)
        retry_times_at_unknown_error: number of retries on unknown failures

    Returns:
        the model's final answer (string)
    """
    import time
    from concurrent.futures import ThreadPoolExecutor
    from request_llms.bridge_all import predict_no_ui_long_connection
    # user feedback
    chatbot.append([inputs_show_user, ""])
    yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI
    executor = ThreadPoolExecutor(max_workers=16)
    # shared with the worker: [partial answer, watchdog timestamp, extra slot]
    mutable = ["", time.time(), ""]
    # watchdog patience in seconds
    watch_dog_patience = 5
    # worker task
    def _req_gpt(inputs, history, sys_prompt):
        retry_op = retry_times_at_unknown_error
        exceeded_cnt = 0
        while True:
            # watchdog: if the main generator stopped feeding mutable[1], abort
            if len(mutable) >= 2 and (time.time()-mutable[1]) > watch_dog_patience:
                raise RuntimeError("检测到程序终止。")
            try:
                # case 1: completed successfully
                result = predict_no_ui_long_connection(
                    inputs=inputs, llm_kwargs=llm_kwargs,
                    history=history, sys_prompt=sys_prompt, observe_window=mutable)
                return result
            except ConnectionAbortedError as token_exceeded_error:
                # case 2: token overflow
                if handle_token_exceed:
                    exceeded_cnt += 1
                    # chosen strategy: compute a ratio and keep as much text as possible
                    from toolbox import get_reduce_token_percent
                    p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
                    MAX_TOKEN = get_max_token(llm_kwargs)
                    EXCEED_ALLO = 512 + 512 * exceeded_cnt  # allowance grows each overflow
                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
                    mutable[0] += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
                    continue # retry
                else:
                    # chosen strategy: give up, returning the traceback as the answer
                    tb_str = '```\n' + trimmed_format_exc() + '```'
                    mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                    return mutable[0] # give up
            except:
                # case 3: any other error — retry a few times
                tb_str = '```\n' + trimmed_format_exc() + '```'
                print(tb_str)
                mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                if retry_op > 0:
                    retry_op -= 1
                    mutable[0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}:\n\n"
                    # rate-limit errors get an extra 30 s back-off
                    if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
                        time.sleep(30)
                    time.sleep(5)
                    continue # retry
                else:
                    time.sleep(5)
                    return mutable[0] # give up

    # submit the task
    future = executor.submit(_req_gpt, inputs, history, sys_prompt)
    while True:
        # yield once per interval to refresh the frontend
        time.sleep(refresh_interval)
        # "feed the dog": keep the worker's watchdog satisfied
        mutable[1] = time.time()
        if future.done():
            break
        chatbot[-1] = [chatbot[-1][0], mutable[0]]
        yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI

    final_result = future.result()
    chatbot[-1] = [chatbot[-1][0], final_result]
    yield from update_ui(chatbot=chatbot, history=[]) # on success, overwrite any interim error text
    return final_result
|
| 137 |
+
|
| 138 |
+
def can_multi_process(llm):
    """Return True if *llm* names a backend that tolerates concurrent requests.

    Remote API backends (OpenAI, api2d, Azure, Spark, Zhipu) can be queried
    in parallel; anything else (e.g. local chatglm models) must be
    serialized by the caller to avoid severe stalls.
    """
    # str.startswith accepts a tuple of prefixes — one call instead of a chain.
    return llm.startswith(('gpt-', 'api2d-', 'azure-', 'spark', 'zhipuai'))
|
| 145 |
+
|
| 146 |
+
def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array, inputs_show_user_array, llm_kwargs,
        chatbot, history_array, sys_prompt_array,
        refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
        handle_token_exceed=True, show_user_at_complete=False,
        retry_times_at_unknown_error=2,
        ):
    """
    Multi-threaded version of the GPT request helper. Features:
        - streams remote data to the UI in real time
        - uses a thread pool whose size can be tuned to avoid OpenAI rate limits
        - handles mid-run interruption via per-task watchdogs
        - on network failure, folds the traceback and any partial answer into the output

    Args (variables ending in _array are lists, one element per sub-task;
    each element is dispatched to its own worker thread):
        inputs_array (list): real input of each sub-task
        inputs_show_user_array (list): what to display for each sub-task, hiding
            verbose real inputs to keep the summary report readable
        llm_kwargs: model parameters
        chatbot: chat window handle for streaming visualization
        history_array (list): per-sub-task conversation history (list of lists)
        sys_prompt_array (list): per-sub-task system prompt
        refresh_interval (float, optional): UI refresh period (default 0.2;
            keep below 1, never above 3 — purely cosmetic)
        max_workers (int, optional): max threads (default: from config.py); caps
            request frequency when there are very many sub-tasks
        scroller_max_len (int, optional): how many trailing characters of each
            stream to show in the ticker (default 30, cosmetic only)
        handle_token_exceed (bool, optional): auto-truncate over-long input (default True)
        show_user_at_complete (bool, optional): append each full input/output
            pair to the chat window when everything finishes
        retry_times_at_unknown_error: per-sub-task retries on unknown failure

    Returns:
        list: flattened [shown_input_1, answer_1, shown_input_2, answer_2, ...];
        failed sub-tasks carry their traceback in the answer slot for debugging.
    """
    import time, random
    from concurrent.futures import ThreadPoolExecutor
    from request_llms.bridge_all import predict_no_ui_long_connection
    assert len(inputs_array) == len(history_array)
    assert len(inputs_array) == len(sys_prompt_array)
    if max_workers == -1: # read worker count from the config file
        try: max_workers = get_conf('DEFAULT_WORKER_NUM')
        except: max_workers = 8
    if max_workers <= 0: max_workers = 3
    # disable multithreading for backends such as chatglm — it can cause severe stalls
    if not can_multi_process(llm_kwargs['llm_model']):
        max_workers = 1

    executor = ThreadPoolExecutor(max_workers=max_workers)
    n_frag = len(inputs_array)
    # user feedback
    chatbot.append(["请开始多线程操作。", ""])
    yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI
    # cross-thread state: one [partial answer, watchdog timestamp, status] per sub-task
    mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]

    # watchdog patience in seconds
    watch_dog_patience = 5

    # worker task
    def _req_gpt(index, inputs, history, sys_prompt):
        gpt_say = ""
        retry_op = retry_times_at_unknown_error
        exceeded_cnt = 0
        mutable[index][2] = "执行中"
        # timed out when the main generator stopped refreshing mutable[index][1]
        detect_timeout = lambda: len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > watch_dog_patience
        while True:
            # watchdog error
            if detect_timeout(): raise RuntimeError("检测到程序终止。")
            try:
                # case 1: completed successfully
                gpt_say = predict_no_ui_long_connection(
                    inputs=inputs, llm_kwargs=llm_kwargs, history=history,
                    sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
                )
                mutable[index][2] = "已成功"
                return gpt_say
            except ConnectionAbortedError as token_exceeded_error:
                # case 2: token overflow
                if handle_token_exceed:
                    exceeded_cnt += 1
                    # chosen strategy: compute a ratio and keep as much text as possible
                    from toolbox import get_reduce_token_percent
                    p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
                    MAX_TOKEN = get_max_token(llm_kwargs)
                    EXCEED_ALLO = 512 + 512 * exceeded_cnt  # allowance grows each overflow
                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
                    gpt_say += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
                    mutable[index][2] = f"截断重试"
                    continue # retry
                else:
                    # chosen strategy: give up
                    tb_str = '```\n' + trimmed_format_exc() + '```'
                    gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                    if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
                    mutable[index][2] = "输入过长已放弃"
                    return gpt_say # give up
            except:
                # case 3: any other error
                if detect_timeout(): raise RuntimeError("检测到程序终止。")
                tb_str = '```\n' + trimmed_format_exc() + '```'
                print(tb_str)
                gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
                if retry_op > 0:
                    retry_op -= 1
                    wait = random.randint(5, 20)
                    if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
                        wait = wait * 3
                        fail_info = "OpenAI绑定信用卡可解除频率限制 "
                    else:
                        fail_info = ""
                    # perhaps after waiting a dozen seconds things will improve
                    for i in range(wait):
                        mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1)
                    # begin retry
                    if detect_timeout(): raise RuntimeError("检测到程序终止。")
                    mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
                    continue # retry
                else:
                    mutable[index][2] = "已失败"
                    wait = 5
                    time.sleep(5)
                    return gpt_say # give up

    # launch the asynchronous tasks
    futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
        range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
    cnt = 0
    while True:
        # yield once per interval to refresh the frontend
        time.sleep(refresh_interval)
        cnt += 1
        worker_done = [h.done() for h in futures]
        # nicer UI visual effect
        observe_win = []
        # every task must "feed the dog" (watchdog)
        for thread_index, _ in enumerate(worker_done):
            mutable[thread_index][1] = time.time()
        # build a live ticker of each task's stream tail
        for thread_index, _ in enumerate(worker_done):
            print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
                replace('\n', '').replace('`', '.').replace(
                    ' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
            observe_win.append(print_something_really_funny)
        # one status line per task: running tasks show their ticker, finished ones just the status
        stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
                            if not done else f'`{mutable[thread_index][2]}`\n\n'
                            for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
        # animated trailing dots so the user sees the UI is alive
        chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
        yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI
        if all(worker_done):
            executor.shutdown()
            break

    # all asynchronous tasks have ended — collect results
    gpt_response_collection = []
    for inputs_show_user, f in zip(inputs_show_user_array, futures):
        gpt_res = f.result()
        gpt_response_collection.extend([inputs_show_user, gpt_res])

    # optionally display the results in the chat window at completion
    if show_user_at_complete:
        for inputs_show_user, f in zip(inputs_show_user_array, futures):
            gpt_res = f.result()
            chatbot.append([inputs_show_user, gpt_res])
            yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI
            time.sleep(0.5)
    return gpt_response_collection
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
def read_and_clean_pdf_text(fp):
    """
    Split and clean the text of a PDF. Uses many heuristic tricks; the
    logic is messy but the results are good.

    Input:
        fp: path of the PDF file to read and clean

    Output:
        meta_txt: cleaned text content (string)
        page_one_meta: cleaned text blocks of the first page (list)

    Cleaning rules:
        - extract the text of all blocks and merge it into one string
        - drop short blocks (fewer than 100 chars), replacing them with a newline
        - remove redundant blank lines
        - merge paragraph blocks that start with a lowercase letter, joining with a space
        - deduplicate consecutive newlines
        - replace each newline with two newlines so paragraphs are visibly separated
    """
    import fitz, copy
    import re
    import numpy as np
    from colorful import print亮黄, print亮绿
    fc = 0  # Index 0: text
    fs = 1  # Index 1: font size
    fb = 2  # Index 2: bounding box
    REMOVE_FOOT_NOTE = True # drop non-body content (smaller font: references, footnotes, captions)
    REMOVE_FOOT_FFSIZE_PERCENT = 0.95 # treated as non-body when font < body size * this ratio
    def primary_ffsize(l):
        """
        Dominant font size of a text line (weighted by character count).
        """
        fsize_statiscs = {}
        for wtf in l['spans']:
            if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0
            fsize_statiscs[wtf['size']] += len(wtf['text'])
        return max(fsize_statiscs, key=fsize_statiscs.get)

    def ffsize_same(a,b):
        """
        Whether two font sizes are approximately equal (within 2%).
        """
        return abs((a-b)/max(a,b)) < 0.02

    with fitz.open(fp) as doc:
        meta_txt = []
        meta_font = []

        meta_line = []
        meta_span = []
        ############################## <Step 1: gather raw info> ##################################
        for index, page in enumerate(doc):
            # file_content += page.get_text()
            text_areas = page.get_text("dict") # text info on this page
            for t in text_areas['blocks']:
                if 'lines' in t:
                    pf = 998
                    for l in t['lines']:
                        txt_line = "".join([wtf['text'] for wtf in l['spans']])
                        if len(txt_line) == 0: continue
                        pf = primary_ffsize(l)
                        meta_line.append([txt_line, pf, l['bbox'], l])
                        for wtf in l['spans']: # for l in t['lines']:
                            meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])])
                    # meta_line.append(["NEW_BLOCK", pf])
            # block extraction: join spans into lines, lines into blocks, un-hyphenating line breaks
            meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
                '- ', '') for t in text_areas['blocks'] if 'lines' in t])
            meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']])
                             for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t])
            if index == 0:
                page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
                    '- ', '') for t in text_areas['blocks'] if 'lines' in t]

        ############################## <Step 2: find the main body font size> ##################################
        try:
            fsize_statiscs = {}
            for span in meta_span:
                if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0
                fsize_statiscs[span[1]] += span[2]
            main_fsize = max(fsize_statiscs, key=fsize_statiscs.get)
            if REMOVE_FOOT_NOTE:
                give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT
        except:
            raise RuntimeError(f'抱歉, 我们暂时无法解析此PDF文档: {fp}。')
        ############################## <Step 3: split and regroup> ##################################
        mega_sec = []
        sec = []
        for index, line in enumerate(meta_line):
            if index == 0:
                sec.append(line[fc])
                continue
            if REMOVE_FOOT_NOTE:
                # skip lines in fonts smaller than the body threshold (footnotes etc.)
                if meta_line[index][fs] <= give_up_fize_threshold:
                    continue
            if ffsize_same(meta_line[index][fs], meta_line[index-1][fs]):
                # same font as previous line — try to detect a paragraph end:
                # ends with '.' and is noticeably narrower than the previous line
                if meta_line[index][fc].endswith('.') and\
                    (meta_line[index-1][fc] != 'NEW_BLOCK') and \
                    (meta_line[index][fb][2] - meta_line[index][fb][0]) < (meta_line[index-1][fb][2] - meta_line[index-1][fb][0]) * 0.7:
                    sec[-1] += line[fc]
                    sec[-1] += "\n\n"
                else:
                    sec[-1] += " "
                    sec[-1] += line[fc]
            else:
                if (index+1 < len(meta_line)) and \
                    meta_line[index][fs] > main_fsize:
                    # single line + larger-than-body font → treat as a heading
                    mega_sec.append(copy.deepcopy(sec))
                    sec = []
                    sec.append("# " + line[fc])
                else:
                    # try to detect a section boundary
                    if meta_line[index-1][fs] > meta_line[index][fs]:
                        sec.append("\n" + line[fc])
                    else:
                        sec.append(line[fc])
        mega_sec.append(copy.deepcopy(sec))

        finals = []
        for ms in mega_sec:
            final = " ".join(ms)
            final = final.replace('- ', ' ')
            finals.append(final)
        meta_txt = finals

        ############################## <Step 4: miscellaneous post-processing> ##################################
        def 把字符太少的块清除为回车(meta_txt):
            # replace blocks shorter than 100 chars with a bare newline
            for index, block_txt in enumerate(meta_txt):
                if len(block_txt) < 100:
                    meta_txt[index] = '\n'
            return meta_txt
        meta_txt = 把字符太少的块清除为回车(meta_txt)

        def 清理多余的空行(meta_txt):
            # collapse consecutive newline-only blocks into one
            for index in reversed(range(1, len(meta_txt))):
                if meta_txt[index] == '\n' and meta_txt[index-1] == '\n':
                    meta_txt.pop(index)
            return meta_txt
        meta_txt = 清理多余的空行(meta_txt)

        def 合并小写开头的段落块(meta_txt):
            # merge blocks starting with a lowercase word into the previous block
            def starts_with_lowercase_word(s):
                pattern = r"^[a-z]+"
                match = re.match(pattern, s)
                if match:
                    return True
                else:
                    return False
            # some PDFs start the very first paragraph with a lowercase letter;
            # capitalize it to avoid indexing errors below
            if starts_with_lowercase_word(meta_txt[0]):
                meta_txt[0] = meta_txt[0].capitalize()
            for _ in range(100):
                for index, block_txt in enumerate(meta_txt):
                    if starts_with_lowercase_word(block_txt):
                        if meta_txt[index-1] != '\n':
                            meta_txt[index-1] += ' '
                        else:
                            meta_txt[index-1] = ''
                        meta_txt[index-1] += meta_txt[index]
                        meta_txt[index] = '\n'
            return meta_txt
        meta_txt = 合并小写开头的段落块(meta_txt)
        meta_txt = 清理多余的空行(meta_txt)

        meta_txt = '\n'.join(meta_txt)
        # deduplicate consecutive newlines
        for _ in range(5):
            meta_txt = meta_txt.replace('\n\n', '\n')

        # newline -> double newline (paragraph separation)
        meta_txt = meta_txt.replace('\n', '\n\n')

        ############################## <Step 5: show the split result> ##################################
        # for f in finals:
        #     print亮黄(f)
        #     print亮绿('***************************')

    return meta_txt, page_one_meta
|
| 499 |
+
|
| 500 |
+
|
| 501 |
+
def get_files_from_everything(txt, type): # type='.md'
    """
    Collect every file with the given extension from a local folder, a single
    local file, or a remote URL (which gets downloaded into the log folder).

    Args:
        txt: a folder path, a file path, or an http(s) URL.
        type: the file extension to look for, e.g. '.md'.

    Returns:
        (success, file_manifest, project_folder) where
        success        -- True when at least one matching file was obtained;
        file_manifest  -- list of matching file paths;
        project_folder -- folder containing the files (a log folder for URLs,
                          None when nothing usable was provided).
    """
    import glob, os

    if txt.startswith('http'):
        # Remote resource: download it into a dedicated log folder.
        import requests
        from toolbox import get_conf
        from toolbox import get_log_folder, gen_time_str
        proxies = get_conf('proxies')
        try:
            r = requests.get(txt, proxies=proxies)
        except:
            raise ConnectionRefusedError(f"无法下载资源{txt},请检查。")
        path = os.path.join(get_log_folder(plugin_name='web_download'), gen_time_str()+type)
        with open(path, 'wb+') as f:
            f.write(r.content)
        return True, [path], get_log_folder(plugin_name='web_download')

    if txt.endswith(type):
        # A single file was given directly.
        return True, [txt], os.path.dirname(txt)

    if os.path.exists(txt):
        # Local folder: search recursively for files with the extension.
        matches = [f for f in glob.glob(f'{txt}/**/*'+type, recursive=True)]
        return bool(matches), matches, txt

    # Neither a URL, a file, nor an existing folder.
    return False, [], None
|
| 547 |
+
|
| 548 |
+
|
| 549 |
+
|
| 550 |
+
@Singleton
class nougat_interface():
    """Singleton wrapper around the `nougat` PDF-to-markdown CLI.

    A single lock serializes parsing, because nougat is resource heavy and its
    output folder handling is not concurrency-safe.
    """
    def __init__(self):
        # Serializes access to the nougat CLI across threads.
        self.threadLock = threading.Lock()

    def nougat_with_timeout(self, command, cwd, timeout=3600):
        """Run `command` in a shell, killing it after `timeout` seconds.

        Args:
            command: the shell command line to execute.
            cwd: working directory for the subprocess.
            timeout: hard limit in seconds before the process is killed.

        Returns:
            True if the command finished on its own, False on timeout.
        """
        import subprocess
        from toolbox import ProxyNetworkActivate
        logging.info(f'正在执行命令 {command}')
        # Proxy context: nougat may need network access to fetch its weights.
        with ProxyNetworkActivate("Nougat_Download"):
            process = subprocess.Popen(command, shell=True, cwd=cwd, env=os.environ)
        try:
            stdout, stderr = process.communicate(timeout=timeout)
        except subprocess.TimeoutExpired:
            process.kill()
            stdout, stderr = process.communicate()
            print("Process timed out!")
            return False
        return True


    def NOUGAT_parse_pdf(self, fp, chatbot, history):
        """Parse the PDF at `fp` with nougat and return the resulting .mmd path.

        Yields UI updates while queueing/loading; raises RuntimeError when
        nougat produces no output file.
        """
        from toolbox import update_ui_lastest_msg

        yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在排队, 等待线程锁...",
                                         chatbot=chatbot, history=history, delay=0)
        self.threadLock.acquire()
        # Bug fix: release the lock even if any step below raises; previously a
        # single failure (e.g. os.makedirs or the nougat run) left the lock
        # held forever and deadlocked every later call.
        try:
            import glob, threading, os
            from toolbox import get_log_folder, gen_time_str
            dst = os.path.join(get_log_folder(plugin_name='nougat'), gen_time_str())
            os.makedirs(dst)

            yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)",
                                             chatbot=chatbot, history=history, delay=0)
            self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd(), timeout=3600)
            res = glob.glob(os.path.join(dst,'*.mmd'))
            if len(res) == 0:
                raise RuntimeError("Nougat解析论文失败。")
        finally:
            self.threadLock.release()
        return res[0]
|
| 591 |
+
|
| 592 |
+
|
| 593 |
+
|
| 594 |
+
|
| 595 |
+
def try_install_deps(deps, reload_m=None):
    """pip-install packages into the user site, then reload modules.

    Args:
        deps: iterable of pip requirement strings to install.
        reload_m: optional list of already-imported module names to reload so
            the freshly installed versions take effect.
    """
    import subprocess, sys, importlib
    # Bug fix: avoid the shared mutable-default-argument pitfall (`reload_m=[]`).
    if reload_m is None:
        reload_m = []
    for dep in deps:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep])
    # Refresh the user site-packages path so new installs become importable.
    import site
    importlib.reload(site)
    for m in reload_m:
        importlib.reload(__import__(m))
|
| 603 |
+
|
| 604 |
+
|
| 605 |
+
def get_plugin_arg(plugin_kwargs, key, default):
    """Read a plugin argument, falling back to `default` when the argument is
    absent or was submitted as an empty string.

    Note: an empty-string entry is popped from `plugin_kwargs` as a side
    effect, matching the original contract.
    """
    # An empty string means "the user left the field blank" -> drop the entry.
    if plugin_kwargs.get(key, None) == "":
        plugin_kwargs.pop(key)
    # Normal path: a present, non-empty value wins; otherwise the default.
    return plugin_kwargs.get(key, default)
|
crazy_functions/gen_fns/gen_fns_shared.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import importlib
|
| 3 |
+
from toolbox import trimmed_format_exc, gen_time_str, get_log_folder
|
| 4 |
+
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, is_the_upload_folder
|
| 5 |
+
from toolbox import promote_file_to_downloadzone, get_log_folder, update_ui_lastest_msg
|
| 6 |
+
import multiprocessing
|
| 7 |
+
|
| 8 |
+
def get_class_name(class_string):
    """Return the name of the first `class Name(Base...):` found in source text."""
    import re
    # The class name is the identifier between `class ` and the opening paren.
    return re.search(r'class (\w+)\(', class_string).group(1)
|
| 13 |
+
|
| 14 |
+
def try_make_module(code, chatbot):
    """Write generated plugin `code` to disk and verify (in a subprocess, with
    a 10 s budget) that it imports and its class can be instantiated.

    Args:
        code: Python source text containing a `class Name(...)` definition.
        chatbot: UI handle used to promote the written file to the download zone.

    Returns:
        (success, traceback_text) — traceback_text is empty on success.
    """
    module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
    fn_path = f'{get_log_folder(plugin_name="gen_plugin_verify")}/{module_file}.py'
    with open(fn_path, 'w', encoding='utf8') as f: f.write(code)
    promote_file_to_downloadzone(fn_path, chatbot=chatbot)
    class_name = get_class_name(code)
    manager = multiprocessing.Manager()
    return_dict = manager.dict()
    p = multiprocessing.Process(target=is_function_successfully_generated, args=(fn_path, class_name, return_dict))
    # only has 10 seconds to run
    p.start(); p.join(timeout=10)
    if p.is_alive(): p.terminate(); p.join()
    p.close()
    # Bug fix: if the subprocess was killed (timeout/crash) before writing its
    # results, the managed dict may be missing the keys — report failure
    # instead of raising KeyError here.
    return (return_dict.get("success", False),
            return_dict.get('traceback', 'verification subprocess timed out or crashed'))
|
| 28 |
+
|
| 29 |
+
# check is_function_successfully_generated
|
| 30 |
+
def is_function_successfully_generated(fn_path, class_name, return_dict):
    """Subprocess target: import the module at `fn_path` and instantiate
    `class_name`, proving the generated plugin code at least loads.

    Results are reported through `return_dict` (typically a multiprocessing
    proxy): 'success' (bool) and 'traceback' (str, empty when successful).
    """
    return_dict['success'] = False
    return_dict['traceback'] = ""
    try:
        # Bug fix: `import importlib` alone does not guarantee the
        # `importlib.util` submodule is loaded; import it explicitly.
        import importlib.util
        # Create a spec for the module
        module_spec = importlib.util.spec_from_file_location('example_module', fn_path)
        # Load the module
        example_module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(example_module)
        # Now you can use the module
        some_class = getattr(example_module, class_name)
        # Now you can create an instance of the class
        instance = some_class()
        return_dict['success'] = True
        return
    except:
        # Deliberately broad: any failure raised by the generated code
        # (including SystemExit) must be captured and reported, not crash
        # the worker process.
        return_dict['traceback'] = trimmed_format_exc()
        return
|
| 48 |
+
|
| 49 |
+
def subprocess_worker(code, file_path, return_dict):
    """Subprocess target: materialize `code` as a module, instantiate its
    plugin class, and execute `instance.run(file_path)`.

    Reports through `return_dict`: 'result' (run() return value or None),
    'success' (bool) and 'traceback' (str, empty unless an error occurred).
    """
    return_dict['result'] = None
    return_dict['success'] = False
    return_dict['traceback'] = ""
    try:
        module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
        fn_path = f'{get_log_folder(plugin_name="gen_plugin_run")}/{module_file}.py'
        with open(fn_path, 'w', encoding='utf8') as f:
            f.write(code)
        plugin_class_name = get_class_name(code)
        # Import the freshly written module from its file location.
        module_spec = importlib.util.spec_from_file_location('example_module', fn_path)
        example_module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(example_module)
        # Instantiate the generated plugin class and run it on the input file.
        plugin_class = getattr(example_module, plugin_class_name)
        plugin = plugin_class()
        return_dict['result'] = plugin.run(file_path)
        return_dict['success'] = True
    except:
        return_dict['traceback'] = trimmed_format_exc()
|
crazy_functions/ipc_fns/mp.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import platform
|
| 2 |
+
import pickle
|
| 3 |
+
import multiprocessing
|
| 4 |
+
|
| 5 |
+
def run_in_subprocess_wrapper_func(v_args):
    """Child-process entry point: unpickle a packed call description and run it.

    `v_args` is pickle.dumps((func, args, kwargs, return_dict, exception_dict)).
    The call's result lands in return_dict['result']; on failure the
    sys.exc_info() triple is stored in exception_dict['exception'].
    """
    import sys
    func, args, kwargs, return_dict, exception_dict = pickle.loads(v_args)
    try:
        return_dict['result'] = func(*args, **kwargs)
    except Exception:
        exception_dict['exception'] = sys.exc_info()
| 15 |
+
def run_in_subprocess_with_timeout(func, timeout=60):
    """Wrap `func` so it executes in a child process with a hard time limit.

    On Linux the returned wrapper pickles the call, runs it via
    `run_in_subprocess_wrapper_func` in a new process, and re-raises any
    exception the child recorded; on other platforms `func` is returned
    unchanged (no isolation).

    NOTE(review): the child stores sys.exc_info(), whose traceback object is
    not picklable through a Manager dict — confirm the exception path actually
    round-trips on the target platform.
    """
    if platform.system() != 'Linux':
        # Process isolation only enabled on Linux; run inline elsewhere.
        return func

    def wrapper(*args, **kwargs):
        return_dict = multiprocessing.Manager().dict()
        exception_dict = multiprocessing.Manager().dict()
        packed = pickle.dumps((func, args, kwargs, return_dict, exception_dict))
        child = multiprocessing.Process(target=run_in_subprocess_wrapper_func,
                                        args=(packed,))
        child.start()
        child.join(timeout)
        if child.is_alive():
            child.terminate()
            raise TimeoutError(f'功能单元{str(func)}未能在规定时间内完成任务')
        child.close()
        if 'exception' in exception_dict:
            # ooops, the subprocess ran into an exception -> re-raise it here
            exc_info = exception_dict['exception']
            raise exc_info[1].with_traceback(exc_info[2])
        if 'result' in return_dict.keys():
            # The subprocess ran successfully: hand back its result.
            return return_dict['result']

    return wrapper
|
crazy_functions/json_fns/pydantic_io.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
https://github.com/langchain-ai/langchain/blob/master/docs/extras/modules/model_io/output_parsers/pydantic.ipynb
|
| 3 |
+
|
| 4 |
+
Example 1.
|
| 5 |
+
|
| 6 |
+
# Define your desired data structure.
|
| 7 |
+
class Joke(BaseModel):
|
| 8 |
+
setup: str = Field(description="question to set up a joke")
|
| 9 |
+
punchline: str = Field(description="answer to resolve the joke")
|
| 10 |
+
|
| 11 |
+
# You can add custom validation logic easily with Pydantic.
|
| 12 |
+
@validator("setup")
|
| 13 |
+
def question_ends_with_question_mark(cls, field):
|
| 14 |
+
if field[-1] != "?":
|
| 15 |
+
raise ValueError("Badly formed question!")
|
| 16 |
+
return field
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
Example 2.
|
| 20 |
+
|
| 21 |
+
# Here's another example, but with a compound typed field.
|
| 22 |
+
class Actor(BaseModel):
|
| 23 |
+
name: str = Field(description="name of an actor")
|
| 24 |
+
film_names: List[str] = Field(description="list of names of films they starred in")
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
import json, re, logging
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
PYDANTIC_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.

Here is the output schema:
```
{schema}
```"""


PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE = """The output should be formatted as a JSON instance that conforms to the JSON schema below.
```
{schema}
```"""

class JsonStringError(Exception):
    """Raised when a model reply cannot be parsed (or repaired) into valid JSON."""

class GptJsonIO():
    """Exchange structured JSON with an LLM using a pydantic schema.

    Typical flow:
        io = GptJsonIO(MyModel)
        prompt += io.format_instructions
        obj = io.generate_output_auto_repair(llm_reply, gpt_gen_fn)
    """

    def __init__(self, schema, example_instruction=True):
        # schema: a pydantic BaseModel subclass describing the expected JSON.
        # example_instruction: include a worked example in the instructions.
        self.pydantic_object = schema
        self.example_instruction = example_instruction
        self.format_instructions = self.generate_format_instructions()

    def generate_format_instructions(self):
        """Render the prompt snippet telling the model which JSON to emit."""
        schema = self.pydantic_object.schema()

        # Remove extraneous fields that would only distract the model.
        reduced_schema = schema
        if "title" in reduced_schema:
            del reduced_schema["title"]
        if "type" in reduced_schema:
            del reduced_schema["type"]
        # Ensure json in context is well-formed with double quotes.
        # Bug fix: schema_str was previously assigned only in the
        # example_instruction branch, so example_instruction=False raised
        # NameError; compute it once before branching.
        schema_str = json.dumps(reduced_schema)
        if self.example_instruction:
            return PYDANTIC_FORMAT_INSTRUCTIONS.format(schema=schema_str)
        else:
            return PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE.format(schema=schema_str)

    def generate_output(self, text):
        """Extract the first {...} candidate from `text` and parse it with the schema.

        Raises json decoding / pydantic validation errors on malformed input.
        """
        # Greedy search for 1st json candidate.
        match = re.search(
            r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL
        )
        json_str = ""
        if match: json_str = match.group()
        json_object = json.loads(json_str, strict=False)
        final_object = self.pydantic_object.parse_obj(json_object)
        return final_object

    def generate_repair_prompt(self, broken_json, error):
        """Build a prompt asking the model to fix `broken_json` given `error`."""
        prompt = "Fix a broken json string.\n\n" + \
                 "(1) The broken json string need to fix is: \n\n" + \
                 "```" + "\n" + \
                 broken_json + "\n" + \
                 "```" + "\n\n" + \
                 "(2) The error message is: \n\n" + \
                 error + "\n\n" + \
                 "Now, fix this json string. \n\n"
        return prompt

    def generate_output_auto_repair(self, response, gpt_gen_fn):
        """
        response: string containing candidate json
        gpt_gen_fn: gpt_gen_fn(inputs, sys_prompt) -> str; called once to
                    attempt a model-driven repair of broken JSON.

        Raises JsonStringError when even the repaired reply cannot be parsed.
        """
        try:
            result = self.generate_output(response)
        except Exception as e:
            try:
                logging.info(f'Repairing json:{response}')
                repair_prompt = self.generate_repair_prompt(broken_json = response, error=repr(e))
                result = self.generate_output(gpt_gen_fn(repair_prompt, self.format_instructions))
                logging.info('Repaire json success.')
            except Exception as e:
                # Out of options: give up on repairing.
                logging.info('Repaire json fail.')
                raise JsonStringError('Cannot repair json.', str(e))
        return result
|
crazy_functions/live_audio/aliyunASR.py
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time, logging, json, sys, struct
|
| 2 |
+
import numpy as np
|
| 3 |
+
from scipy.io.wavfile import WAVE_FORMAT
|
| 4 |
+
|
| 5 |
+
def write_numpy_to_wave(filename, rate, data, add_header=False):
    """
    Write a NumPy array as a WAV file.

    Adapted from scipy.io.wavfile.write with one twist: when `add_header` is
    False, only the 'data' chunk (tag + length + samples) is emitted — no
    RIFF/fmt header — which suits streaming raw PCM chunks to a consumer that
    has already negotiated the format.

    Args:
        filename: a path, or a file-like object with a .write method.
        rate: sample rate in Hz (written into the fmt chunk when emitted).
        data: numpy array of samples; int/float kinds, or uint8 (1 byte).
            2-D arrays are interpreted as (frames, channels) — standard WAV
            layout; 1-D is mono.
        add_header: when True, write the full RIFF/WAVE header and patch the
            total size back in at the end.

    Raises:
        ValueError: for unsupported dtypes or data exceeding the 4 GiB
            WAV size limit.
    """
    def _array_tofile(fid, data):
        # ravel gives a c-contiguous buffer
        fid.write(data.ravel().view('b').data)

    # Accept either an open file-like object or a path to create.
    if hasattr(filename, 'write'):
        fid = filename
    else:
        fid = open(filename, 'wb')

    fs = rate

    try:
        dkind = data.dtype.kind
        # Only signed ints, floats, and 8-bit unsigned are representable in WAV.
        if not (dkind == 'i' or dkind == 'f' or (dkind == 'u' and
                                                 data.dtype.itemsize == 1)):
            raise ValueError("Unsupported data type '%s'" % data.dtype)

        header_data = b''

        # RIFF container header; the total-size field is patched at the end.
        header_data += b'RIFF'
        header_data += b'\x00\x00\x00\x00'
        header_data += b'WAVE'

        # fmt chunk
        header_data += b'fmt '
        if dkind == 'f':
            format_tag = WAVE_FORMAT.IEEE_FLOAT
        else:
            format_tag = WAVE_FORMAT.PCM
        if data.ndim == 1:
            channels = 1
        else:
            channels = data.shape[1]
        bit_depth = data.dtype.itemsize * 8
        bytes_per_second = fs*(bit_depth // 8)*channels
        block_align = channels * (bit_depth // 8)

        # <HHIIHH: format tag, channels, sample rate, byte rate, block align, bits.
        fmt_chunk_data = struct.pack('<HHIIHH', format_tag, channels, fs,
                                     bytes_per_second, block_align, bit_depth)
        if not (dkind == 'i' or dkind == 'u'):
            # add cbSize field for non-PCM files
            fmt_chunk_data += b'\x00\x00'

        header_data += struct.pack('<I', len(fmt_chunk_data))
        header_data += fmt_chunk_data

        # fact chunk (non-PCM files)
        if not (dkind == 'i' or dkind == 'u'):
            header_data += b'fact'
            header_data += struct.pack('<II', 4, data.shape[0])

        # check data size (needs to be immediately before the data chunk)
        if ((len(header_data)-4-4) + (4+4+data.nbytes)) > 0xFFFFFFFF:
            raise ValueError("Data exceeds wave file size limit")
        if add_header:
            fid.write(header_data)
        # data chunk
        fid.write(b'data')
        fid.write(struct.pack('<I', data.nbytes))
        # WAV is little-endian; byteswap big-endian data before writing.
        if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and
                                           sys.byteorder == 'big'):
            data = data.byteswap()
        _array_tofile(fid, data)

        if add_header:
            # Determine file size and place it in correct
            # position at start of the file.
            size = fid.tell()
            fid.seek(4)
            fid.write(struct.pack('<I', size-8))

    finally:
        # Close files we opened; rewind caller-supplied streams so the caller
        # can read back what was just written.
        if not hasattr(filename, 'write'):
            fid.close()
        else:
            fid.seek(0)
| 86 |
+
def is_speaker_speaking(vad, data, sample_rate):
    """Detect whether any 30 ms frame of `data` contains speech.

    Args:
        vad: a webrtcvad.Vad-like object exposing is_speech(frame, sample_rate).
            The WebRTC VAD only accepts 16-bit mono PCM audio, sampled at
            8000, 16000, 32000 or 48000 Hz; a frame must be 10, 20 or 30 ms.
        data: raw 16-bit PCM bytes.
        sample_rate: sampling rate in Hz.

    Returns:
        (speaking, info): speaking is True when any frame contains speech;
        info is a short '^'/'.' string visualizing the first 10 frame verdicts.
    """
    frame_duration = 30
    n_bit_each = int(sample_rate * frame_duration / 1000) * 2  # x2 because audio is 16 bit (2 bytes)
    # Perf fix: step directly from frame boundary to frame boundary instead of
    # iterating every byte index and testing `t % n_bit_each == 0`.
    res_list = [vad.is_speech(data[t - n_bit_each:t], sample_rate)
                for t in range(n_bit_each, len(data), n_bit_each)]

    info = ''.join(['^' if r else '.' for r in res_list])
    info = info[:10]
    return any(res_list), info
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
class AliyunASR():
    """Callback mixin driving Aliyun's realtime speech-to-text (NLS) service.

    NOTE(review): this class reads/writes attributes it does not define here
    (self.stop, self.stop_msg, self.parsed_text, self.parsed_sentence,
    self.audio_shape, self.event_on_result_chg, self.event_on_entence_end) —
    presumably supplied by the class that mixes this in; confirm against callers.
    """

    def test_on_sentence_begin(self, message, *args):
        # NLS callback: a new sentence started; nothing to do.
        # print("test_on_sentence_begin:{}".format(message))
        pass

    def test_on_sentence_end(self, message, *args):
        # NLS callback: a complete sentence was recognized — publish the text
        # and wake any waiter.
        # print("test_on_sentence_end:{}".format(message))
        message = json.loads(message)
        self.parsed_sentence = message['payload']['result']
        # NOTE(review): "entence" looks like a typo, kept because the attribute
        # is presumably named like this where it is created — confirm.
        self.event_on_entence_end.set()
        # print(self.parsed_sentence)

    def test_on_start(self, message, *args):
        # NLS callback: transcription session opened.
        # print("test_on_start:{}".format(message))
        pass

    def test_on_error(self, message, *args):
        # NLS callback: server-side error; log and continue.
        logging.error("on_error args=>{}".format(args))
        pass

    def test_on_close(self, *args):
        # NLS callback: connection closed -> flag the service as unavailable
        # so the capture loop below shuts down.
        self.aliyun_service_ok = False
        pass

    def test_on_result_chg(self, message, *args):
        # NLS callback: intermediate recognition result changed.
        # print("test_on_chg:{}".format(message))
        message = json.loads(message)
        self.parsed_text = message['payload']['result']
        self.event_on_result_chg.set()

    def test_on_completed(self, message, *args):
        # NLS callback: transcription finished.
        # print("on_completed:args=>{} message=>{}".format(args, message))
        pass

    def audio_convertion_thread(self, uuid):
        """Background-thread loop: pull captured audio for this session,
        run voice-activity detection, and stream speech frames to the
        Aliyun transcriber until self.stop is set."""
        import nls  # pip install git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
        import tempfile
        from scipy import io
        from toolbox import get_conf
        from .audio_io import change_sample_rate
        from .audio_io import RealtimeAudioDistribution
        NEW_SAMPLERATE = 16000  # Aliyun NLS expects 16 kHz PCM
        rad = RealtimeAudioDistribution()
        rad.clean_up()
        temp_folder = tempfile.gettempdir()
        TOKEN, APPKEY = get_conf('ALIYUN_TOKEN', 'ALIYUN_APPKEY')
        if len(TOKEN) == 0:
            # No static token configured -> request a temporary one.
            TOKEN = self.get_token()
        self.aliyun_service_ok = True
        URL="wss://nls-gateway.aliyuncs.com/ws/v1"
        sr = nls.NlsSpeechTranscriber(
                    url=URL,
                    token=TOKEN,
                    appkey=APPKEY,
                    on_sentence_begin=self.test_on_sentence_begin,
                    on_sentence_end=self.test_on_sentence_end,
                    on_start=self.test_on_start,
                    on_result_changed=self.test_on_result_chg,
                    on_completed=self.test_on_completed,
                    on_error=self.test_on_error,
                    on_close=self.test_on_close,
                    callback_args=[uuid.hex]
                )
        timeout_limit_second = 20
        r = sr.start(aformat="pcm",
                timeout=timeout_limit_second,
                enable_intermediate_result=True,
                enable_punctuation_prediction=True,
                enable_inverse_text_normalization=True)

        import webrtcvad
        vad = webrtcvad.Vad()
        vad.set_mode(1)  # VAD aggressiveness: 1 = mildly aggressive

        is_previous_frame_transmitted = False  # whether the previous chunk was sent upstream
        # NOTE(review): previous_frame_data is never updated after this —
        # it looks like it was meant to hold the last silent chunk; confirm.
        previous_frame_data = None
        echo_cnt = 0      # after speech stops, keep sending this many more chunks
        echo_cnt_max = 4  # size of that trailing-echo window
        keep_alive_last_send_time = time.time()
        while not self.stop:
            # time.sleep(self.capture_interval)
            audio = rad.read(uuid.hex)
            if audio is not None:
                # convert to pcm file
                temp_file = f'{temp_folder}/{uuid.hex}.pcm' #
                dsdata = change_sample_rate(audio, rad.rate, NEW_SAMPLERATE) # 48000 --> 16000
                write_numpy_to_wave(temp_file, NEW_SAMPLERATE, dsdata)
                # read pcm binary
                with open(temp_file, "rb") as f: data = f.read()
                is_speaking, info = is_speaker_speaking(vad, data, NEW_SAMPLERATE)

                if is_speaking or echo_cnt > 0:
                    # Microphone active, or still inside the trailing-echo window.
                    echo_cnt -= 1
                    if not is_previous_frame_transmitted:  # previous chunk had no speech: send it along too
                        if previous_frame_data is not None: data = previous_frame_data + data
                    if is_speaking:
                        echo_cnt = echo_cnt_max
                    slices = zip(*(iter(data),) * 640)  # ship the audio in 640-byte groups
                    for i in slices: sr.send_audio(bytes(i))
                    keep_alive_last_send_time = time.time()
                    is_previous_frame_transmitted = True
                else:
                    is_previous_frame_transmitted = False
                    echo_cnt = 0
                    # Keep the link alive: even in silence, ship some audio
                    # before the server-side timeout fires.
                    if time.time() - keep_alive_last_send_time > timeout_limit_second/2:
                        slices = zip(*(iter(data),) * 640)  # ship the audio in 640-byte groups
                        for i in slices: sr.send_audio(bytes(i))
                        keep_alive_last_send_time = time.time()
                        is_previous_frame_transmitted = True
                self.audio_shape = info
            else:
                time.sleep(0.1)

            if not self.aliyun_service_ok:
                self.stop = True
                self.stop_msg = 'Aliyun音频服务异常,请检查ALIYUN_TOKEN和ALIYUN_APPKEY是否过期。'
        r = sr.stop()

    def get_token(self):
        """Request a temporary Aliyun NLS access token via the OpenAPI."""
        from toolbox import get_conf
        import json
        from aliyunsdkcore.request import CommonRequest
        from aliyunsdkcore.client import AcsClient
        AccessKey_ID, AccessKey_secret = get_conf('ALIYUN_ACCESSKEY', 'ALIYUN_SECRET')

        # Create an AcsClient instance for the Shanghai region endpoint.
        client = AcsClient(
            AccessKey_ID,
            AccessKey_secret,
            "cn-shanghai"
        )

        # Build the CreateToken request.
        request = CommonRequest()
        request.set_method('POST')
        request.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
        request.set_version('2019-02-28')
        request.set_action_name('CreateToken')

        try:
            response = client.do_action_with_exception(request)
            print(response)
            jss = json.loads(response)
            if 'Token' in jss and 'Id' in jss['Token']:
                token = jss['Token']['Id']
                expireTime = jss['Token']['ExpireTime']
                print("token = " + token)
                print("expireTime = " + str(expireTime))
        except Exception as e:
            print(e)

        # NOTE(review): if the request fails or the payload lacks Token.Id,
        # `token` is unbound here and a NameError is raised — consider raising
        # a descriptive error instead.
        return token
|
crazy_functions/live_audio/audio_io.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from scipy import interpolate
|
| 3 |
+
|
| 4 |
+
def Singleton(cls):
    """Class decorator: every construction of `cls` yields one shared instance.

    The first call's constructor arguments are used; later arguments are
    ignored because the cached instance is returned as-is.
    """
    _instances = {}

    def _get_instance(*args, **kargs):
        if cls not in _instances:
            _instances[cls] = cls(*args, **kargs)
        return _instances[cls]

    return _get_instance
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@Singleton
class RealtimeAudioDistribution():
    """Process-wide buffer fanning captured audio out per session uuid.

    Producers call feed(uuid, (rate, samples)); the consumer drains a
    session's accumulated samples with read(uuid).
    """
    def __init__(self) -> None:
        self.data = {}            # uuid -> pending numpy sample buffer
        self.max_len = 1024*1024  # cap per-session backlog
        self.rate = 48000         # read-only: samples per second

    def clean_up(self):
        """Drop all buffered audio for every session."""
        self.data = {}

    def feed(self, uuid, audio):
        """Append a (rate, samples) chunk to the session's buffer, trimming
        the oldest samples once the backlog exceeds max_len."""
        self.rate, samples = audio
        if uuid not in self.data:
            self.data[uuid] = samples
        else:
            merged = np.concatenate((self.data[uuid], samples))
            if len(merged) > self.max_len:
                merged = merged[-self.max_len:]
            self.data[uuid] = merged

    def read(self, uuid):
        """Pop and return everything buffered for `uuid`, or None if empty."""
        return self.data.pop(uuid, None)
|
| 42 |
+
|
| 43 |
+
def change_sample_rate(audio, old_sr, new_sr):
    """Resample `audio` from old_sr to new_sr by linear interpolation.

    Args:
        audio: numpy array of samples; the first axis is time.
        old_sr: original sample rate in Hz.
        new_sr: target sample rate in Hz.

    Returns:
        The resampled signal as an int16 numpy array.
    """
    duration = audio.shape[0] / old_sr

    # Place old and new sample instants on a common time axis.
    old_timeline = np.linspace(0, duration, audio.shape[0])
    new_timeline = np.linspace(0, duration, int(audio.shape[0] * new_sr / old_sr))

    resampled = interpolate.interp1d(old_timeline, audio.T)(new_timeline).T
    return resampled.astype(np.int16)
|
crazy_functions/multi_stage/multi_stage_utils.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from typing import List
|
| 3 |
+
from toolbox import update_ui_lastest_msg, disable_auto_promotion
|
| 4 |
+
from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder
|
| 5 |
+
from request_llms.bridge_all import predict_no_ui_long_connection
|
| 6 |
+
from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
|
| 7 |
+
import time
|
| 8 |
+
import pickle
|
| 9 |
+
|
| 10 |
+
def have_any_recent_upload_files(chatbot):
    """Return True iff this chat session recorded a file upload within 5 minutes."""
    five_minutes = 5 * 60
    if not chatbot:
        return False  # no chatbot session object at all
    upload_record = chatbot._cookies.get("most_recent_uploaded", None)
    if not upload_record:
        return False  # nothing was ever uploaded
    # Fresh enough only if the recorded timestamp is within the window.
    return (time.time() - upload_record["time"]) < five_minutes
|
| 17 |
+
|
| 18 |
+
class GptAcademicState():
    """Base class for plugin state that is pickled into the chatbot cookies."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Hook for subclasses to (re)initialize their fields."""
        pass

    def dump_state(self, chatbot):
        """Persist this state object into the chatbot cookies."""
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    def set_state(self, chatbot, key, value):
        """Set an attribute and immediately persist the whole state."""
        setattr(self, key, value)
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    # BUG FIX: this was a plain function inside the class body (no `self`,
    # no decorator). Calling it through an instance bound the instance as
    # `chatbot` and shifted every argument. Marking it @staticmethod keeps
    # the existing class-level call sites working and fixes instance calls.
    @staticmethod
    def get_state(chatbot, cls=None):
        """Load the pickled state from cookies, or build a fresh `cls()` /
        GptAcademicState() when none exists; attaches `chatbot` on the result."""
        state = chatbot._cookies.get('plugin_state', None)
        if state is not None:
            state = pickle.loads(state)
        elif cls is not None:
            state = cls()
        else:
            state = GptAcademicState()
        state.chatbot = chatbot
        return state
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class GptAcademicGameBaseState():
    """
    Base class for stateful "game" plugins.

    State is pickled into the chatbot cookies under a per-plugin key
    (`plugin_state/<plugin_name>`), and the UI can be locked onto a callback
    plugin while the game is running. Lifecycle: first run builds a fresh
    state via `sync_state` -> `init_game`; subsequent turns reload the pickle
    and drive `continue_game`.
    """
    def init_game(self, chatbot, lock_plugin):
        # First-time initialization of a fresh game state.
        # NOTE(review): the `lock_plugin` parameter is unused here and shadows
        # the method of the same name below — confirm intent.
        self.plugin_name = None
        self.callback_fn = None
        self.delete_game = False
        self.step_cnt = 0

    def lock_plugin(self, chatbot):
        # Route all subsequent user input to this game's callback plugin.
        if self.callback_fn is None:
            raise ValueError("callback_fn is None")
        chatbot._cookies['lock_plugin'] = self.callback_fn
        self.dump_state(chatbot)

    def get_plugin_name(self):
        # The plugin name must be set (by sync_state) before any persistence.
        if self.plugin_name is None:
            raise ValueError("plugin_name is None")
        return self.plugin_name

    def dump_state(self, chatbot):
        # Persist this state under its plugin-specific cookie key.
        chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self)

    def set_state(self, chatbot, key, value):
        # Set an attribute and persist the whole state immediately.
        setattr(self, key, value)
        chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self)

    @staticmethod
    def sync_state(chatbot, llm_kwargs, cls, plugin_name, callback_fn, lock_plugin=True):
        # Load an existing game state from cookies, or create a fresh one.
        state = chatbot._cookies.get(f'plugin_state/{plugin_name}', None)
        if state is not None:
            state = pickle.loads(state)
        else:
            state = cls()
            state.init_game(chatbot, lock_plugin)
        # Re-attach per-request context that is not (or should not be) pickled.
        state.plugin_name = plugin_name
        state.llm_kwargs = llm_kwargs
        state.chatbot = chatbot
        state.callback_fn = callback_fn
        return state

    def continue_game(self, prompt, chatbot, history):
        # Game main body (subclass implements `step`).
        yield from self.step(prompt, chatbot, history)
        self.step_cnt += 1
        # Save state and wrap up this turn.
        self.dump_state(chatbot)
        # If the game has ended, release the UI lock and clear stored state.
        if self.delete_game:
            chatbot._cookies['lock_plugin'] = None
            chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = None
        yield from update_ui(chatbot=chatbot, history=history)
|
crazy_functions/pdf_fns/breakdown_txt.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from crazy_functions.ipc_fns.mp import run_in_subprocess_with_timeout
|
| 2 |
+
|
| 3 |
+
def force_breakdown(txt, limit, get_token_fn):
    """Brute-force split used when no punctuation/blank-line cut point exists:
    returns (prefix, remainder) where the prefix is the longest head of `txt`
    whose token count is below `limit`."""
    for cut_at in reversed(range(len(txt))):
        head = txt[:cut_at]
        if get_token_fn(head) < limit:
            return head, txt[cut_at:]
    # No prefix fits (should not happen with a sane tokenizer).
    return "Tiktoken未知错误", "Tiktoken未知错误"
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage):
    """Speed trick for huge inputs: keep the working text between _min and
    _max characters. Excess beyond _max is parked at the front of the storage
    string; when the working text drops below _min, storage is folded back in.
    Returns the updated (working_text, storage) pair."""
    _min, _max = int(5e4), int(1e5)
    # Refill the working window from storage when it runs low.
    if len(remain_txt_to_cut) < _min and remain_txt_to_cut_storage:
        remain_txt_to_cut += remain_txt_to_cut_storage
        remain_txt_to_cut_storage = ""
    # Park anything beyond the cap back into storage.
    if len(remain_txt_to_cut) > _max:
        remain_txt_to_cut_storage = remain_txt_to_cut[_max:] + remain_txt_to_cut_storage
        remain_txt_to_cut = remain_txt_to_cut[:_max]
    return remain_txt_to_cut, remain_txt_to_cut_storage
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=False):
    """Split `txt_tocut` into fragments whose token count (per `get_token_fn`)
    stays within `limit`.

    must_break_at_empty_line: only blank lines (paragraph breaks) qualify as
        split points.
    break_anyway: when no line-based split point exists, fall back to a brute
        character-level split instead of raising RuntimeError.
    """
    res = []
    total_len = len(txt_tocut)
    fin_len = 0
    remain_txt_to_cut = txt_tocut
    remain_txt_to_cut_storage = ""
    # Speed trick: keep only a bounded working window in `remain_txt_to_cut`,
    # parking the rest in `remain_txt_to_cut_storage` (see maintain_storage).
    remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)

    while True:
        if get_token_fn(remain_txt_to_cut) <= limit:
            # Remaining text already fits — emit it and stop.
            res.append(remain_txt_to_cut); fin_len+=len(remain_txt_to_cut)
            break
        else:
            # Over the limit: look for a line-based split point.
            lines = remain_txt_to_cut.split('\n')

            # Estimate a split line index proportional to the token budget.
            estimated_line_cut = limit / get_token_fn(remain_txt_to_cut) * len(lines)
            estimated_line_cut = int(estimated_line_cut)

            # Scan backwards from the estimate for a valid split offset (cnt).
            cnt = 0
            for cnt in reversed(range(estimated_line_cut)):
                if must_break_at_empty_line:
                    # Only blank lines (paragraph boundaries) qualify.
                    if lines[cnt] != "":
                        continue
                prev = "\n".join(lines[:cnt])
                post = "\n".join(lines[cnt:])
                if get_token_fn(prev) < limit:
                    break

            if cnt == 0:
                # NOTE(review): cnt == 0 is treated as "no split point found",
                # but the scan above can also legitimately break AT cnt == 0;
                # in that corner case a valid (prev, post) pair is discarded —
                # confirm this is acceptable.
                if break_anyway:
                    # Brute-force character-level split is allowed.
                    prev, post = force_breakdown(remain_txt_to_cut, limit, get_token_fn)
                else:
                    # Not allowed — report the unsplittable text.
                    raise RuntimeError(f"存在一行极长的文本!{remain_txt_to_cut}")

            # Record the finished fragment.
            res.append(prev); fin_len+=len(prev)
            # Prepare the next iteration.
            remain_txt_to_cut = post
            remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)
            process = fin_len/total_len
            print(f'正在文本切分 {int(process*100)}%')
            if len(remain_txt_to_cut.strip()) == 0:
                break
    return res
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def breakdown_text_to_satisfy_token_limit_(txt, limit, llm_model="gpt-3.5-turbo"):
    """Try progressively more aggressive strategies to split `txt` into
    fragments that each fit within `limit` tokens for the given model."""
    from request_llms.bridge_all import model_info
    enc = model_info[llm_model]['tokenizer']
    def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))
    try:
        # Attempt 1: split only at double newlines (paragraph boundaries).
        return cut(limit, get_token_fn, txt, must_break_at_empty_line=True)
    except RuntimeError:
        try:
            # Attempt 2: split at single newlines.
            return cut(limit, get_token_fn, txt, must_break_at_empty_line=False)
        except RuntimeError:
            try:
                # Attempt 3: split at English periods. The Chinese full stop is
                # deliberately inserted as a temporary marker, then restored.
                res = cut(limit, get_token_fn, txt.replace('.', '。\n'), must_break_at_empty_line=False)
                return [r.replace('。\n', '.') for r in res]
            except RuntimeError as e:
                try:
                    # Attempt 4: split at Chinese full stops (marker doubled so
                    # it cannot collide with attempt 3, then restored).
                    res = cut(limit, get_token_fn, txt.replace('。', '。。\n'), must_break_at_empty_line=False)
                    return [r.replace('。。\n', '。') for r in res]
                except RuntimeError as e:
                    # Attempt 5: last resort — brute-force split anywhere.
                    return cut(limit, get_token_fn, txt, must_break_at_empty_line=False, break_anyway=True)

# Run the splitter in a watchdog subprocess so a pathological input cannot hang the UI.
breakdown_text_to_satisfy_token_limit = run_in_subprocess_with_timeout(breakdown_text_to_satisfy_token_limit_, timeout=60)

if __name__ == '__main__':
    # Manual smoke test: split a large PDF's text into ~2500-token fragments.
    from crazy_functions.crazy_utils import read_and_clean_pdf_text
    file_content, page_one = read_and_clean_pdf_text("build/assets/at.pdf")

    from request_llms.bridge_all import model_info
    for i in range(5):
        # Double the text five times to stress the splitter.
        file_content += file_content

    print(len(file_content))
    TOKEN_LIMIT_PER_FRAGMENT = 2500
    res = breakdown_text_to_satisfy_token_limit(file_content, TOKEN_LIMIT_PER_FRAGMENT)
|
| 125 |
+
|
crazy_functions/pdf_fns/parse_pdf.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import lru_cache
|
| 2 |
+
from toolbox import gen_time_str
|
| 3 |
+
from toolbox import promote_file_to_downloadzone
|
| 4 |
+
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
| 5 |
+
from toolbox import get_conf
|
| 6 |
+
from toolbox import ProxyNetworkActivate
|
| 7 |
+
from colorful import *
|
| 8 |
+
import requests
|
| 9 |
+
import random
|
| 10 |
+
import copy
|
| 11 |
+
import os
|
| 12 |
+
import math
|
| 13 |
+
|
| 14 |
+
class GROBID_OFFLINE_EXCEPTION(Exception): pass  # raised when the GROBID service is unreachable
|
| 15 |
+
|
| 16 |
+
def get_avail_grobid_url():
    """Pick a random GROBID endpoint from config and return it if it responds
    to the /api/isalive probe; return None when none is configured or the
    probe fails."""
    GROBID_URLS = get_conf('GROBID_URLS')
    if len(GROBID_URLS) == 0: return None
    try:
        _grobid_url = random.choice(GROBID_URLS)  # 随机负载均衡 (random load balancing)
        _grobid_url = _grobid_url.rstrip('/')     # normalize a trailing slash (rstrip is a no-op otherwise)
        with ProxyNetworkActivate('Connect_Grobid'):
            res = requests.get(_grobid_url + '/api/isalive')
        if res.text == 'true':
            return _grobid_url
        return None
    except Exception:
        # FIX: was a bare `except:` which also swallowed KeyboardInterrupt /
        # SystemExit. Network failure or a bad config entry → endpoint unusable.
        return None
|
| 28 |
+
|
| 29 |
+
@lru_cache(maxsize=32)
def parse_pdf(pdf_path, grobid_url):
    """Parse a PDF into a structured article dict via a GROBID service.

    Results are cached per (pdf_path, grobid_url). Raises
    GROBID_OFFLINE_EXCEPTION when the service is down, RuntimeError when
    parsing itself fails.
    """
    import scipdf # pip install scipdf_parser
    grobid_url = grobid_url.rstrip('/')  # normalize trailing slash (rstrip is a no-op otherwise)
    try:
        with ProxyNetworkActivate('Connect_Grobid'):
            article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
    except GROBID_OFFLINE_EXCEPTION:
        raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用,请修改config中的GROBID_URL,可修改成本地GROBID服务。")
    except Exception as e:
        # FIX: was a bare `except:` that also caught KeyboardInterrupt and
        # discarded the original traceback; chain the cause for debuggability.
        raise RuntimeError("解析PDF失败,请检查PDF是否损坏。") from e
    return article_dict
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chatbot, fp, generated_conclusion_files):
    """Write two markdown reports from the translation results and promote
    both to the download zone:
      1. original and translation interleaved;
      2. translation only (section headings emitted once).
    Returns the path of file (2). `fp` is unused here."""
    # -=-=-=-=-=-=-=-= file 1: original + translation interleaved -=-=-=-=-=-=-=-=
    res_path = write_history_to_file(meta + ["# Meta Translation" , paper_meta_info] + gpt_response_collection, file_basename=f"{gen_time_str()}translated_and_original.md", file_fullname=None)
    promote_file_to_downloadzone(res_path, rename_file=os.path.basename(res_path)+'.md', chatbot=chatbot)
    generated_conclusion_files.append(res_path)

    # -=-=-=-=-=-=-=-= file 2: translated text only -=-=-=-=-=-=-=-=
    translated_res_array = []
    # Track the current top-level section heading so it is emitted only once.
    last_section_name = ""
    for index, value in enumerate(gpt_response_collection):
        # Odd indices hold the model replies (even indices hold the prompts).
        if index % 2 != 0:
            # Recover the English section heading from the paired prompt,
            # stripping any " Part-N" suffix.
            cur_section_name = gpt_response_collection[index-1].split('\n')[0].split(" Part")[0]
            # Emit the heading only when it changes from the previous fragment.
            if cur_section_name != last_section_name:
                cur_value = cur_section_name + '\n'
                last_section_name = copy.deepcopy(cur_section_name)
            else:
                cur_value = ""
            # Keep the (English) heading, then append the translated body.
            cur_value += value
            translated_res_array.append(cur_value)
    res_path = write_history_to_file(meta + ["# Meta Translation" , paper_meta_info] + translated_res_array,
                                     file_basename = f"{gen_time_str()}-translated_only.md",
                                     file_fullname = None,
                                     auto_caption = False)
    promote_file_to_downloadzone(res_path, rename_file=os.path.basename(res_path)+'.md', chatbot=chatbot)
    generated_conclusion_files.append(res_path)
    return res_path
|
| 74 |
+
|
| 75 |
+
def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_files, TOKEN_LIMIT_PER_FRAGMENT, DST_LANG):
    """Generator: translate a GROBID-parsed article dict section by section
    into DST_LANG, then emit a markdown report and an HTML side-by-side
    report, appending the produced files to `generated_conclusion_files`."""
    from crazy_functions.pdf_fns.report_gen_html import construct_html
    from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
    from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency

    prompt = "以下是一篇学术论文的基本信息:\n"
    # title
    title = article_dict.get('title', '无法获取 title'); prompt += f'title:{title}\n\n'
    # authors (truncated to 100 chars)
    authors = article_dict.get('authors', '无法获取 authors')[:100]; prompt += f'authors:{authors}\n\n'
    # abstract
    abstract = article_dict.get('abstract', '无法获取 abstract'); prompt += f'abstract:{abstract}\n\n'
    # translation command
    prompt += f"请将题目和摘要翻译为{DST_LANG}。"
    meta = [f'# Title:\n\n', title, f'# Abstract:\n\n', abstract ]

    # Single thread: translate the paper's meta information (title + abstract).
    paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=prompt,
        inputs_show_user=prompt,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot, history=[],
        sys_prompt="You are an academic paper reader。",
    )

    # Multi-threaded: translate the body sections.
    inputs_array = []
    inputs_show_user_array = []

    # Tokenizer for the selected model.
    from request_llms.bridge_all import model_info
    enc = model_info[llm_kwargs['llm_model']]['tokenizer']
    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

    def break_down(txt):
        # Split a section into fragments that fit the per-fragment token limit.
        raw_token_num = get_token_num(txt)
        if raw_token_num <= TOKEN_LIMIT_PER_FRAGMENT:
            return [txt]
        else:
            # raw_token_num > TOKEN_LIMIT_PER_FRAGMENT:
            # pick a smoothed limit so fragments come out roughly even.
            count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT))
            token_limit_smooth = raw_token_num // count + count
            return breakdown_text_to_satisfy_token_limit(txt, limit=token_limit_smooth, llm_model=llm_kwargs['llm_model'])

    for section in article_dict.get('sections'):
        if len(section['text']) == 0: continue
        section_frags = break_down(section['text'])
        for i, fragment in enumerate(section_frags):
            heading = section['heading']
            # Disambiguate multi-fragment sections with a Part suffix.
            if len(section_frags) > 1: heading += f' Part-{i+1}'
            inputs_array.append(
                f"你需要翻译{heading}章节,内容如下: \n\n{fragment}"
            )
            inputs_show_user_array.append(
                f"# {heading}\n\n{fragment}"
            )

    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=inputs_array,
        inputs_show_user_array=inputs_show_user_array,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history_array=[meta for _ in inputs_array],
        sys_prompt_array=[
            "请你作为一个学术翻译,负责把学术论文准确翻译成中文。注意文章中的每一句话都要翻译。" for _ in inputs_array],
    )
    # -=-=-=-=-=-=-=-= write the markdown report -=-=-=-=-=-=-=-=
    produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chatbot, fp, generated_conclusion_files)

    # -=-=-=-=-=-=-=-= write the HTML side-by-side report -=-=-=-=-=-=-=-=
    ch = construct_html()
    orig = ""
    trans = ""
    gpt_response_collection_html = copy.deepcopy(gpt_response_collection)
    for i,k in enumerate(gpt_response_collection_html):
        if i%2==0:
            # Even entries: show the original fragment (with heading).
            gpt_response_collection_html[i] = inputs_show_user_array[i//2]
        else:
            # Odd entries: prepend the English section heading to the translation.
            cur_section_name = gpt_response_collection[i-1].split('\n')[0].split(" Part")[0]
            cur_value = cur_section_name + "\n" + gpt_response_collection_html[i]
            gpt_response_collection_html[i] = cur_value

    final = ["", "", "一、论文概况", "", "Abstract", paper_meta_info, "二、论文翻译", ""]
    final.extend(gpt_response_collection_html)
    # Pair up (original, translated) entries into table rows.
    for i, k in enumerate(final):
        if i%2==0:
            orig = k
        if i%2==1:
            trans = k
            ch.add_row(a=orig, b=trans)
    create_report_file_name = f"{os.path.basename(fp)}.trans.html"
    html_file = ch.save_file(create_report_file_name)
    generated_conclusion_files.append(html_file)
    promote_file_to_downloadzone(html_file, rename_file=os.path.basename(html_file), chatbot=chatbot)
|
crazy_functions/pdf_fns/report_gen_html.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import update_ui, get_conf, trimmed_format_exc, get_log_folder
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class construct_html():
    """Accumulates (original, translated) row pairs as a JS-array fragment and
    renders them into report_template.html."""

    def __init__(self) -> None:
        # Concatenated JS object literals, one per added row.
        self.html_string = ""

    def add_row(self, a, b):
        """Append one side-by-side row; `a` is the primary (original) markdown,
        `b` the secondary (translated) markdown."""
        from toolbox import markdown_convertion
        template = """
            {
                primary_col: {
                    header: String.raw`__PRIMARY_HEADER__`,
                    msg: String.raw`__PRIMARY_MSG__`,
                },
                secondary_rol: {
                    header: String.raw`__SECONDARY_HEADER__`,
                    msg: String.raw`__SECONDARY_MSG__`,
                }
            },
        """
        def std(str):
            # NOTE(review): as written this replace is a no-op (backtick ->
            # backtick); it was presumably meant to escape backticks (e.g. to
            # an HTML entity) so they cannot terminate the String.raw`...`
            # literal — confirm against the original source.
            str = str.replace(r'`',r'`')
            # A trailing backslash, brace or dollar could break the JS
            # template literal; pad with a space.
            if str.endswith("\\"): str += ' '
            if str.endswith("}"): str += ' '
            if str.endswith("$"): str += ' '
            return str

        template_ = template
        a_lines = a.split('\n')
        b_lines = b.split('\n')

        # Single-line (or very long first line) input: use a truncated prefix
        # as the header and render the whole text as the message.
        if len(a_lines) == 1 or len(a_lines[0]) > 50:
            template_ = template_.replace("__PRIMARY_HEADER__", std(a[:20]))
            template_ = template_.replace("__PRIMARY_MSG__", std(markdown_convertion(a)))
        else:
            # Otherwise the first line becomes the header, the rest the message.
            template_ = template_.replace("__PRIMARY_HEADER__", std(a_lines[0]))
            template_ = template_.replace("__PRIMARY_MSG__", std(markdown_convertion('\n'.join(a_lines[1:]))))

        if len(b_lines) == 1 or len(b_lines[0]) > 50:
            template_ = template_.replace("__SECONDARY_HEADER__", std(b[:20]))
            template_ = template_.replace("__SECONDARY_MSG__", std(markdown_convertion(b)))
        else:
            template_ = template_.replace("__SECONDARY_HEADER__", std(b_lines[0]))
            template_ = template_.replace("__SECONDARY_MSG__", std(markdown_convertion('\n'.join(b_lines[1:]))))
        self.html_string += template_

    def save_file(self, file_name):
        """Render all accumulated rows into the HTML template and write the
        report into the log folder; returns the written file's path."""
        from toolbox import get_log_folder
        with open('crazy_functions/pdf_fns/report_template.html', 'r', encoding='utf8') as f:
            html_template = f.read()
        html_template = html_template.replace("__TF_ARR__", self.html_string)
        with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f:
            # Drop characters that cannot round-trip through utf-8.
            f.write(html_template.encode('utf-8', 'ignore').decode())
        return os.path.join(get_log_folder(), file_name)
|
crazy_functions/pdf_fns/report_template.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
crazy_functions/vt_fns/vt_call_plugin.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from typing import List
|
| 3 |
+
from toolbox import update_ui_lastest_msg, disable_auto_promotion
|
| 4 |
+
from request_llms.bridge_all import predict_no_ui_long_connection
|
| 5 |
+
from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
|
| 6 |
+
import copy, json, pickle, os, sys, time
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def read_avail_plugin_enum():
    """Collect every registered plugin that carries an 'Info' description and
    build: (1) a JSON prompt enumerating them as F_0001..F_nnnn, (2) a dict
    mapping those ids to plugin entries, and (3) a lenient lookup dict that
    additionally accepts un-padded ids like "F_1"."""
    from crazy_functional import get_crazy_functions
    plugin_arr = get_crazy_functions()
    # Remove plugins without an explanation ('Info') — the LLM cannot choose them.
    plugin_arr = {k:v for k, v in plugin_arr.items() if 'Info' in v}
    plugin_arr_info = {"F_{:04d}".format(i):v["Info"] for i, v in enumerate(plugin_arr.values(), start=1)}
    plugin_arr_dict = {"F_{:04d}".format(i):v for i, v in enumerate(plugin_arr.values(), start=1)}
    plugin_arr_dict_parse = {"F_{:04d}".format(i):v for i, v in enumerate(plugin_arr.values(), start=1)}
    # Also accept ids without zero padding (e.g. "F_12") from sloppy model output.
    plugin_arr_dict_parse.update({f"F_{i}":v for i, v in enumerate(plugin_arr.values(), start=1)})
    prompt = json.dumps(plugin_arr_info, ensure_ascii=False, indent=2)
    prompt = "\n\nThe defination of PluginEnum:\nPluginEnum=" + prompt
    return prompt, plugin_arr_dict, plugin_arr_dict_parse
|
| 21 |
+
|
| 22 |
+
def wrap_code(txt):
    """Strip embedded triple-backticks from `txt` and wrap it in a fenced code block."""
    sanitized = txt.replace('```', '')
    return "\n```\n" + sanitized + "\n```\n"
|
| 25 |
+
|
| 26 |
+
def have_any_recent_upload_files(chatbot):
    """Return True when the session's most recent file upload happened less
    than five minutes ago."""
    window_seconds = 5 * 60
    if not chatbot:
        return False  # no session object available
    record = chatbot._cookies.get("most_recent_uploaded", None)
    if not record:
        return False  # no upload on record
    return time.time() - record["time"] < window_seconds
|
| 33 |
+
|
| 34 |
+
def get_recent_file_prompt_support(chatbot):
    """Build a prompt snippet informing the model of the user's most recently
    uploaded file, so plugins that take a path argument can use it."""
    most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
    path = most_recent_uploaded['path']
    prompt = "\nAdditional Information:\n"
    # BUG FIX: the next line previously reassigned `prompt` with `=` instead
    # of appending with `+=`, silently dropping the header above.
    prompt += "In case that this plugin requires a path or a file as argument,"
    prompt += f"it is important for you to know that the user has recently uploaded a file, located at: `{path}`"
    prompt += f"Only use it when necessary, otherwise, you can ignore this file."
    return prompt
|
| 42 |
+
|
| 43 |
+
def get_inputs_show_user(inputs, plugin_arr_enum_prompt):
    """Produce a display-friendly version of `inputs`: the long plugin-enum
    prompt is removed and replaced by a 200-char preview plus ellipses."""
    shown = inputs.replace(plugin_arr_enum_prompt, "")
    shown += plugin_arr_enum_prompt[:200] + '...'
    shown += '\n...\n'
    shown += '...\n'
    shown += '...}'
    return shown
|
| 51 |
+
|
| 52 |
+
def execute_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
    """Generator: let the LLM pick the best-matching plugin for the user's
    request `txt`, extract its argument, then run it. Three stages:
    (1) plugin selection, (2) argument extraction, (3) execution."""
    plugin_arr_enum_prompt, plugin_arr_dict, plugin_arr_dict_parse = read_avail_plugin_enum()
    class Plugin(BaseModel):
        plugin_selection: str = Field(description="The most related plugin from one of the PluginEnum.", default="F_0000")
        reason_of_selection: str = Field(description="The reason why you should select this plugin.", default="This plugin satisfy user requirement most")
    # ⭐ ⭐ ⭐ Stage 1: select a plugin.
    yield from update_ui_lastest_msg(lastmsg=f"正在执行任务: {txt}\n\n查找可用插件中...", chatbot=chatbot, history=history, delay=0)
    gpt_json_io = GptJsonIO(Plugin)
    gpt_json_io.format_instructions = "The format of your output should be a json that can be parsed by json.loads.\n"
    gpt_json_io.format_instructions += """Output example: {"plugin_selection":"F_1234", "reason_of_selection":"F_1234 plugin satisfy user requirement most"}\n"""
    gpt_json_io.format_instructions += "The plugins you are authorized to use are listed below:\n"
    gpt_json_io.format_instructions += plugin_arr_enum_prompt
    inputs = "Choose the correct plugin according to user requirements, the user requirement is: \n\n" + \
             ">> " + txt.rstrip('\n').replace('\n','\n>> ') + '\n\n' + gpt_json_io.format_instructions

    run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
        inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
    try:
        gpt_reply = run_gpt_fn(inputs, "")
        plugin_sel = gpt_json_io.generate_output_auto_repair(gpt_reply, run_gpt_fn)
    except JsonStringError:
        # The model's JSON could not be parsed even after auto-repair.
        msg = f"抱歉, {llm_kwargs['llm_model']}无法理解您的需求。"
        msg += "请求的Prompt为:\n" + wrap_code(get_inputs_show_user(inputs, plugin_arr_enum_prompt))
        msg += "语言模型回复为:\n" + wrap_code(gpt_reply)
        msg += "\n但您可以尝试再试一次\n"
        yield from update_ui_lastest_msg(lastmsg=msg, chatbot=chatbot, history=history, delay=2)
        return
    if plugin_sel.plugin_selection not in plugin_arr_dict_parse:
        # The model invented a plugin id that does not exist.
        msg = f"抱歉, 找不到合适插件执行该任务, 或者{llm_kwargs['llm_model']}无法理解您的需求。"
        msg += f"语言模型{llm_kwargs['llm_model']}选择了不存在的插件:\n" + wrap_code(gpt_reply)
        msg += "\n但您可以尝试再试一次\n"
        yield from update_ui_lastest_msg(lastmsg=msg, chatbot=chatbot, history=history, delay=2)
        return

    # ⭐ ⭐ ⭐ Stage 2: confirm the plugin argument.
    # If a file was uploaded recently, tell the model where it lives.
    if not have_any_recent_upload_files(chatbot):
        appendix_info = ""
    else:
        appendix_info = get_recent_file_prompt_support(chatbot)

    plugin = plugin_arr_dict_parse[plugin_sel.plugin_selection]
    yield from update_ui_lastest_msg(lastmsg=f"正在执行任务: {txt}\n\n提取插件参数...", chatbot=chatbot, history=history, delay=0)
    class PluginExplicit(BaseModel):
        plugin_selection: str = plugin_sel.plugin_selection
        plugin_arg: str = Field(description="The argument of the plugin.", default="")
    gpt_json_io = GptJsonIO(PluginExplicit)
    gpt_json_io.format_instructions += "The information about this plugin is:" + plugin["Info"]
    inputs = f"A plugin named {plugin_sel.plugin_selection} is selected, " + \
             "you should extract plugin_arg from the user requirement, the user requirement is: \n\n" + \
             ">> " + (txt + appendix_info).rstrip('\n').replace('\n','\n>> ') + '\n\n' + \
             gpt_json_io.format_instructions
    run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
        inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
    plugin_sel = gpt_json_io.generate_output_auto_repair(run_gpt_fn(inputs, ""), run_gpt_fn)


    # ⭐ ⭐ ⭐ Stage 3: execute the plugin.
    fn = plugin['Function']
    fn_name = fn.__name__
    msg = f'{llm_kwargs["llm_model"]}为您选择了插件: `{fn_name}`\n\n插件说明:{plugin["Info"]}\n\n插件参数:{plugin_sel.plugin_arg}\n\n假如偏离了您的要求,按停止键终止。'
    yield from update_ui_lastest_msg(lastmsg=msg, chatbot=chatbot, history=history, delay=2)
    yield from fn(plugin_sel.plugin_arg, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, -1)
    return
|
crazy_functions/vt_fns/vt_modify_config.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from typing import List
|
| 3 |
+
from toolbox import update_ui_lastest_msg, get_conf
|
| 4 |
+
from request_llms.bridge_all import predict_no_ui_long_connection
|
| 5 |
+
from crazy_functions.json_fns.pydantic_io import GptJsonIO
|
| 6 |
+
import copy, json, pickle, os, sys
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def modify_configuration_hot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
    """Hot-apply a configuration change requested in natural language.

    Maps the user's free-form request onto one of the options declared in
    config.py using the LLM (via GptJsonIO), then applies it immediately with
    toolbox.set_conf — no restart. Guarded by the ALLOW_RESET_CONFIG switch.

    This is a generator (yields UI refreshes); the `user_intention` parameter
    is overwritten internally with the parsed intention object.
    """
    ALLOW_RESET_CONFIG = get_conf('ALLOW_RESET_CONFIG')
    if not ALLOW_RESET_CONFIG:
        # Configuration modification is disabled by default; bail out with a hint.
        yield from update_ui_lastest_msg(
            lastmsg=f"当前配置不允许被修改!如需激活本功能,请在config.py中设置ALLOW_RESET_CONFIG=True后重启软件。",
            chatbot=chatbot, history=history, delay=2
        )
        return

    # ⭐ ⭐ ⭐ Collect the names of all configurable entries from config.py
    names = {}
    from enum import Enum
    import config
    for k, v in config.__dict__.items():
        if k.startswith('__'): continue
        names.update({k:k})
        # if len(names) > 20: break # Cap the option count if the list grows too long for the LLM to digest

    # Build an Enum on the fly so pydantic restricts the LLM to valid option names.
    ConfigOptions = Enum('ConfigOptions', names)
    class ModifyConfigurationIntention(BaseModel):
        which_config_to_modify: ConfigOptions = Field(description="the name of the configuration to modify, you must choose from one of the ConfigOptions enum.", default=None)
        new_option_value: str = Field(description="the new value of the option", default=None)

    # ⭐ ⭐ ⭐ Analyze the user's intention with the LLM
    yield from update_ui_lastest_msg(lastmsg=f"正在执行任务: {txt}\n\n读取新配置中", chatbot=chatbot, history=history, delay=0)
    gpt_json_io = GptJsonIO(ModifyConfigurationIntention)
    inputs = "Analyze how to change configuration according to following user input, answer me with json: \n\n" + \
             ">> " + txt.rstrip('\n').replace('\n','\n>> ') + '\n\n' + \
             gpt_json_io.format_instructions

    run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
        inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
    user_intention = gpt_json_io.generate_output_auto_repair(run_gpt_fn(inputs, ""), run_gpt_fn)

    explicit_conf = user_intention.which_config_to_modify.value

    # Safety check: only proceed when the option name literally appears in the
    # user's message, so the LLM cannot silently change an unrelated option.
    ok = (explicit_conf in txt)
    if ok:
        yield from update_ui_lastest_msg(
            lastmsg=f"正在执行任务: {txt}\n\n新配置{explicit_conf}={user_intention.new_option_value}",
            chatbot=chatbot, history=history, delay=1
        )
        yield from update_ui_lastest_msg(
            lastmsg=f"正在执行任务: {txt}\n\n新配置{explicit_conf}={user_intention.new_option_value}\n\n正在修改配置中",
            chatbot=chatbot, history=history, delay=2
        )

        # ⭐ ⭐ ⭐ Apply the new configuration immediately
        from toolbox import set_conf
        set_conf(explicit_conf, user_intention.new_option_value)

        yield from update_ui_lastest_msg(
            lastmsg=f"正在执行任务: {txt}\n\n配置修改完成,重新页面即可生效。", chatbot=chatbot, history=history, delay=1
        )
    else:
        yield from update_ui_lastest_msg(
            lastmsg=f"失败,如果需要配置{explicit_conf},您需要明确说明并在指令中提到它。", chatbot=chatbot, history=history, delay=5
        )
+
def modify_configuration_reboot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
    """Apply a configuration change, then restart the whole process so that
    settings read only at startup take effect.

    Delegates the parsing/validation/apply steps to modify_configuration_hot,
    then re-execs the current Python interpreter with the same argv.
    """
    ALLOW_RESET_CONFIG = get_conf('ALLOW_RESET_CONFIG')
    if not ALLOW_RESET_CONFIG:
        yield from update_ui_lastest_msg(
            lastmsg=f"当前配置不允许被修改!如需激活本功能,请在config.py中设置ALLOW_RESET_CONFIG=True后重启软件。",
            chatbot=chatbot, history=history, delay=2
        )
        return

    yield from modify_configuration_hot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)
    yield from update_ui_lastest_msg(
        lastmsg=f"正在执行任务: {txt}\n\n配置修改完成,五秒后即将重启!若出现报错请无视即可。", chatbot=chatbot, history=history, delay=5
    )
    # Replace the running process with a fresh copy of itself (never returns).
    os.execl(sys.executable, sys.executable, *sys.argv)
|
crazy_functions/vt_fns/vt_state.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pickle
|
| 2 |
+
|
| 3 |
+
class VoidTerminalState():
    """Pickle-serializable plugin state for the Void Terminal, persisted inside
    the chatbot cookie dict (`chatbot._cookies`).

    The 'lock_plugin' cookie routes all subsequent user input back into the
    虚空终端 plugin; 'plugin_state' carries this object, pickled.
    """

    def __init__(self):
        self.reset_state()

    def reset_state(self):
        # NOTE: the attribute keeps the historical misspelling ("explaination")
        # because other modules read it under this exact name.
        self.has_provided_explaination = False

    def lock_plugin(self, chatbot):
        """Lock the UI onto the Void Terminal plugin and persist this state."""
        chatbot._cookies['lock_plugin'] = 'crazy_functions.虚空终端->虚空终端'
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    def unlock_plugin(self, chatbot):
        """Release the plugin lock and persist a freshly-reset state."""
        self.reset_state()
        chatbot._cookies['lock_plugin'] = None
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    def set_state(self, chatbot, key, value):
        """Set a single attribute and immediately re-persist the whole state."""
        setattr(self, key, value)
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    @staticmethod
    def get_state(chatbot):
        """Load the persisted state from the chatbot cookies (or create a fresh
        one), attach the chatbot handle, and return it.

        fix: was a class-body function without `self`/`@staticmethod`; calling it
        on an instance would have misbound the instance as `chatbot`. The
        decorator keeps the existing `VoidTerminalState.get_state(chatbot)`
        call form working unchanged.
        """
        state = chatbot._cookies.get('plugin_state', None)
        if state is not None: state = pickle.loads(state)
        else: state = VoidTerminalState()
        state.chatbot = chatbot
        return state
|
crazy_functions/命令行助手.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import CatchException, update_ui, gen_time_str
|
| 2 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
| 3 |
+
from .crazy_utils import input_clipping
|
| 4 |
+
import copy, json
|
| 5 |
+
|
| 6 |
+
@CatchException
def 命令行助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Turn a natural-language request into a (preferably one-line) bash command.

    txt            user input from the text box
    llm_kwargs     LLM parameters (temperature, top_p, ...), passed through
    plugin_kwargs  plugin parameters (unused here)
    chatbot        chat display handle
    history        chat history (cleared here to avoid prompt overflow)
    system_prompt  silent system prompt for the LLM
    web_port       port the app is running on
    """
    # Start from an empty history so a long previous session cannot overflow the prompt.
    history = []

    prompt = "请写bash命令实现以下功能:" + txt
    response = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=prompt,
        inputs_show_user=txt,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history=[],
        sys_prompt="你是一个Linux大师级用户。注意,当我要求你写bash命令时,尽可能地仅用一行命令解决我的要求。",
    )
    yield from update_ui(chatbot=chatbot, history=history)
| 30 |
+
|
| 31 |
+
|
crazy_functions/对话历史存档.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import CatchException, update_ui, promote_file_to_downloadzone, get_log_folder, get_user
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
f_prefix = 'GPT-Academic对话存档'
|
| 5 |
+
|
| 6 |
+
def write_chat_to_file(chatbot, history=None, file_name=None):
    """Write the chat (rendered HTML rounds plus the raw context) to an HTML
    file in the user's log folder, promote it to the download zone, and return
    a human-readable status string containing the path.

    If `file_name` is None a timestamped name is generated.
    """
    import os
    import time
    if history is None:
        history = []  # fix: with the default, `for h in history` raised TypeError (None is not iterable)
    if file_name is None:
        file_name = f_prefix + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html'
    fp = os.path.join(get_log_folder(get_user(chatbot), plugin_name='chat_history'), file_name)
    with open(fp, 'w', encoding='utf8') as f:
        from themes.theme import advanced_css
        f.write(f'<!DOCTYPE html><head><meta charset="utf-8"><title>对话历史</title><style>{advanced_css}</style></head>')
        for i, contents in enumerate(chatbot):
            for j, content in enumerate(contents):
                try:  # trigger condition of this bug was never found; coerce defensively
                    if type(content) != str: content = str(content)
                except Exception:
                    continue
                f.write(content)
                if j == 0:
                    # dotted rule separates question from answer within one round
                    f.write('<hr style="border-top: dotted 3px #ccc;">')
            # red rule separates rounds (read_file_to_chat splits on this marker)
            f.write('<hr color="red"> \n\n')
        # blue rule separates the rendered HTML from the raw context
        f.write('<hr color="blue"> \n\n raw chat context:\n')
        f.write('<code>')
        for h in history:
            f.write("\n>>>" + h)
        f.write('</code>')
    promote_file_to_downloadzone(fp, rename_file=file_name, chatbot=chatbot)
    return '对话历史写入:' + fp
|
| 35 |
+
|
| 36 |
+
def gen_file_preview(file_name):
    """Return a short (<=100 char) preview of the first raw-context entry of an
    archived chat HTML file, or "" when the file is missing or malformed.
    """
    try:
        with open(file_name, 'r', encoding='utf8') as f:
            file_content = f.read()
        # pattern to match the text between <head> and </head>
        pattern = re.compile(r'<head>.*?</head>', flags=re.DOTALL)
        file_content = re.sub(pattern, '', file_content)
        html, history = file_content.split('<hr color="blue"> \n\n raw chat context:\n')
        # NOTE: str.strip removes a *character set*, not the literal tag text;
        # kept as-is because this preview is best-effort anyway.
        history = history.strip('<code>')
        history = history.strip('</code>')
        history = history.split("\n>>>")
        return list(filter(lambda x:x!="", history))[0][:100]
    except Exception:
        # fix: was a bare `except:` which also swallowed KeyboardInterrupt/SystemExit
        return ""
|
| 50 |
+
|
| 51 |
+
def read_file_to_chat(chatbot, history, file_name):
    """Restore a chat session from an archived HTML file produced by
    write_chat_to_file: refill `chatbot` with the Q/A rounds and return the
    (chatbot, history) pair.
    """
    with open(file_name, 'r', encoding='utf8') as f:
        raw = f.read()
    # Drop the <head>...</head> section (CSS/meta only).
    head_pattern = re.compile(r'<head>.*?</head>', flags=re.DOTALL)
    raw = re.sub(head_pattern, '', raw)
    # The archive stores the rendered HTML first, then the raw context.
    html, history = raw.split('<hr color="blue"> \n\n raw chat context:\n')
    history = history.strip('<code>').strip('</code>')
    history = [entry for entry in history.split("\n>>>") if entry != ""]
    rounds = [seg for seg in html.split('<hr color="red"> \n\n') if seg != ""]
    chatbot.clear()
    for seg in rounds:
        # Each round is "question<dotted rule>answer".
        question, answer = seg.split('<hr style="border-top: dotted 3px #ccc;">')
        chatbot.append([question, answer])
    chatbot.append([f"存档文件详情?", f"[Local Message] 载入对话{len(rounds)}条,上下文{len(history)}条。"])
    return chatbot, history
|
| 70 |
+
|
| 71 |
+
@CatchException
def 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Save the current conversation to an HTML archive and tell the user how
    to restore it.

    txt            user input from the text box (unused here)
    llm_kwargs     LLM parameters, passed through
    plugin_kwargs  plugin parameters (unused)
    chatbot        chat display handle
    history        chat history
    system_prompt  silent system prompt (unused)
    web_port       port the app is running on (unused)
    """
    chatbot.append(("保存当前对话",
        f"[Local Message] {write_chat_to_file(chatbot, history)},您可以调用下拉菜单中的“载入对话历史存档”还原当下的对话。"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI promptly; the GPT request may take a while
|
| 86 |
+
|
| 87 |
+
def hide_cwd(text):
    """Replace the current working directory prefix in *text* with '.' so that
    absolute log paths are not leaked to the UI.

    fix: the parameter was named `str`, shadowing the builtin. All in-file
    callers pass it positionally.
    """
    import os
    current_path = os.getcwd()
    replace_path = "."
    return text.replace(current_path, replace_path)
|
| 92 |
+
|
| 93 |
+
@CatchException
def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Load a previously-archived chat (HTML) back into the chatbot.

    txt            user input: path/URL of an archive, or empty
    llm_kwargs     LLM parameters, passed through
    plugin_kwargs  plugin parameters (unused)
    chatbot        chat display handle
    history        chat history (replaced on successful load)
    system_prompt  silent system prompt (unused)
    web_port       port the app is running on (unused)
    """
    from .crazy_utils import get_files_from_everything
    success, file_manifest, _ = get_files_from_everything(txt, type='.html')

    if not success:
        # No usable .html found: list the locally stored archives (with a short
        # preview each) so the user can paste one of the paths and retry.
        if txt == "": txt = '空空如也的输入栏'
        import glob
        local_history = "<br/>".join([
            "`"+hide_cwd(f)+f" ({gen_file_preview(f)})"+"`"
            for f in glob.glob(
                f'{get_log_folder(get_user(chatbot), plugin_name="chat_history")}/**/{f_prefix}*.html',
                recursive=True
            )])
        chatbot.append([f"正在查找对话历史文件(html格式): {txt}", f"找不到任何html文件: {txt}。但本地存储了以下历史文件,您可以将任意一个文件路径粘贴到输入区,然后重试:<br/>{local_history}"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return

    try:
        # Only the first matching archive is loaded.
        chatbot, history = read_file_to_chat(chatbot, history, file_manifest[0])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
    except:
        # Malformed archive (e.g. markers missing) — report instead of crashing.
        chatbot.append([f"载入对话历史文件", f"对话历史文件损坏!"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
|
| 127 |
+
|
| 128 |
+
@CatchException
def 删除所有本地对话历史记录(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Delete every locally stored chat-archive HTML file for the current user
    and report the removed paths.

    txt            user input (unused)
    llm_kwargs     LLM parameters (unused)
    plugin_kwargs  plugin parameters (unused)
    chatbot        chat display handle
    history        chat history
    system_prompt  silent system prompt (unused)
    web_port       port the app is running on (unused)
    """
    import glob, os
    # fix: the same glob was evaluated twice (once for the report, once for the
    # deletion loop); compute the file list a single time.
    files = glob.glob(
        f'{get_log_folder(get_user(chatbot), plugin_name="chat_history")}/**/{f_prefix}*.html',
        recursive=True
    )
    local_history = "<br/>".join(["`"+hide_cwd(f)+"`" for f in files])
    for f in files:
        os.remove(f)
    chatbot.append([f"删除所有历史对话文件", f"已删除<br/>{local_history}"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
    return
|
| 151 |
+
|
| 152 |
+
|
crazy_functions/生成函数注释.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import update_ui
|
| 2 |
+
from toolbox import CatchException, report_exception
|
| 3 |
+
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
| 4 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
| 5 |
+
fast_debug = False
|
| 6 |
+
|
| 7 |
+
def 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
    """For each file in `file_manifest`, ask the LLM for a file overview plus a
    markdown table of per-function annotations, then write the accumulated
    history to a downloadable report.
    """
    import time, os
    print('begin analysis on:', file_manifest)
    for index, fp in enumerate(file_manifest):
        with open(fp, 'r', encoding='utf-8', errors='replace') as f:
            file_content = f.read()

        # Full prompt contains the whole file; the user-visible line only shows the path.
        i_say = f'请对下面的程序文件做一个概述,并对文件中的所有函数生成注释,使用markdown表格输出结果,文件名是{os.path.relpath(fp, project_folder)},文件内容是 ```{file_content}```'
        i_say_show_user = f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述,并对文件中的所有函数生成注释: {os.path.abspath(fp)}'
        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

        if not fast_debug:
            msg = '正常'
            # ** gpt request ** (with timeout countdown)
            gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
                i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt)

            chatbot[-1] = (i_say_show_user, gpt_say)
            history.append(i_say_show_user); history.append(gpt_say)
            yield from update_ui(chatbot=chatbot, history=history, msg=msg)  # refresh UI
        if not fast_debug: time.sleep(2)

    if not fast_debug:
        # NOTE(review): `msg` is only bound inside the loop above; this final
        # update would raise NameError if `file_manifest` were empty — callers
        # appear to guard against an empty manifest. TODO confirm.
        res = write_history_to_file(history)
        promote_file_to_downloadzone(res, chatbot=chatbot)
        chatbot.append(("完成了吗?", res))
        yield from update_ui(chatbot=chatbot, history=history, msg=msg)  # refresh UI
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@CatchException
def 批量生成函数注释(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Entry point: collect all .py and .cpp files under the project folder
    given in `txt` and generate per-function annotations for each of them.
    """
    history = []    # clear history to avoid prompt overflow
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.py', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)]

    if len(file_manifest) == 0:
        # fix: the message previously said ".tex", but this plugin searches for .py/.cpp files
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.py或.cpp文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
crazy_functions/联网的ChatGPT.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import CatchException, update_ui
|
| 2 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
|
| 3 |
+
import requests
|
| 4 |
+
from bs4 import BeautifulSoup
|
| 5 |
+
from request_llms.bridge_all import model_info
|
| 6 |
+
|
| 7 |
+
def google(query, proxies):
    """Query Google's HTML search page and return a list of result dicts of the
    form {'title': ..., 'link': ...}. `proxies` is a requests-style mapping.
    """
    search_url = f"https://www.google.com/search?q={query}"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
    page = requests.get(search_url, headers=headers, proxies=proxies)
    parsed = BeautifulSoup(page.content, 'html.parser')
    hits = []
    for entry in parsed.find_all('div', class_='g'):
        links = entry.find_all('a')
        if not links:
            continue
        href = links[0]['href']
        # Google sometimes wraps targets as a relative redirect: /url?q=<target>
        if href.startswith('/url?q='):
            href = href[7:]
        if not href.startswith('http'):
            continue
        hits.append({'title': entry.find('h3').text, 'link': href})

    for hit in hits:
        print(hit['link'])
    return hits
|
| 29 |
+
|
| 30 |
+
def scrape_text(url, proxies) -> str:
    """Scrape visible text from a webpage.

    Args:
        url (str): The URL to scrape text from
        proxies: requests-style proxy mapping (or None)

    Returns:
        str: The scraped text, or a Chinese error message when the page
        cannot be fetched.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
        'Content-Type': 'text/plain',
    }
    try:
        response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
        # requests defaults to ISO-8859-1 when no charset header is present;
        # prefer the sniffed encoding to avoid mojibake.
        if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
    except Exception:
        # fix: was a bare `except:` which also swallowed SystemExit/KeyboardInterrupt
        return "无法连接到该网页"
    soup = BeautifulSoup(response.text, "html.parser")
    for script in soup(["script", "style"]):
        script.extract()
    text = soup.get_text()
    lines = (line.strip() for line in text.splitlines())
    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
    text = "\n".join(chunk for chunk in chunks if chunk)
    return text
|
| 56 |
+
|
| 57 |
+
@CatchException
def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Answer a question using live Google results: search, scrape the top
    pages, then let the LLM synthesize an answer.

    txt            user question from the text box
    llm_kwargs     LLM parameters (temperature, top_p, ...), passed through
    plugin_kwargs  plugin parameters (unused)
    chatbot        chat display handle
    history        chat history (cleared, then repopulated with search results)
    system_prompt  silent system prompt (unused here)
    web_port       port the app is running on (unused)
    """
    history = []    # clear history to avoid prompt overflow
    chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
                    "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。您若希望分享新的功能模组,请不吝PR!"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI promptly; the GPT request may take a while

    # ------------- < Step 1: scrape the search engine results > -------------
    from toolbox import get_conf
    proxies = get_conf('proxies')
    urls = google(txt, proxies)
    history = []
    if len(urls) == 0:
        chatbot.append((f"结论:{txt}",
                        "[Local Message] 受到google限制,无法从google获取信息!"))
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    # ------------- < Step 2: visit each result page > -------------
    max_search_result = 5  # at most this many pages are included
    for index, url in enumerate(urls[:max_search_result]):
        res = scrape_text(url['link'], proxies)
        history.extend([f"第{index}份搜索结果:", res])
        chatbot.append([f"第{index}份搜索结果:", res[:500]+"......"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # ------------- < Step 3: let the LLM synthesize an answer > -------------
    i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
    i_say, history = input_clipping( # clip the longest entries first so the prompt stays within the token budget
        inputs=i_say,
        history=history,
        max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
    )
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=i_say,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
        sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
    )
    chatbot[-1] = (i_say, gpt_say)
    history.append(i_say);history.append(gpt_say)
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
|
| 106 |
+
|
crazy_functions/联网的ChatGPT_bing版.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import CatchException, update_ui
|
| 2 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
|
| 3 |
+
import requests
|
| 4 |
+
from bs4 import BeautifulSoup
|
| 5 |
+
from request_llms.bridge_all import model_info
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def bing_search(query, proxies=None):
    """Query Bing's HTML search page (cn.bing.com) and return a list of result
    dicts of the form {'title': ..., 'link': ...}.
    """
    search_url = f"https://cn.bing.com/search?q={query}"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
    page = requests.get(search_url, headers=headers, proxies=proxies)
    parsed = BeautifulSoup(page.content, 'html.parser')
    hits = []
    for entry in parsed.find_all('li', class_='b_algo'):
        links = entry.find_all('a')
        if not links:
            continue
        href = links[0]['href']
        if not href.startswith('http'):
            continue
        hits.append({'title': entry.find('h2').text, 'link': href})

    for hit in hits:
        print(hit['link'])
    return hits
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def scrape_text(url, proxies) -> str:
    """Scrape visible text from a webpage.

    Args:
        url (str): The URL to scrape text from
        proxies: requests-style proxy mapping (or None)

    Returns:
        str: The scraped text, or a Chinese error message when the page
        cannot be fetched.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
        'Content-Type': 'text/plain',
    }
    try:
        response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
        # requests defaults to ISO-8859-1 when no charset header is present;
        # prefer the sniffed encoding to avoid mojibake.
        if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
    except Exception:
        # fix: was a bare `except:` which also swallowed SystemExit/KeyboardInterrupt
        return "无法连接到该网页"
    soup = BeautifulSoup(response.text, "html.parser")
    for script in soup(["script", "style"]):
        script.extract()
    text = soup.get_text()
    lines = (line.strip() for line in text.splitlines())
    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
    text = "\n".join(chunk for chunk in chunks if chunk)
    return text
|
| 56 |
+
|
| 57 |
+
@CatchException
def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Answer a question using live Bing results: search, scrape the top pages,
    then let the LLM synthesize an answer.

    txt            user question from the text box
    llm_kwargs     LLM parameters (temperature, top_p, ...), passed through
    plugin_kwargs  plugin parameters (unused)
    chatbot        chat display handle
    history        chat history (cleared, then repopulated with search results)
    system_prompt  silent system prompt (unused here)
    web_port       port the app is running on (unused)
    """
    history = []    # clear history to avoid prompt overflow
    chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
                    "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。您若希望分享新的功能模组,请不吝PR!"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI promptly; the GPT request may take a while

    # ------------- < Step 1: scrape the search engine results > -------------
    from toolbox import get_conf
    proxies = get_conf('proxies')
    urls = bing_search(txt, proxies)
    history = []
    if len(urls) == 0:
        chatbot.append((f"结论:{txt}",
                        "[Local Message] 受到bing限制,无法从bing获取信息!"))
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    # ------------- < Step 2: visit each result page > -------------
    max_search_result = 8  # at most this many pages are included
    for index, url in enumerate(urls[:max_search_result]):
        res = scrape_text(url['link'], proxies)
        history.extend([f"第{index}份搜索结果:", res])
        chatbot.append([f"第{index}份搜索结果:", res[:500]+"......"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # ------------- < Step 3: let the LLM synthesize an answer > -------------
    i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
    i_say, history = input_clipping( # clip the longest entries first so the prompt stays within the token budget
        inputs=i_say,
        history=history,
        max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
    )
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=i_say,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
        # fix: this runtime string contained mojibake ("进行���结"); restored the
        # intended text to match the sibling Google plugin.
        sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
    )
    chatbot[-1] = (i_say, gpt_say)
    history.append(i_say);history.append(gpt_say)
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
|
| 106 |
+
|
crazy_functions/虚空终端.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Explanation of the Void Terminal Plugin:
|
| 3 |
+
|
| 4 |
+
Please describe in natural language what you want to do.
|
| 5 |
+
|
| 6 |
+
1. You can open the plugin's dropdown menu to explore various capabilities of this project, and then describe your needs in natural language, for example:
|
| 7 |
+
- "Please call the plugin to translate a PDF paper for me. I just uploaded the paper to the upload area."
|
| 8 |
+
- "Please use the plugin to translate a PDF paper, with the address being https://www.nature.com/articles/s41586-019-1724-z.pdf."
|
| 9 |
+
- "Generate an image with blooming flowers and lush green grass using the plugin."
|
| 10 |
+
- "Translate the README using the plugin. The GitHub URL is https://github.com/facebookresearch/co-tracker."
|
| 11 |
+
- "Translate an Arxiv paper for me. The Arxiv ID is 1812.10695. Remember to use the plugin and don't do it manually!"
|
| 12 |
+
- "I don't like the current interface color. Modify the configuration and change the theme to THEME="High-Contrast"."
|
| 13 |
+
- "Could you please explain the structure of the Transformer network?"
|
| 14 |
+
|
| 15 |
+
2. If you use keywords like "call the plugin xxx", "modify the configuration xxx", "please", etc., your intention can be recognized more accurately.
|
| 16 |
+
|
| 17 |
+
3. Your intention can be recognized more accurately when using powerful models like GPT4. This plugin is relatively new, so please feel free to provide feedback on GitHub.
|
| 18 |
+
|
| 19 |
+
4. Now, if you need to process a file, please upload the file (drag the file to the file upload area) or describe the path to the file.
|
| 20 |
+
|
| 21 |
+
5. If you don't need to upload a file, you can simply repeat your command again.
|
| 22 |
+
"""
|
| 23 |
+
explain_msg = """
|
| 24 |
+
## 虚空终端插件说明:
|
| 25 |
+
|
| 26 |
+
1. 请用**自然语言**描述您需要做什么。例如:
|
| 27 |
+
- 「请调用插件,为我翻译PDF论文,论文我刚刚放到上传区了」
|
| 28 |
+
- 「请调用插件翻译PDF论文,地址为https://openreview.net/pdf?id=rJl0r3R9KX」
|
| 29 |
+
- 「把Arxiv论文翻译成中文PDF,arxiv论文的ID是1812.10695,记得用插件!」
|
| 30 |
+
- 「生成一张图片,图中鲜花怒放,绿草如茵,用插件实现」
|
| 31 |
+
- 「用插件翻译README,Github网址是https://github.com/facebookresearch/co-tracker」
|
| 32 |
+
- 「我不喜欢当前的界面颜色,修改配置,把主题THEME更换为THEME="High-Contrast"」
|
| 33 |
+
- 「请调用插件,解析python源代码项目,代码我刚刚打包拖到上传区了」
|
| 34 |
+
- 「请问Transformer网络的结构是怎样的?」
|
| 35 |
+
|
| 36 |
+
2. 您可以打开插件下拉菜单以了解本项目的各种能力。
|
| 37 |
+
|
| 38 |
+
3. 如果您使用「调用插件xxx」、「修改配置xxx」、「请问」等关键词,您的意图可以被识别的更准确。
|
| 39 |
+
|
| 40 |
+
4. 建议使用 GPT3.5 或更强的模型,弱模型可能无法理解您的想法。该插件诞生时间不长,欢迎您前往Github反馈问题。
|
| 41 |
+
|
| 42 |
+
5. 现在,如果需要处理文件,请您上传文件(将文件拖动到文件上传区),或者描述文件所在的路径。
|
| 43 |
+
|
| 44 |
+
6. 如果不需要上传文件,现在您只需要再次重复一次您的指令即可。
|
| 45 |
+
"""
|
| 46 |
+
|
| 47 |
+
from pydantic import BaseModel, Field
|
| 48 |
+
from typing import List
|
| 49 |
+
from toolbox import CatchException, update_ui, is_the_upload_folder
|
| 50 |
+
from toolbox import update_ui_lastest_msg, disable_auto_promotion
|
| 51 |
+
from request_llms.bridge_all import predict_no_ui_long_connection
|
| 52 |
+
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
| 53 |
+
from crazy_functions.crazy_utils import input_clipping
|
| 54 |
+
from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
|
| 55 |
+
from crazy_functions.vt_fns.vt_state import VoidTerminalState
|
| 56 |
+
from crazy_functions.vt_fns.vt_modify_config import modify_configuration_hot
|
| 57 |
+
from crazy_functions.vt_fns.vt_modify_config import modify_configuration_reboot
|
| 58 |
+
from crazy_functions.vt_fns.vt_call_plugin import execute_plugin
|
| 59 |
+
|
| 60 |
+
class UserIntention(BaseModel):
    """Schema the LLM is asked to fill in when classifying a user request.

    Produced either by simple keyword rules or by GptJsonIO-driven JSON
    extraction from a model response.
    """
    user_prompt: str = Field(description="the content of user input", default="")
    intention_type: str = Field(description="the type of user intention, choose from ['ModifyConfiguration', 'ExecutePlugin', 'Chat']", default="ExecutePlugin")
    user_provide_file: bool = Field(description="whether the user provides a path to a file", default=False)
    user_provide_url: bool = Field(description="whether the user provides a url", default=False)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def chat(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
    """Plain-chat fallback: forward the user's input to the LLM with an empty
    history, then record the exchange in both the chatbot view and history."""
    reply = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=txt, inputs_show_user=txt,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
        sys_prompt=system_prompt
    )
    chatbot[-1] = [txt, reply]
    history.extend([txt, reply])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# Human-readable (Chinese) labels used when echoing the classified intention
# back to the user in the UI; keys mirror UserIntention.intention_type.
explain_intention_to_user = {
    'Chat': "聊天对话",
    'ExecutePlugin': "调用插件",
    'ModifyConfiguration': "修改配置",
}
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def analyze_intention_with_simple_rules(txt):
    """Keyword-based fast path for intent classification.

    Returns (is_certain, UserIntention). is_certain is True when any trigger
    keyword appears in txt; when several match, a later keyword in the table
    overrides an earlier one (same precedence as the original if-chain).
    """
    intention = UserIntention()
    intention.user_prompt = txt
    matched = False

    keyword_table = (
        ('请问', 'Chat'),
        ('用插件', 'ExecutePlugin'),
        ('修改配置', 'ModifyConfiguration'),
    )
    for keyword, intent_type in keyword_table:
        if keyword in txt:
            matched = True
            intention.intention_type = intent_type

    return matched, intention
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
@CatchException
def 虚空终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Entry point of the "Void Terminal" plugin.

    First shows a usage explanation (and locks the plugin) when the user's
    intent is ambiguous; once intent is recognizable by keyword rules, or an
    explanation was already shown, forwards the request to 虚空终端主路由.
    """
    disable_auto_promotion(chatbot=chatbot)
    # Fetch the void-terminal state attached to this chatbot session
    state = VoidTerminalState.get_state(chatbot)
    appendix_msg = ""

    # Detect user intention with simple keyword rules
    is_certain, _ = analyze_intention_with_simple_rules(txt)
    if is_the_upload_folder(txt):
        # A file was just uploaded: reset the flag so the user is prompted to describe the task
        state.set_state(chatbot=chatbot, key='has_provided_explaination', value=False)
        appendix_msg = "\n\n**很好,您已经上传了文件**,现在请您描述您的需求。"

    if is_certain or (state.has_provided_explaination):
        # Intent is clear (or help already shown): skip the explanation step and route the request
        state.set_state(chatbot=chatbot, key='has_provided_explaination', value=True)
        state.unlock_plugin(chatbot=chatbot)
        yield from update_ui(chatbot=chatbot, history=history)
        yield from 虚空终端主路由(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
        return
    else:
        # Intent is ambiguous: show the usage explanation and wait for the next input
        state.set_state(chatbot=chatbot, key='has_provided_explaination', value=True)
        state.lock_plugin(chatbot=chatbot)
        chatbot.append(("虚空终端状态:", explain_msg+appendix_msg))
        yield from update_ui(chatbot=chatbot, history=history)
        return
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def 虚空终端主路由(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Dispatch a void-terminal request.

    Classifies the user's intent (keyword rules first, then an LLM-backed JSON
    extraction when the rules are not certain) and routes to configuration
    modification, plugin execution, or plain chat.
    """
    history = []
    chatbot.append(("虚空终端状态: ", f"正在执行任务: {txt}"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # ⭐ ⭐ ⭐ Analyze the user's intention
    is_certain, user_intention = analyze_intention_with_simple_rules(txt)
    if not is_certain:
        yield from update_ui_lastest_msg(
            lastmsg=f"正在执行任务: {txt}\n\n分析用户意图中", chatbot=chatbot, history=history, delay=0)
        gpt_json_io = GptJsonIO(UserIntention)
        rf_req = "\nchoose from ['ModifyConfiguration', 'ExecutePlugin', 'Chat']"
        inputs = "Analyze the intention of the user according to following user input: \n\n" + \
            ">> " + (txt+rf_req).rstrip('\n').replace('\n','\n>> ') + '\n\n' + gpt_json_io.format_instructions
        run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
            inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
        analyze_res = run_gpt_fn(inputs, "")
        try:
            user_intention = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
            # Fix: removed a stray `lastmsg = f"...",` statement here — it assigned a
            # never-used one-element tuple (dead copy-paste leftover of the
            # update_ui_lastest_msg call below).
        except JsonStringError:
            # The model could not be coerced into valid JSON: report and abort
            yield from update_ui_lastest_msg(
                lastmsg=f"正在执行任务: {txt}\n\n用户意图理解: 失败 当前语言模型({llm_kwargs['llm_model']})不能理解您的意图", chatbot=chatbot, history=history, delay=0)
            return

    yield from update_ui_lastest_msg(
        lastmsg=f"正在执行任务: {txt}\n\n用户意图理解: 意图={explain_intention_to_user[user_intention.intention_type]}",
        chatbot=chatbot, history=history, delay=0)

    # User intention: modify this project's configuration
    if user_intention.intention_type == 'ModifyConfiguration':
        yield from modify_configuration_reboot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)

    # User intention: dispatch to a plugin
    if user_intention.intention_type == 'ExecutePlugin':
        yield from execute_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)

    # User intention: plain chat
    if user_intention.intention_type == 'Chat':
        yield from chat(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)

    return
|
| 180 |
+
|
crazy_functions/解析JupyterNotebook.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import update_ui
|
| 2 |
+
from toolbox import CatchException, report_exception
|
| 3 |
+
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
| 4 |
+
fast_debug = True
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class PaperFileGroup():
    """Collects source files and splits over-long contents into fragments that
    fit a token budget before they are sent to the LLM."""
    def __init__(self):
        self.file_paths = []        # path of each input file
        self.file_contents = []     # raw text of each input file
        self.sp_file_contents = []  # fragments after splitting
        self.sp_file_index = []     # index into file_paths for each fragment
        self.sp_file_tag = []       # display tag (path, possibly with a part suffix)

    def get_token_num(self, txt):
        """Estimate the token count of txt.

        Fix: run_file_split calls self.get_token_num, but this version of the
        class never defined it, so splitting any file raised AttributeError.
        Uses the project's tokenizer when importable, otherwise a crude
        characters/4 heuristic.
        """
        try:
            from request_llms.bridge_all import model_info
            enc = model_info["gpt-3.5-turbo"]['tokenizer']
            return len(enc.encode(txt, disallowed_special=()))
        except Exception:
            return len(txt) // 4  # rough approximation: ~4 characters per token

    def run_file_split(self, max_token_limit=1900):
        """Split each file content so that no fragment exceeds max_token_limit tokens."""
        for index, file_content in enumerate(self.file_contents):
            if self.get_token_num(file_content) < max_token_limit:
                # Short enough: keep the file whole
                self.sp_file_contents.append(file_content)
                self.sp_file_index.append(index)
                self.sp_file_tag.append(self.file_paths[index])
            else:
                # Too long: break into token-bounded segments, tagged part by part
                from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
                segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
                for j, segment in enumerate(segments):
                    self.sp_file_contents.append(segment)
                    self.sp_file_index.append(index)
                    self.sp_file_tag.append(
                        self.file_paths[index] + f".part-{j}.txt")
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def parseNotebook(filename, enable_markdown=1):
    """Flatten a Jupyter notebook into one annotated text blob.

    Code cells (and, when enable_markdown is truthy, markdown cells prefixed
    with "Markdown:") are stripped of blank lines and concatenated, each
    preceded by a numbered header line.
    """
    import json

    blocks = []
    with open(filename, 'r', encoding='utf-8', errors='replace') as fh:
        notebook = json.load(fh)
        for cell in notebook['cells']:
            if not cell['source']:
                continue  # skip cells with no content at all
            non_blank = [ln for ln in cell['source'] if ln.strip() != '']
            cell['source'] = non_blank
            if cell['cell_type'] == 'code':
                blocks.append("".join(non_blank))
            elif enable_markdown and cell['cell_type'] == 'markdown':
                blocks.append("Markdown:" + "".join(non_blank))

    pieces = []
    for idx, code in enumerate(blocks):
        pieces.append(f"This is {idx+1}th code block: \n")
        pieces.append(code + "\n")
    return "".join(pieces)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
    """Explain a set of Jupyter notebooks block-by-block with the LLM.

    Each notebook is flattened by parseNotebook, split into token-bounded
    fragments, analyzed concurrently, and the concatenated result is shown in
    the chatbot and written to a downloadable report file.
    """
    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency

    # advanced_arg toggles markdown-cell inclusion; an empty string means "not provided"
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    enable_markdown = plugin_kwargs.get("advanced_arg", "1")
    try:
        enable_markdown = int(enable_markdown)
    except ValueError:
        # Non-numeric argument: fall back to including markdown cells
        enable_markdown = 1

    pfg = PaperFileGroup()

    # Parse every notebook into one flat text per file
    for fp in file_manifest:
        file_content = parseNotebook(fp, enable_markdown=enable_markdown)
        pfg.file_paths.append(fp)
        pfg.file_contents.append(file_content)

    # <-------- split over-long notebook texts ---------->
    pfg.run_file_split(max_token_limit=1024)
    n_split = len(pfg.sp_file_contents)

    # One prompt per fragment (note: "ipynbipynb" typo is in the original prompt text;
    # left untouched here because prompts are runtime strings)
    inputs_array = [r"This is a Jupyter Notebook file, tell me about Each Block in Chinese. Focus Just On Code." +
                    r"If a block starts with `Markdown` which means it's a markdown block in ipynbipynb. " +
                    r"Start a new line for a block and block num use Chinese." +
                    f"\n\n{frag}" for frag in pfg.sp_file_contents]
    inputs_show_user_array = [f"{f}的分析如下" for f in pfg.sp_file_tag]
    sys_prompt_array = ["You are a professional programmer."] * n_split

    # Fan out one LLM request per fragment, multi-threaded
    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=inputs_array,
        inputs_show_user_array=inputs_show_user_array,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history_array=[[""] for _ in range(n_split)],
        sys_prompt_array=sys_prompt_array,
        # max_workers=5,  # maximum parallelism allowed by OpenAI
        scroller_max_len=80
    )

    # <-------- collate results ---------->
    block_result = " \n".join(gpt_response_collection)
    chatbot.append(("解析的结果如下", block_result))
    history.extend(["解析的结果如下", block_result])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # <-------- write report file and finish ---------->
    res = write_history_to_file(history)
    promote_file_to_downloadzone(res, chatbot=chatbot)
    chatbot.append(("完成了吗?", res))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
|
| 110 |
+
|
| 111 |
+
@CatchException
def 解析ipynb文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry: analyze a single .ipynb file or every notebook under a folder.

    txt is either a path to a notebook, a project folder, or empty/invalid
    (reported as an error). Delegates the actual work to ipynb解释.
    """
    chatbot.append([
        "函数插件功能?",
        "对IPynb文件进行解析。Contributor: codycjy."])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    history = []  # clear history to avoid input overflow
    import glob
    import os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "":
            txt = '空空如也的输入栏'
        report_exception(chatbot, history,
                         a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    if txt.endswith('.ipynb'):
        # A single notebook was given directly
        file_manifest = [txt]
    else:
        # Otherwise search the folder recursively for notebooks
        file_manifest = [f for f in glob.glob(
            f'{project_folder}/**/*.ipynb', recursive=True)]
    if len(file_manifest) == 0:
        report_exception(chatbot, history,
                         a=f"解析项目: {txt}", b=f"找不到任何.ipynb文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, )
|
crazy_functions/解析项目源代码.py
ADDED
|
@@ -0,0 +1,371 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import update_ui, promote_file_to_downloadzone, disable_auto_promotion
|
| 2 |
+
from toolbox import CatchException, report_exception, write_history_to_file
|
| 3 |
+
from .crazy_utils import input_clipping
|
| 4 |
+
|
| 5 |
+
def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
    """Analyze a source-code project in two stages:

    1. Summarize each file concurrently (one LLM request per file).
    2. Condense the per-file summaries batch-by-batch (16 files per batch,
       single-threaded, iterative) into an overall project overview.

    Results are streamed to the chatbot UI and written to downloadable reports.
    """
    import os, copy
    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
    from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
    disable_auto_promotion(chatbot=chatbot)

    summary_batch_isolation = True  # True: each summary batch only looks at its own files
    inputs_array = []
    inputs_show_user_array = []
    history_array = []
    sys_prompt_array = []
    report_part_1 = []

    # NOTE(review): the asserted limit (2048) disagrees with the message text (512) — confirm intended cap
    assert len(file_manifest) <= 2048, "源文件太多(超过512个), 请缩减输入文件的数量。或者,您也可以选择删除此行警告,并修改代码拆分file_manifest列表,从而实现分批次处理。"
    ############################## <Stage 1: analyze each file, multi-threaded> ##################################
    for index, fp in enumerate(file_manifest):
        # Read the file; undecodable bytes are replaced rather than raising
        with open(fp, 'r', encoding='utf-8', errors='replace') as f:
            file_content = f.read()
        prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
        i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)},文件代码是 ```{file_content}```'
        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {fp}'
        # Queue the request payloads for the multi-threaded dispatcher
        inputs_array.append(i_say)
        inputs_show_user_array.append(i_say_show_user)
        history_array.append([])
        sys_prompt_array.append("你是一个程序架构分析师,正在分析一个源代码项目。你的回答必须简单明了。")

    # All files read; dispatch one analysis request per file to the LLM
    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array = inputs_array,
        inputs_show_user_array = inputs_show_user_array,
        history_array = history_array,
        sys_prompt_array = sys_prompt_array,
        llm_kwargs = llm_kwargs,
        chatbot = chatbot,
        show_user_at_complete = True
    )

    # Stage 1 finished: persist intermediate results before the aggregation pass
    report_part_1 = copy.deepcopy(gpt_response_collection)
    history_to_return = report_part_1
    res = write_history_to_file(report_part_1)
    promote_file_to_downloadzone(res, chatbot=chatbot)
    chatbot.append(("完成?", "逐个文件分析已完成。" + res + "\n\n正在开始汇总。"))
    yield from update_ui(chatbot=chatbot, history=history_to_return)  # refresh UI

    ############################## <Stage 2: aggregate, single-threaded, batched + iterative> ##################################
    batchsize = 16  # files per aggregation batch
    report_part_2 = []
    previous_iteration_files = []
    last_iteration_result = ""
    while True:
        if len(file_manifest) == 0: break
        this_iteration_file_manifest = file_manifest[:batchsize]
        # gpt_response_collection interleaves [prompt, answer, prompt, answer, ...]
        this_iteration_gpt_response_collection = gpt_response_collection[:batchsize*2]
        file_rel_path = [os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)]
        # Replace each verbose prompt with just the file name to save tokens
        for index, content in enumerate(this_iteration_gpt_response_collection):
            if index%2==0: this_iteration_gpt_response_collection[index] = f"{file_rel_path[index//2]}" # keep only the file name to save tokens
        this_iteration_files = [os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)]
        previous_iteration_files.extend(this_iteration_files)
        previous_iteration_files_string = ', '.join(previous_iteration_files)
        current_iteration_focus = ', '.join(this_iteration_files)
        if summary_batch_isolation: focus = current_iteration_focus
        else: focus = previous_iteration_files_string
        i_say = f'用一张Markdown表格简要描述以下文件的功能:{focus}。根据以上分析,用一句话概括程序的整体功能。'
        if last_iteration_result != "":
            # Carry the previous batch's summary forward as extra system context
            sys_prompt_additional = "已知某些代码的局部作用是:" + last_iteration_result + "\n请继续分析其他源代码,从而更全面地理解项目的整体功能。"
        else:
            sys_prompt_additional = ""
        inputs_show_user = f'根据以上分析,对程序的整体功能和构架重新做出概括,由于输入长度限制,可能需要分组处理,本组文件为 {current_iteration_focus} + 已经汇总的文件组。'
        this_iteration_history = copy.deepcopy(this_iteration_gpt_response_collection)
        this_iteration_history.append(last_iteration_result)
        # Clip the input so it fits the model's context window
        inputs, this_iteration_history_feed = input_clipping(inputs=i_say, history=this_iteration_history, max_token_limit=2560)
        result = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=inputs, inputs_show_user=inputs_show_user, llm_kwargs=llm_kwargs, chatbot=chatbot,
            history=this_iteration_history_feed,  # analysis from previous iterations
            sys_prompt="你是一个程序架构分析师,正在分析一个项目的源代码。" + sys_prompt_additional)

        summary = "请用一句话概括这些文件的整体功能"
        summary_result = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=summary,
            inputs_show_user=summary,
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history=[i_say, result],  # analysis from previous iterations
            sys_prompt="你是一个程序架构分析师,正在分析一个项目的源代码。" + sys_prompt_additional)

        report_part_2.extend([i_say, result])
        last_iteration_result = summary_result
        # Consume this batch and advance to the next
        file_manifest = file_manifest[batchsize:]
        gpt_response_collection = gpt_response_collection[batchsize*2:]

    ############################## <END> ##################################
    history_to_return.extend(report_part_2)
    res = write_history_to_file(history_to_return)
    promote_file_to_downloadzone(res, chatbot=chatbot)
    chatbot.append(("完成了吗?", res))
    yield from update_ui(chatbot=chatbot, history=history_to_return)  # refresh UI
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
@CatchException
def 解析项目本身(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Analyze this project's own Python sources (top level plus one directory deep)."""
    history = []  # reset history so previous turns cannot overflow the input
    import glob
    file_manifest = glob.glob('./*.py') + glob.glob('./*/*.py')
    project_folder = './'
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 120 |
+
|
| 121 |
+
@CatchException
def 解析一个Python项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Recursively collect every .py file under the folder given in txt and analyze them."""
    history = []  # reset history so previous turns cannot overflow the input
    import glob, os
    if not os.path.exists(txt):
        shown = txt if txt != "" else '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {shown}", b = f"找不到本地项目或无权访问: {shown}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    project_folder = txt
    file_manifest = glob.glob(f'{project_folder}/**/*.py', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 138 |
+
|
| 139 |
+
@CatchException
def 解析一个Matlab项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Recursively collect every .m file under the folder given in txt and analyze them."""
    history = []  # reset history so previous turns cannot overflow the input
    import glob, os
    if not os.path.exists(txt):
        shown = txt if txt != "" else '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析Matlab项目: {shown}", b = f"找不到本地项目或无权访问: {shown}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    project_folder = txt
    file_manifest = glob.glob(f'{project_folder}/**/*.m', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析Matlab项目: {txt}", b = f"找不到任何`.m`源文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 156 |
+
|
| 157 |
+
@CatchException
def 解析一个C项目的头文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Analyze only the header files (*.h / *.hpp) of a C/C++ project located at txt.

    Delegates the actual analysis to 解析源代码新. The .c glob is deliberately
    commented out — use 解析一个C项目 for full-source analysis.
    """
    history = []  # clear history to avoid input overflow
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.h', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.hpp', recursive=True)] #+ \
                    # [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 176 |
+
|
| 177 |
+
@CatchException
def 解析一个C项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Analyze a C/C++ project: collects *.h, *.cpp, *.hpp and *.c files under txt
    and delegates to 解析源代码新."""
    history = []  # clear history to avoid input overflow
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.h', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.hpp', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
    if len(file_manifest) == 0:
        # Fix: previous message claimed "找不到任何.h头文件" (copy-pasted from the
        # header-only variant) although this function also searches .c/.cpp/.hpp.
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何C/C++源文件(.h/.c/.cpp/.hpp): {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
@CatchException
def 解析一个Java项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Collect Java sources plus build/config artifacts (*.jar/*.xml/*.sh) under txt and analyze them."""
    history = []  # reset history so previous turns cannot overflow the input
    import glob, os
    if not os.path.exists(txt):
        shown = txt if txt != "" else '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {shown}", b=f"找不到本地项目或无权访问: {shown}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    project_folder = txt
    file_manifest = []
    for pattern in ('*.java', '*.jar', '*.xml', '*.sh'):
        file_manifest += glob.glob(f'{project_folder}/**/{pattern}', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何java文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
@CatchException
def 解析一个前端项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Collect front-end sources (ts/tsx/json/js/vue/less/sass/wxml/wxss/css/jsx) under txt and analyze them."""
    history = []  # reset history so previous turns cannot overflow the input
    import glob, os
    if not os.path.exists(txt):
        shown = txt if txt != "" else '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {shown}", b=f"找不到本地项目或无权访问: {shown}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    project_folder = txt
    # Patterns kept in the original concatenation order
    patterns = ('*.ts', '*.tsx', '*.json', '*.js', '*.vue', '*.less',
                '*.sass', '*.wxml', '*.wxss', '*.css', '*.jsx')
    file_manifest = []
    for pattern in patterns:
        file_manifest += glob.glob(f'{project_folder}/**/{pattern}', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何前端相关文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
@CatchException
def 解析一个Golang项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Collect Go sources plus module metadata (go.mod/go.sum/go.work) under txt and analyze them."""
    history = []  # reset history so previous turns cannot overflow the input
    import glob, os
    if not os.path.exists(txt):
        shown = txt if txt != "" else '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {shown}", b=f"找不到本地项目或无权访问: {shown}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    project_folder = txt
    file_manifest = []
    for pattern in ('*.go', 'go.mod', 'go.sum', 'go.work'):
        file_manifest += glob.glob(f'{project_folder}/**/{pattern}', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何golang文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 270 |
+
|
| 271 |
+
@CatchException
def 解析一个Rust项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Analyze a Rust project: collects *.rs, *.toml and *.lock files under txt
    and delegates to 解析源代码新."""
    history = []  # clear history to avoid input overflow
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.rs', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.toml', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.lock', recursive=True)]
    if len(file_manifest) == 0:
        # Fix: message previously said "找不到任何golang文件" — copy-paste bug from the Golang plugin.
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何Rust相关文件(.rs/.toml/.lock): {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 290 |
+
|
| 291 |
+
@CatchException
def 解析一个Lua项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Analyze a Lua project: gather *.lua/*.xml/*.json/*.toml files below `txt`
    and delegate to 解析源代码新 (generator plugin contract)."""
    import glob, os
    history = []  # reset chat history so the prompt does not overflow
    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    project_folder = txt
    # Lua sources plus the data/config formats commonly shipped alongside them
    file_manifest = []
    for suffix in ('*.lua', '*.xml', '*.json', '*.toml'):
        file_manifest += glob.glob(f'{project_folder}/**/{suffix}', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何lua文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
@CatchException
def 解析一个CSharp项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Analyze a C# project: gather *.cs and *.csproj files below `txt`
    and delegate to 解析源代码新 (generator plugin contract)."""
    import glob, os
    history = []  # reset chat history so the prompt does not overflow
    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    project_folder = txt
    file_manifest = []
    for suffix in ('*.cs', '*.csproj'):
        file_manifest += glob.glob(f'{project_folder}/**/{suffix}', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何CSharp文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
@CatchException
def 解析任意code项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Analyze an arbitrary code project.

    `plugin_kwargs["advanced_arg"]` holds the match specification, e.g.
    "*.c, *.cpp, ^*.py, ^README.md": plain patterns are included,
    "^*.ext" excludes a file suffix, "^name" excludes a file name.
    Matching files are forwarded to 解析源代码新.
    """
    txt_pattern = plugin_kwargs.get("advanced_arg")
    txt_pattern = txt_pattern.replace(",", ",")  # normalize full-width commas
    # patterns to include (e.g. *.c, *.cpp, *.py, config.toml)
    pattern_include = [_.lstrip(" ,").rstrip(" ,") for _ in txt_pattern.split(",") if _ != "" and not _.strip().startswith("^")]
    if not pattern_include: pattern_include = ["*"]  # empty input matches everything
    # file suffixes to exclude (e.g. ^*.c ^*.cpp ^*.py)
    # NOTE(review): exclusions are split on spaces while inclusions are split on
    # commas — looks intentional since trailing commas are stripped; verify against UI help
    pattern_except_suffix = [_.lstrip(" ^*.,").rstrip(" ,") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^*.")]
    pattern_except_suffix += ['zip', 'rar', '7z', 'tar', 'gz']  # never parse archives
    # file names to exclude (e.g. ^README.md); escape dots for the regex below
    pattern_except_name = [_.lstrip(" ^*,").rstrip(" ,").replace(".", r"\.") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^") and not _.strip().startswith("^*.")]
    # build the exclusion regex — BUGFIX: use raw strings; "\." inside a plain
    # string literal is an invalid escape sequence (SyntaxWarning on modern Python)
    pattern_except = r'/[^/]+\.(' + "|".join(pattern_except_suffix) + ')$'
    pattern_except += '|/(' + "|".join(pattern_except_name) + ')$' if pattern_except_name != [] else ''

    history.clear()  # clear history to avoid prompt overflow
    import glob, os, re
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    # if an archive was uploaded, descend into the extracted ".extract" folder so
    # the archive itself is never parsed
    maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)]
    if len(maybe_dir) > 0 and maybe_dir[0].endswith('.extract'):
        extract_folder_path = maybe_dir[0]
    else:
        extract_folder_path = project_folder
    # collect files matching the include patterns, dropping excluded ones;
    # a file survives exclusion when the include pattern explicitly names its suffix
    file_manifest = [f for pattern in pattern_include for f in glob.glob(f'{extract_folder_path}/**/{pattern}', recursive=True) if "" != extract_folder_path and \
                     os.path.isfile(f) and (not re.search(pattern_except, f) or pattern.endswith('.' + re.search(pattern_except, f).group().split('.')[-1]))]
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
crazy_functions/谷歌检索小助手.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
| 2 |
+
from toolbox import CatchException, report_exception, promote_file_to_downloadzone
|
| 3 |
+
from toolbox import update_ui, update_ui_lastest_msg, disable_auto_promotion, write_history_to_file
|
| 4 |
+
import logging
|
| 5 |
+
import requests
|
| 6 |
+
import time
|
| 7 |
+
import random
|
| 8 |
+
|
| 9 |
+
ENABLE_ALL_VERSION_SEARCH = True
|
| 10 |
+
|
| 11 |
+
def get_meta_information(url, chatbot, history):
    """Scrape one Google Scholar results page and return paper metadata.

    For every result entry (CSS class ``.gs_ri``) the title, author line,
    citation count and abstract snippet are read from the HTML.  The title is
    then searched on arxiv; when a close title match (>0.90 similarity) is
    found, the full arxiv abstract replaces the truncated Scholar snippet.
    Yields UI updates per paper (generator), finally returns a list of dicts
    with keys: title / author / citation / abstract / is_paper_in_arxiv.
    """
    import arxiv
    import difflib
    import re
    from bs4 import BeautifulSoup
    from toolbox import get_conf
    from urllib.parse import urlparse
    session = requests.session()

    proxies = get_conf('proxies')
    # browser-like headers to reduce the chance of being served a bot page
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
        'Cache-Control':'max-age=0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Connection': 'keep-alive'
    }
    try:
        session.proxies.update(proxies)
    except:
        # proxy config unavailable — warn and fall back to a direct connection
        report_exception(chatbot, history,
                         a=f"获取代理失败 无代理状态下很可能无法访问OpenAI家族的模型及谷歌学术 建议:检查USE_PROXY选项是否修改。",
                         b=f"尝试直接连接")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
    session.headers.update(headers)

    response = session.get(url)
    # parse the results page
    soup = BeautifulSoup(response.text, "html.parser")

    def string_similar(s1, s2):
        # quick (upper-bound) similarity ratio, used for title matching below
        return difflib.SequenceMatcher(None, s1, s2).quick_ratio()

    if ENABLE_ALL_VERSION_SEARCH:
        def search_all_version(url):
            # fetch the "all versions" cluster page and try each listed link
            # until one resolves to an arxiv paper; returns that paper or None
            time.sleep(random.randint(1,5)) # brief random sleep to avoid triggering Google's anti-crawler
            response = session.get(url)
            soup = BeautifulSoup(response.text, "html.parser")

            for result in soup.select(".gs_ri"):
                try:
                    url = result.select_one(".gs_rt").a['href']
                except:
                    continue
                arxiv_id = extract_arxiv_id(url)
                if not arxiv_id:
                    continue
                search = arxiv.Search(
                    id_list=[arxiv_id],
                    max_results=1,
                    sort_by=arxiv.SortCriterion.Relevance,
                )
                # NOTE: returns on the first entry with an arxiv id, even if the
                # lookup itself fails (paper may be None)
                try: paper = next(search.results())
                except: paper = None
                return paper

            return None

        def extract_arxiv_id(url):
            # arxiv_id parsed from the url, or None when it does not match
            pattern = r'arxiv.org/abs/([^/]+)'
            match = re.search(pattern, url)
            if match:
                return match.group(1)
            else:
                return None

    profile = []
    # extract title/author/citation/abstract for every result on the page
    for result in soup.select(".gs_ri"):
        title = result.a.text.replace('\n', ' ').replace('  ', ' ')
        author = result.select_one(".gs_a").text
        try:
            citation = result.select_one(".gs_fl > a[href*='cites']").text # citation count is the link text
        except:
            citation = 'cited by 0'
        abstract = result.select_one(".gs_rs").text.strip() # truncated abstract snippet from the page

        # first, search arxiv by title to try to fetch the full abstract
        search = arxiv.Search(
            query = title,
            max_results = 1,
            sort_by = arxiv.SortCriterion.Relevance,
        )
        try: paper = next(search.results())
        except: paper = None

        is_match = paper is not None and string_similar(title, paper.title) > 0.90

        # if the direct match failed, look through the paper's other versions
        if not is_match and ENABLE_ALL_VERSION_SEARCH:
            other_versions_page_url = [tag['href'] for tag in result.select_one('.gs_flb').select('.gs_nph') if 'cluster' in tag['href']]
            if len(other_versions_page_url) > 0:
                other_versions_page_url = other_versions_page_url[0]
                paper = search_all_version('http://' + urlparse(url).netloc + other_versions_page_url)
                is_match = paper is not None and string_similar(title, paper.title) > 0.90

        if is_match:
            # same paper — take the full arxiv summary
            abstract = paper.summary.replace('\n', ' ')
            is_paper_in_arxiv = True
        else:
            # different paper — keep the Scholar snippet (no-op kept verbatim)
            abstract = abstract
            is_paper_in_arxiv = False

        logging.info('[title]:' + title)
        logging.info('[author]:' + author)
        logging.info('[citation]:' + citation)

        profile.append({
            'title': title,
            'author': author,
            'citation': citation,
            'abstract': abstract,
            'is_paper_in_arxiv': is_paper_in_arxiv,
        })

        # show incremental progress in the chat window
        chatbot[-1] = [chatbot[-1][0], title + f'\n\n是否在arxiv中(不在arxiv中无法获取完整摘要):{is_paper_in_arxiv}\n\n' + abstract]
        yield from update_ui(chatbot=chatbot, history=[]) # refresh UI
    return profile
|
| 133 |
+
|
| 134 |
+
@CatchException
def 谷歌检索小助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Google Scholar assistant plugin entry point.

    `txt` is expected to be a Google Scholar search-results URL.  Metadata for
    every paper on the page is scraped via get_meta_information, then summarized
    by the LLM in batches of 5 as Markdown tables; the transcript is finally
    written to a downloadable file.  Generator: yields UI updates throughout.
    """
    disable_auto_promotion(chatbot=chatbot)
    # basic info: purpose and contributor
    chatbot.append([
        "函数插件功能?",
        "分析用户提供的谷歌学术(google scholar)搜索页面中,出现的所有文章: binary-husky,插件初始化中..."])
    yield from update_ui(chatbot=chatbot, history=history) # refresh UI

    # try importing dependencies; on failure, suggest the install command
    try:
        import arxiv
        import math
        from bs4 import BeautifulSoup
    except:
        report_exception(chatbot, history,
                         a = f"解析项目: {txt}",
                         b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4 arxiv```。")
        yield from update_ui(chatbot=chatbot, history=history) # refresh UI
        return

    # clear history to avoid input overflow
    history = []
    meta_paper_info_list = yield from get_meta_information(txt, chatbot, history)
    if len(meta_paper_info_list) == 0:
        # empty scrape usually means Google's anti-crawler kicked in
        yield from update_ui_lastest_msg(lastmsg='获取文献失败,可能触发了google反爬虫机制。',chatbot=chatbot, history=history, delay=0)
        return
    batchsize = 5
    # process papers in batches; the list is consumed from the front each pass
    for batch in range(math.ceil(len(meta_paper_info_list)/batchsize)):
        if len(meta_paper_info_list[:batchsize]) > 0:
            i_say = "下面是一些学术文献的数据,提取出以下内容:" + \
                    "1、英文题目;2、中文题目翻译;3、作者;4、arxiv公开(is_paper_in_arxiv);4、引用数量(cite);5、中文摘要翻译。" + \
                    f"以下是信息源:{str(meta_paper_info_list[:batchsize])}"

            inputs_show_user = f"请分析此页面中出现的所有文章:{txt},这是第{batch+1}批"
            gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
                inputs=i_say, inputs_show_user=inputs_show_user,
                llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
                sys_prompt="你是一个学术翻译,请从数据中提取信息。你必须使用Markdown表格。你必须逐个文献进行处理。"
            )

            history.extend([ f"第{batch+1}批", gpt_say ])
        meta_paper_info_list = meta_paper_info_list[batchsize:]

    chatbot.append(["状态?",
                    "已经全部完成,您可以试试让AI写一个Related Works,例如您可以继续输入Write a \"Related Works\" section about \"你搜索的研究领域\" for me."])
    msg = '正常'
    yield from update_ui(chatbot=chatbot, history=history, msg=msg) # refresh UI
    # persist the transcript and expose it in the download zone
    path = write_history_to_file(history)
    promote_file_to_downloadzone(path, chatbot=chatbot)
    chatbot.append(("完成了吗?", path));
    yield from update_ui(chatbot=chatbot, history=history, msg=msg) # refresh UI
|
crazy_functions/辅助功能.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# encoding: utf-8
|
| 2 |
+
# @Time : 2023/4/19
|
| 3 |
+
# @Author : Spike
|
| 4 |
+
# @Descr :
|
| 5 |
+
from toolbox import update_ui, get_conf, get_user
|
| 6 |
+
from toolbox import CatchException
|
| 7 |
+
from toolbox import default_user_name
|
| 8 |
+
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
| 9 |
+
import shutil
|
| 10 |
+
import os
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@CatchException
def 猜你想问(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Answer the user's question (or analyze the latest answer when the input
    box is empty) and then have the model suggest three likely follow-up
    questions. Generator: yields UI updates."""
    if txt:
        # fresh question typed: answer it, then append the three suggestions
        show_say = txt
        prompt = txt + '\n回答完问题后,再列出用户可能提出的三个问题。'
    else:
        # empty input: analyze the most recent reply in history instead
        show_say = '分析上述回答,再列出用户可能提出的三个问题。'
        prompt = history[-1] + "\n分析上述回答,再列出用户可能提出的三个问题。"
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=prompt,
        inputs_show_user=show_say,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history=history,
        sys_prompt=system_prompt)
    # overwrite the placeholder row, then record the exchange
    chatbot[-1] = (show_say, gpt_say)
    history.extend([show_say, gpt_say])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@CatchException
def 清除缓存(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Delete the current user's log and upload folders, reporting progress in
    the chat UI. Generator: yields UI updates before and after deletion."""
    chatbot.append(['清除本地缓存数据', '执行中. 删除数据'])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    def _log_dir(user=default_user_name):
        # the log folder is created on demand before being wiped
        path = os.path.join(get_conf('PATH_LOGGING'), user)
        if not os.path.exists(path): os.makedirs(path)
        return path

    def _upload_dir(user=default_user_name):
        return os.path.join(get_conf('PATH_PRIVATE_UPLOAD'), user)

    # best-effort removal: missing folders are silently ignored
    shutil.rmtree(_log_dir(get_user(chatbot)), ignore_errors=True)
    shutil.rmtree(_upload_dir(get_user(chatbot)), ignore_errors=True)

    chatbot.append(['清除本地缓存数据', '执行完成'])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
|
crazy_functions/高级功能函数模板.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import CatchException, update_ui
|
| 2 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
| 3 |
+
import datetime
|
| 4 |
+
@CatchException
def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Demo plugin: for each of the next five days, ask the model which
    historical events happened on that date, with Markdown images.

    Args:
        txt: user input from the text box (e.g. text to translate, or a path).
        llm_kwargs: LLM parameters (temperature, top_p, ...), passed through.
        plugin_kwargs: extra plugin parameters.
        chatbot: chat display handle, used to show output to the user.
        history: chat history (context).
        system_prompt: silent system prompt for the model.
        web_port: port the app is running on.
    """
    history = []  # clear history to avoid prompt overflow
    chatbot.append(("这是什么功能?", "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板(该函数只有20多行代码)。此外我们也提供可同步处理大量文件的多线程Demo供您参考。您若希望分享新的功能模组,请不吝PR!"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI early — the GPT request below takes a while
    for i in range(5):
        # BUGFIX: compute the date once per iteration; the original called
        # datetime.date.today() twice, so month and day could come from
        # different days if the loop ran across midnight
        target_date = datetime.date.today() + datetime.timedelta(days=i)
        i_say = f'历史中哪些事件发生在{target_date.month}月{target_date.day}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。'
        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=i_say, inputs_show_user=i_say,
            llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
            sys_prompt="当你想发送一张照片时,请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。"
        )
        chatbot[-1] = (i_say, gpt_say)
        history.append(i_say)
        history.append(gpt_say)
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI after each day