Spaces:
Sleeping
Sleeping
Upload 34 files
Browse files- crazy_functions/CodeInterpreter.py +232 -0
- crazy_functions/__init__.py +0 -0
- crazy_functions/agent_fns/auto_agent.py +23 -0
- crazy_functions/agent_fns/echo_agent.py +19 -0
- crazy_functions/agent_fns/general.py +134 -0
- crazy_functions/agent_fns/persistent.py +16 -0
- crazy_functions/agent_fns/pipe.py +194 -0
- crazy_functions/agent_fns/watchdog.py +28 -0
- crazy_functions/chatglm微调工具.py +141 -0
- crazy_functions/crazy_utils.py +609 -0
- crazy_functions/gen_fns/gen_fns_shared.py +70 -0
- crazy_functions/ipc_fns/mp.py +37 -0
- crazy_functions/json_fns/pydantic_io.py +111 -0
- crazy_functions/live_audio/aliyunASR.py +261 -0
- crazy_functions/live_audio/audio_io.py +51 -0
- crazy_functions/multi_stage/multi_stage_utils.py +93 -0
- crazy_functions/pdf_fns/breakdown_txt.py +125 -0
- crazy_functions/pdf_fns/parse_pdf.py +171 -0
- crazy_functions/pdf_fns/report_gen_html.py +58 -0
- crazy_functions/pdf_fns/report_template.html +0 -0
- crazy_functions/vt_fns/vt_call_plugin.py +114 -0
- crazy_functions/vt_fns/vt_modify_config.py +81 -0
- crazy_functions/vt_fns/vt_state.py +28 -0
- crazy_functions/命令行助手.py +31 -0
- crazy_functions/对话历史存档.py +152 -0
- crazy_functions/生成函数注释.py +56 -0
- crazy_functions/联网的ChatGPT.py +106 -0
- crazy_functions/联网的ChatGPT_bing版.py +106 -0
- crazy_functions/虚空终端.py +180 -0
- crazy_functions/解析JupyterNotebook.py +140 -0
- crazy_functions/解析项目源代码.py +371 -0
- crazy_functions/谷歌检索小助手.py +185 -0
- crazy_functions/辅助功能.py +54 -0
- crazy_functions/高级功能函数模板.py +29 -0
crazy_functions/CodeInterpreter.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections.abc import Callable, Iterable, Mapping
|
| 2 |
+
from typing import Any
|
| 3 |
+
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc
|
| 4 |
+
from toolbox import promote_file_to_downloadzone, get_log_folder
|
| 5 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
| 6 |
+
from .crazy_utils import input_clipping, try_install_deps
|
| 7 |
+
from multiprocessing import Process, Pipe
|
| 8 |
+
import os
|
| 9 |
+
import time
|
| 10 |
+
|
| 11 |
+
templete = """
|
| 12 |
+
```python
|
| 13 |
+
import ... # Put dependencies here, e.g. import numpy as np
|
| 14 |
+
|
| 15 |
+
class TerminalFunction(object): # Do not change the name of the class, The name of the class must be `TerminalFunction`
|
| 16 |
+
|
| 17 |
+
def run(self, path): # The name of the function must be `run`, it takes only a positional argument.
|
| 18 |
+
# rewrite the function you have just written here
|
| 19 |
+
...
|
| 20 |
+
return generated_file_path
|
| 21 |
+
```
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
def inspect_dependency(chatbot, history):
    """Placeholder dependency check: refresh the UI and unconditionally report success.

    Real dependency inspection is not implemented yet; callers always get True.
    """
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the interface
    return True
|
| 27 |
+
|
| 28 |
+
def get_code_block(reply):
    """Extract one fenced code block from an LLM reply.

    If exactly one block is present it is returned; with several blocks, the
    first one containing ``class TerminalFunction`` wins.

    Args:
        reply: Full text of the model reply.
    Returns:
        The code inside the fence, with a leading "python" language tag removed.
    Raises:
        RuntimeError: when no suitable code block is found.
    """
    import re
    pattern = r"```([\s\S]*?)```"  # regex pattern to match fenced code blocks
    matches = re.findall(pattern, reply)  # find all code blocks in text

    def _drop_lang_tag(block):
        # BUGFIX: the previous implementation used block.strip('python'), which
        # strips *any* of the characters p/y/t/h/o/n from both ends of the text
        # and can silently delete real code characters. We only want to remove
        # the leading language tag after the opening fence.
        return block.removeprefix('python')

    if len(matches) == 1:
        return _drop_lang_tag(matches[0])
    for match in matches:
        if 'class TerminalFunction' in match:
            return _drop_lang_tag(match)
    raise RuntimeError("GPT is not generating proper code.")
|
| 38 |
+
|
| 39 |
+
def gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history):
    """Two-step LLM interaction that produces a `TerminalFunction` implementation.

    Step 1 asks for a single Python function performing the user's task on a
    `file_type` file; step 2 asks the model to rewrite it to fit `templete`.

    Args:
        txt: User's task description.
        file_type: Extension of the uploaded file (e.g. "csv", "png").
        llm_kwargs / chatbot / history: Standard plugin plumbing.
    Returns:
        (code_to_return, installation_advance, txt, file_type, llm_kwargs, chatbot, history)
        — installation_advance is currently always "" (the advisory step is disabled).
    """
    # Compose the first prompt
    prompt_compose = [
        f'Your job:\n'
        f'1. write a single Python function, which takes a path of a `{file_type}` file as the only argument and returns a `string` containing the result of analysis or the path of generated files. \n',
        f"2. You should write this function to perform following task: " + txt + "\n",
        f"3. Wrap the output python function with markdown codeblock."
    ]
    i_say = "".join(prompt_compose)
    demo = []

    # Step 1: ask for the raw function (empty history so the prompt stands alone)
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=i_say,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=demo,
        sys_prompt= r"You are a programmer."
    )
    history.extend([i_say, gpt_say])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # Step 2: ask the model to rewrite the function to satisfy the template
    prompt_compose = [
        "If previous stage is successful, rewrite the function you have just written to satisfy following templete: \n",
        templete
    ]
    i_say = "".join(prompt_compose); inputs_show_user = "If previous stage is successful, rewrite the function you have just written to satisfy executable templete. "
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=inputs_show_user,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
        sys_prompt= r"You are a programmer."
    )
    code_to_return = gpt_say
    history.extend([i_say, gpt_say])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # Step 3 (disabled): earlier experiments asked the model for dependency
    # installation advice (`try_install_deps` / `pip install`); the advisory is
    # currently a constant empty string.
    installation_advance = ""

    return code_to_return, installation_advance, txt, file_type, llm_kwargs, chatbot, history
|
| 93 |
+
|
| 94 |
+
def make_module(code):
    """Persist generated code into the log folder and return an import spec.

    The code is written to `<log_folder>/gpt_fn_<timestamp>.py`; the returned
    string has the form "<package.path>.<module>-><ClassName>" and is consumed
    by `init_module_instance`.
    """
    module_file = 'gpt_fn_' + gen_time_str().replace('-', '_')
    with open(f'{get_log_folder()}/{module_file}.py', 'w', encoding='utf8') as f:
        f.write(code)

    def get_class_name(class_string):
        import re
        # Pull the class name out of a "class Foo(Bar):" header.
        return re.search(r'class (\w+)\(', class_string).group(1)

    class_name = get_class_name(code)
    # The log folder path doubles as the package path for importlib.
    return f"{get_log_folder().replace('/', '.')}.{module_file}->{class_name}"
|
| 107 |
+
|
| 108 |
+
def init_module_instance(module):
    """Import a module spec of the form "pkg.mod->ClassName" and instantiate the class."""
    import importlib
    module_path, class_name = module.split('->')
    cls = getattr(importlib.import_module(module_path), class_name)
    return cls()
|
| 113 |
+
|
| 114 |
+
def for_immediate_show_off_when_possible(file_type, fp, chatbot):
    """Append an inline image preview to the chat when the result file is an image.

    Non-image file types leave the chatbot untouched; the chatbot is returned
    either way.
    """
    previewable = ('png', 'jpg')
    if file_type in previewable:
        image_path = os.path.abspath(fp)
        preview_html = (
            f'本地文件地址: <br/>`{image_path}`<br/>' +
            f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
        )
        chatbot.append(['这是一张图片, 展示如下:', preview_html])
    return chatbot
|
| 122 |
+
|
| 123 |
+
def subprocess_worker(instance, file_path, return_dict):
    """Child-process entry point: run the generated class and stash its result.

    `return_dict` is a multiprocessing manager dict shared with the parent
    process, which reads back `return_dict['result']`.
    """
    result = instance.run(file_path)
    return_dict['result'] = result
|
| 125 |
+
|
| 126 |
+
def have_any_recent_upload_files(chatbot):
    """Return True if a file was uploaded within the last five minutes.

    Reads the "most_recent_uploaded" record from the chatbot cookies; a
    missing chatbot handle or record counts as "no recent upload".
    """
    recent_window_seconds = 5 * 60
    if not chatbot:
        return False  # no chatbot handle available
    record = chatbot._cookies.get("most_recent_uploaded", None)
    if not record:
        return False  # nothing has been uploaded yet
    # Fresh only when the upload happened inside the window.
    return time.time() - record["time"] < recent_window_seconds
|
| 133 |
+
|
| 134 |
+
def get_recent_file_prompt_support(chatbot):
    """Return the path of the most recently uploaded file recorded in the cookies.

    NOTE(review): assumes the "most_recent_uploaded" record exists — call
    `have_any_recent_upload_files` first, otherwise this raises TypeError.
    """
    record = chatbot._cookies.get("most_recent_uploaded", None)
    return record['path']
|
| 138 |
+
|
| 139 |
+
@CatchException
def 虚空终端CodeInterpreter(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Open-source CodeInterpreter plugin (work in progress, currently disabled).

    txt             Text from the input box, e.g. a task description or a path of files to process
    llm_kwargs      GPT model parameters (temperature, top_p, ...), usually passed through unchanged
    plugin_kwargs   Plugin-specific parameters
    chatbot         Handle of the chat display, used to show output to the user
    history         Chat history (context)
    system_prompt   Silent system prompt for GPT
    web_port        Port the software is currently running on
    """
    # Plugin is explicitly disabled — everything below this raise is unreachable.
    raise NotImplementedError

    # Clear history to avoid input overflow.
    # NOTE(review): `clear_file_downloadzone` and `is_the_upload_folder` (below)
    # are not imported anywhere in this file — they would raise NameError if the
    # raise above were ever removed; confirm their source before enabling.
    history = []; clear_file_downloadzone(chatbot)

    # Basic info: feature description, contributors
    chatbot.append([
        "函数插件功能?",
        "CodeInterpreter开源版, 此插件处于开发阶段, 建议暂时不要使用, 插件初始化中 ..."
    ])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    if have_any_recent_upload_files(chatbot):
        file_path = get_recent_file_prompt_support(chatbot)
    else:
        chatbot.append(["文件检索", "没有发现任何近期上传的文件。"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # Read the uploaded file (empty-string markers are dropped first)
    if ("recently_uploaded_files" in plugin_kwargs) and (plugin_kwargs["recently_uploaded_files"] == ""): plugin_kwargs.pop("recently_uploaded_files")
    recently_uploaded_files = plugin_kwargs.get("recently_uploaded_files", None)
    file_path = recently_uploaded_files[-1]
    file_type = file_path.split('.')[-1]

    # Carelessness check: the user left the upload-folder path in the input box
    if is_the_upload_folder(txt):
        chatbot.append([
            "...",
            f"请在输入框内填写需求,然后再次点击该插件(文件路径 {file_path} 已经被记忆)"
        ])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return

    # Start the real work
    for j in range(5):  # retry at most 5 times
        try:
            code, installation_advance, txt, file_type, llm_kwargs, chatbot, history = \
                yield from gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history)
            code = get_code_block(code)
            res = make_module(code)
            instance = init_module_instance(res)
            break
        except Exception as e:
            chatbot.append([f"第{j}次代码生成尝试,失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
            yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # Code generation finished — execute the generated class in a child process
    try:
        import multiprocessing
        manager = multiprocessing.Manager()
        return_dict = manager.dict()

        p = multiprocessing.Process(target=subprocess_worker, args=(instance, file_path, return_dict))
        # only has 10 seconds to run
        p.start(); p.join(timeout=10)
        if p.is_alive(): p.terminate(); p.join()
        p.close()
        res = return_dict['result']
    except Exception as e:
        chatbot.append(["执行失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return

    # Finished successfully — present the result
    res = str(res)
    if os.path.exists(res):
        chatbot.append(["执行成功了,结果是一个有效文件", "结果:" + res])
        new_file_path = promote_file_to_downloadzone(res, chatbot=chatbot)
        chatbot = for_immediate_show_off_when_possible(file_type, new_file_path, chatbot)
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
    else:
        chatbot.append(["执行成功了,结果是一个字符串", "结果:" + res])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
| 225 |
+
|
| 226 |
+
"""
|
| 227 |
+
测试:
|
| 228 |
+
裁剪图像,保留下半部分
|
| 229 |
+
交换图像的蓝色通道和红色通道
|
| 230 |
+
将图像转为灰度图像
|
| 231 |
+
将csv文件转excel表格
|
| 232 |
+
"""
|
crazy_functions/__init__.py
ADDED
|
File without changes
|
crazy_functions/agent_fns/auto_agent.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, ProxyNetworkActivate
|
| 2 |
+
from toolbox import report_exception, get_log_folder, update_ui_lastest_msg, Singleton
|
| 3 |
+
from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
|
| 4 |
+
from crazy_functions.agent_fns.general import AutoGenGeneral
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class AutoGenMath(AutoGenGeneral):
    """Math-oriented AutoGen plugin: one LLM assistant plus a human-driven user proxy."""

    def define_agents(self):
        """Return the agent specs consumed by AutoGenGeneral.exe_autogen.

        Each dict carries the agent's name, its autogen class under "cls",
        and any extra constructor kwargs.
        """
        from autogen import AssistantAgent, UserProxyAgent
        return [
            {
                "name": "assistant",           # name of the agent
                "cls": AssistantAgent,         # class of the agent
            },
            {
                "name": "user_proxy",          # name of the agent
                "cls": UserProxyAgent,         # class of the agent
                "human_input_mode": "ALWAYS",  # always ask for human input
                "llm_config": False,           # disables llm-based auto reply
            },
        ]
|
crazy_functions/agent_fns/echo_agent.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
|
| 2 |
+
|
| 3 |
+
class EchoDemo(PluginMultiprocessManager):
    """Minimal demo plugin: echoes user input back through the subprocess pipe."""

    def subprocess_worker(self, child_conn):
        # ⭐⭐ runs in the subprocess
        self.child_conn = child_conn
        while True:
            msg = self.child_conn.recv()  # PipeCom
            if msg.cmd == "user_input":
                # Echo the input, then wait for further user input.
                self.child_conn.send(PipeCom("show", msg.content))
                wait_success = self.subprocess_worker_wait_user_feedback(wait_msg="我准备好处理下一个问题了.")
                if not wait_success:
                    # Wait timed out — terminate this subprocess_worker.
                    break
            elif msg.cmd == "terminate":
                self.child_conn.send(PipeCom("done", ""))
                break
        print('[debug] subprocess_worker terminated')
|
crazy_functions/agent_fns/general.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import trimmed_format_exc, get_conf, ProxyNetworkActivate
|
| 2 |
+
from crazy_functions.agent_fns.pipe import PluginMultiprocessManager, PipeCom
|
| 3 |
+
from request_llms.bridge_all import predict_no_ui_long_connection
|
| 4 |
+
import time
|
| 5 |
+
|
| 6 |
+
def gpt_academic_generate_oai_reply(
    self,
    messages,
    sender,
    config,
):
    """Drop-in replacement for autogen's `generate_oai_reply`.

    Routes the agent's LLM call through GPT-Academic's own bridge
    (`predict_no_ui_long_connection`) instead of the OpenAI client.

    Returns:
        (False, None) when llm_config is disabled, otherwise (True, reply).
    """
    llm_config = self.llm_config if config is None else config
    if llm_config is False:
        return False, None
    if messages is None:
        messages = self._oai_messages[sender]

    # Last message is the prompt; everything before it is history.
    inputs = messages[-1]['content']
    history = [message['content'] for message in messages[:-1]]
    context = messages[-1].pop("context", None)
    assert context is None, "预留参数 context 未实现"

    reply = predict_no_ui_long_connection(
        inputs=inputs,
        llm_kwargs=llm_config,
        history=history,
        sys_prompt=self._oai_system_message[0]['content'],
        console_slience=True,
    )
    # NOTE: the original computed `assumed_done = reply.endswith('\nTERMINATE')`
    # but never used it; dropped as dead code.
    return True, reply
|
| 34 |
+
|
| 35 |
+
class AutoGenGeneral(PluginMultiprocessManager):
    """Base class wiring autogen agents to GPT-Academic's UI through the process pipe.

    Subclasses implement `define_agents`; the chat itself runs in a child
    process (`subprocess_worker` / `exe_autogen`).
    """

    def gpt_academic_print_override(self, user_proxy, message, sender):
        # ⭐⭐ run in subprocess: forward agent output to the UI via the pipe.
        self.child_conn.send(PipeCom("show", sender.name + "\n\n---\n\n" + message["content"]))

    def gpt_academic_get_human_input(self, user_proxy, message):
        # ⭐⭐ run in subprocess: block (up to 5 minutes) until the user replies.
        patience = 300
        begin_waiting_time = time.time()
        self.child_conn.send(PipeCom("interact", message))
        while True:
            time.sleep(0.5)
            if self.child_conn.poll():
                wait_success = True
                break
            if time.time() - begin_waiting_time > patience:
                self.child_conn.send(PipeCom("done", ""))
                wait_success = False
                break
        if wait_success:
            return self.child_conn.recv().content
        else:
            raise TimeoutError("等待用户输入超时")

    def define_agents(self):
        """Subclasses return a list of agent spec dicts (see AutoGenMath)."""
        raise NotImplementedError

    def exe_autogen(self, input):
        # ⭐⭐ run in subprocess
        input = input.content
        with ProxyNetworkActivate("AutoGen"):
            code_execution_config = {"work_dir": self.autogen_work_dir, "use_docker": self.use_docker}
            agents = self.define_agents()
            user_proxy = None
            assistant = None
            for agent_kwargs in agents:
                agent_cls = agent_kwargs.pop('cls')
                kwargs = {
                    'llm_config': self.llm_kwargs,
                    'code_execution_config': code_execution_config,
                }
                kwargs.update(agent_kwargs)
                agent_handle = agent_cls(**kwargs)
                # BUGFIX: bind agent_kwargs as a default argument. A plain closure
                # late-binds the loop variable, so every agent would print via the
                # *last* iteration's kwargs dict.
                agent_handle._print_received_message = lambda a, b, ak=agent_kwargs: self.gpt_academic_print_override(ak, a, b)
                for d in agent_handle._reply_func_list:
                    if hasattr(d['reply_func'], '__name__') and d['reply_func'].__name__ == 'generate_oai_reply':
                        d['reply_func'] = gpt_academic_generate_oai_reply
                if agent_kwargs['name'] == 'user_proxy':
                    # BUGFIX: bind the handle itself; the original closed over the
                    # `user_proxy` local, which is still None at lambda creation.
                    agent_handle.get_human_input = lambda a, h=agent_handle: self.gpt_academic_get_human_input(h, a)
                    user_proxy = agent_handle
                if agent_kwargs['name'] == 'assistant': assistant = agent_handle
            try:
                if user_proxy is None or assistant is None: raise Exception("用户代理或助理代理未定义")
                user_proxy.initiate_chat(assistant, message=input)
            except Exception as e:
                tb_str = '```\n' + trimmed_format_exc() + '```'
                self.child_conn.send(PipeCom("done", "AutoGen 执行失败: \n\n" + tb_str))

    def subprocess_worker(self, child_conn):
        # ⭐⭐ run in subprocess: serve pipe messages until the process is terminated.
        self.child_conn = child_conn
        while True:
            msg = self.child_conn.recv()  # PipeCom
            self.exe_autogen(msg)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
class AutoGenGroupChat(AutoGenGeneral):
    """AutoGen variant that runs a multi-agent group chat instead of a 1:1 chat."""

    def exe_autogen(self, input):
        # ⭐⭐ run in subprocess
        import autogen

        input = input.content
        with ProxyNetworkActivate("AutoGen"):
            code_execution_config = {"work_dir": self.autogen_work_dir, "use_docker": self.use_docker}
            agents = self.define_agents()
            agents_instances = []
            # BUGFIX: the original never initialized user_proxy, so a spec list
            # without a "user_proxy" agent raised NameError instead of the
            # intended "user_proxy is not defined" Exception below.
            user_proxy = None
            for agent_kwargs in agents:
                agent_cls = agent_kwargs.pop("cls")
                kwargs = {"code_execution_config": code_execution_config}
                kwargs.update(agent_kwargs)
                agent_handle = agent_cls(**kwargs)
                # BUGFIX: default-bind agent_kwargs (late-binding closure bug,
                # see AutoGenGeneral.exe_autogen).
                agent_handle._print_received_message = lambda a, b, ak=agent_kwargs: self.gpt_academic_print_override(ak, a, b)
                agents_instances.append(agent_handle)
                if agent_kwargs["name"] == "user_proxy":
                    user_proxy = agent_handle
                    user_proxy.get_human_input = lambda a: self.gpt_academic_get_human_input(user_proxy, a)
            try:
                groupchat = autogen.GroupChat(agents=agents_instances, messages=[], max_round=50)
                manager = autogen.GroupChatManager(groupchat=groupchat, **self.define_group_chat_manager_config())
                manager._print_received_message = lambda a, b: self.gpt_academic_print_override(agent_kwargs, a, b)
                manager.get_human_input = lambda a: self.gpt_academic_get_human_input(manager, a)
                if user_proxy is None:
                    raise Exception("user_proxy is not defined")
                user_proxy.initiate_chat(manager, message=input)
            except Exception:
                tb_str = "```\n" + trimmed_format_exc() + "```"
                self.child_conn.send(PipeCom("done", "AutoGen exe failed: \n\n" + tb_str))

    def define_group_chat_manager_config(self):
        """Subclasses return kwargs for autogen.GroupChatManager."""
        raise NotImplementedError
|
crazy_functions/agent_fns/persistent.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import Singleton
|
| 2 |
+
@Singleton
class GradioMultiuserManagerForPersistentClasses():
    """Process-wide registry of live plugin instances, keyed per user/session."""

    def __init__(self):
        # key -> instance; instances are expected to expose is_alive()
        self.mapping = {}

    def already_alive(self, key):
        """True only if `key` is registered and its instance reports alive."""
        if key not in self.mapping:
            return False
        return self.mapping[key].is_alive()

    def set(self, key, x):
        """Register `x` under `key` and return it."""
        self.mapping[key] = x
        return self.mapping[key]

    def get(self, key):
        """Return the instance registered under `key` (KeyError if absent)."""
        return self.mapping[key]
|
| 16 |
+
|
crazy_functions/agent_fns/pipe.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import get_log_folder, update_ui, gen_time_str, get_conf, promote_file_to_downloadzone
|
| 2 |
+
from crazy_functions.agent_fns.watchdog import WatchDog
|
| 3 |
+
import time, os
|
| 4 |
+
|
| 5 |
+
class PipeCom:
    """Tiny message envelope exchanged over the parent/child multiprocessing pipe.

    `cmd` is the command tag ("show", "interact", "done", "user_input", ...)
    and `content` is its payload.
    """

    def __init__(self, cmd, content) -> None:
        self.cmd = cmd          # command tag
        self.content = content  # payload for the command
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class PluginMultiprocessManager:
|
| 12 |
+
def __init__(self, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
| 13 |
+
# ⭐ run in main process
|
| 14 |
+
self.autogen_work_dir = os.path.join(get_log_folder("autogen"), gen_time_str())
|
| 15 |
+
self.previous_work_dir_files = {}
|
| 16 |
+
self.llm_kwargs = llm_kwargs
|
| 17 |
+
self.plugin_kwargs = plugin_kwargs
|
| 18 |
+
self.chatbot = chatbot
|
| 19 |
+
self.history = history
|
| 20 |
+
self.system_prompt = system_prompt
|
| 21 |
+
# self.web_port = web_port
|
| 22 |
+
self.alive = True
|
| 23 |
+
self.use_docker = get_conf("AUTOGEN_USE_DOCKER")
|
| 24 |
+
self.last_user_input = ""
|
| 25 |
+
# create a thread to monitor self.heartbeat, terminate the instance if no heartbeat for a long time
|
| 26 |
+
timeout_seconds = 5 * 60
|
| 27 |
+
self.heartbeat_watchdog = WatchDog(timeout=timeout_seconds, bark_fn=self.terminate, interval=5)
|
| 28 |
+
self.heartbeat_watchdog.begin_watch()
|
| 29 |
+
|
| 30 |
+
def feed_heartbeat_watchdog(self):
|
| 31 |
+
# feed this `dog`, so the dog will not `bark` (bark_fn will terminate the instance)
|
| 32 |
+
self.heartbeat_watchdog.feed()
|
| 33 |
+
|
| 34 |
+
def is_alive(self):
|
| 35 |
+
return self.alive
|
| 36 |
+
|
| 37 |
+
def launch_subprocess_with_pipe(self):
|
| 38 |
+
# ⭐ run in main process
|
| 39 |
+
from multiprocessing import Process, Pipe
|
| 40 |
+
|
| 41 |
+
parent_conn, child_conn = Pipe()
|
| 42 |
+
self.p = Process(target=self.subprocess_worker, args=(child_conn,))
|
| 43 |
+
self.p.daemon = True
|
| 44 |
+
self.p.start()
|
| 45 |
+
return parent_conn
|
| 46 |
+
|
| 47 |
+
def terminate(self):
|
| 48 |
+
self.p.terminate()
|
| 49 |
+
self.alive = False
|
| 50 |
+
print("[debug] instance terminated")
|
| 51 |
+
|
| 52 |
+
def subprocess_worker(self, child_conn):
|
| 53 |
+
# ⭐⭐ run in subprocess
|
| 54 |
+
raise NotImplementedError
|
| 55 |
+
|
| 56 |
+
def send_command(self, cmd):
|
| 57 |
+
# ⭐ run in main process
|
| 58 |
+
repeated = False
|
| 59 |
+
if cmd == self.last_user_input:
|
| 60 |
+
repeated = True
|
| 61 |
+
cmd = ""
|
| 62 |
+
else:
|
| 63 |
+
self.last_user_input = cmd
|
| 64 |
+
self.parent_conn.send(PipeCom("user_input", cmd))
|
| 65 |
+
return repeated, cmd
|
| 66 |
+
|
| 67 |
+
def immediate_showoff_when_possible(self, fp):
|
| 68 |
+
# ⭐ 主进程
|
| 69 |
+
# 获取fp的拓展名
|
| 70 |
+
file_type = fp.split('.')[-1]
|
| 71 |
+
# 如果是文本文件, 则直接显示文本内容
|
| 72 |
+
if file_type.lower() in ['png', 'jpg']:
|
| 73 |
+
image_path = os.path.abspath(fp)
|
| 74 |
+
self.chatbot.append([
|
| 75 |
+
'检测到新生图像:',
|
| 76 |
+
f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
|
| 77 |
+
])
|
| 78 |
+
yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 79 |
+
|
| 80 |
+
def overwatch_workdir_file_change(self):
|
| 81 |
+
# ⭐ 主进程 Docker 外挂文件夹监控
|
| 82 |
+
path_to_overwatch = self.autogen_work_dir
|
| 83 |
+
change_list = []
|
| 84 |
+
# 扫描路径下的所有文件, 并与self.previous_work_dir_files中所记录的文件进行对比,
|
| 85 |
+
# 如果有新文件出现,或者文件的修改时间发生变化,则更新self.previous_work_dir_files中
|
| 86 |
+
# 把新文件和发生变化的文件的路径记录到 change_list 中
|
| 87 |
+
for root, dirs, files in os.walk(path_to_overwatch):
|
| 88 |
+
for file in files:
|
| 89 |
+
file_path = os.path.join(root, file)
|
| 90 |
+
if file_path not in self.previous_work_dir_files.keys():
|
| 91 |
+
last_modified_time = os.stat(file_path).st_mtime
|
| 92 |
+
self.previous_work_dir_files.update({file_path: last_modified_time})
|
| 93 |
+
change_list.append(file_path)
|
| 94 |
+
else:
|
| 95 |
+
last_modified_time = os.stat(file_path).st_mtime
|
| 96 |
+
if last_modified_time != self.previous_work_dir_files[file_path]:
|
| 97 |
+
self.previous_work_dir_files[file_path] = last_modified_time
|
| 98 |
+
change_list.append(file_path)
|
| 99 |
+
if len(change_list) > 0:
|
| 100 |
+
file_links = ""
|
| 101 |
+
for f in change_list:
|
| 102 |
+
res = promote_file_to_downloadzone(f)
|
| 103 |
+
file_links += f'<br/><a href="file={res}" target="_blank">{res}</a>'
|
| 104 |
+
yield from self.immediate_showoff_when_possible(f)
|
| 105 |
+
|
| 106 |
+
self.chatbot.append(['检测到新生文档.', f'文档清单如下: {file_links}'])
|
| 107 |
+
yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 108 |
+
return change_list
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def main_process_ui_control(self, txt, create_or_resume) -> str:
|
| 112 |
+
# ⭐ 主进程
|
| 113 |
+
if create_or_resume == 'create':
|
| 114 |
+
self.cnt = 1
|
| 115 |
+
self.parent_conn = self.launch_subprocess_with_pipe() # ⭐⭐⭐
|
| 116 |
+
repeated, cmd_to_autogen = self.send_command(txt)
|
| 117 |
+
if txt == 'exit':
|
| 118 |
+
self.chatbot.append([f"结束", "结束信号已明确,终止AutoGen程序。"])
|
| 119 |
+
yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 120 |
+
self.terminate()
|
| 121 |
+
return "terminate"
|
| 122 |
+
|
| 123 |
+
# patience = 10
|
| 124 |
+
|
| 125 |
+
while True:
|
| 126 |
+
time.sleep(0.5)
|
| 127 |
+
if not self.alive:
|
| 128 |
+
# the heartbeat watchdog might have it killed
|
| 129 |
+
self.terminate()
|
| 130 |
+
return "terminate"
|
| 131 |
+
if self.parent_conn.poll():
|
| 132 |
+
self.feed_heartbeat_watchdog()
|
| 133 |
+
if "[GPT-Academic] 等待中" in self.chatbot[-1][-1]:
|
| 134 |
+
self.chatbot.pop(-1) # remove the last line
|
| 135 |
+
if "等待您的进一步指令" in self.chatbot[-1][-1]:
|
| 136 |
+
self.chatbot.pop(-1) # remove the last line
|
| 137 |
+
if '[GPT-Academic] 等待中' in self.chatbot[-1][-1]:
|
| 138 |
+
self.chatbot.pop(-1) # remove the last line
|
| 139 |
+
msg = self.parent_conn.recv() # PipeCom
|
| 140 |
+
if msg.cmd == "done":
|
| 141 |
+
self.chatbot.append([f"结束", msg.content])
|
| 142 |
+
self.cnt += 1
|
| 143 |
+
yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 144 |
+
self.terminate()
|
| 145 |
+
break
|
| 146 |
+
if msg.cmd == "show":
|
| 147 |
+
yield from self.overwatch_workdir_file_change()
|
| 148 |
+
notice = ""
|
| 149 |
+
if repeated: notice = "(自动忽略重复的输入)"
|
| 150 |
+
self.chatbot.append([f"运行阶段-{self.cnt}(上次用户反馈输入为: 「{cmd_to_autogen}」{notice}", msg.content])
|
| 151 |
+
self.cnt += 1
|
| 152 |
+
yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 153 |
+
if msg.cmd == "interact":
|
| 154 |
+
yield from self.overwatch_workdir_file_change()
|
| 155 |
+
self.chatbot.append([f"程序抵达用户反馈节点.", msg.content +
|
| 156 |
+
"\n\n等待您的进一步指令." +
|
| 157 |
+
"\n\n(1) 一般情况下您不需要说什么, 清空输入区, 然后直接点击“提交”以继续. " +
|
| 158 |
+
"\n\n(2) 如果您需要补充些什么, 输入要反馈的内容, 直接点击“提交”以继续. " +
|
| 159 |
+
"\n\n(3) 如果您想终止程序, 输入exit, 直接点击“提交”以终止AutoGen并解锁. "
|
| 160 |
+
])
|
| 161 |
+
yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 162 |
+
# do not terminate here, leave the subprocess_worker instance alive
|
| 163 |
+
return "wait_feedback"
|
| 164 |
+
else:
|
| 165 |
+
self.feed_heartbeat_watchdog()
|
| 166 |
+
if '[GPT-Academic] 等待中' not in self.chatbot[-1][-1]:
|
| 167 |
+
# begin_waiting_time = time.time()
|
| 168 |
+
self.chatbot.append(["[GPT-Academic] 等待AutoGen执行结果 ...", "[GPT-Academic] 等待中"])
|
| 169 |
+
self.chatbot[-1] = [self.chatbot[-1][0], self.chatbot[-1][1].replace("[GPT-Academic] 等待中", "[GPT-Academic] 等待中.")]
|
| 170 |
+
yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 171 |
+
# if time.time() - begin_waiting_time > patience:
|
| 172 |
+
# self.chatbot.append([f"结束", "等待超时, 终止AutoGen程序。"])
|
| 173 |
+
# yield from update_ui(chatbot=self.chatbot, history=self.history)
|
| 174 |
+
# self.terminate()
|
| 175 |
+
# return "terminate"
|
| 176 |
+
|
| 177 |
+
self.terminate()
|
| 178 |
+
return "terminate"
|
| 179 |
+
|
| 180 |
+
def subprocess_worker_wait_user_feedback(self, wait_msg="wait user feedback"):
    """Ask the parent process for user feedback and block until it arrives.

    Sends an "interact" message through the pipe, then polls every 0.5 s
    for up to five minutes. Returns True when feedback arrived, False when
    the wait timed out (in which case a "done" message is sent to end the
    session).
    """
    # ⭐⭐ run in subprocess
    deadline = time.time() + 5 * 60  # patience: five minutes
    self.child_conn.send(PipeCom("interact", wait_msg))
    got_feedback = False
    while not got_feedback:
        time.sleep(0.5)
        if self.child_conn.poll():
            got_feedback = True
        elif time.time() > deadline:
            # ran out of patience — tell the parent we are done and give up
            self.child_conn.send(PipeCom("done", ""))
            break
    return got_feedback
|
crazy_functions/agent_fns/watchdog.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import threading, time
|
| 2 |
+
|
| 3 |
+
class WatchDog():
    """A software watchdog running in a daemon thread.

    After ``begin_watch()`` is called, the dog checks every ``interval``
    seconds whether more than ``timeout`` seconds have passed since the
    last ``feed()``. If so it optionally prints ``msg``, invokes
    ``bark_fn`` once, and stops. Setting ``kill_dog = True`` stops the
    watcher without barking.
    """

    def __init__(self, timeout, bark_fn, interval=3, msg="") -> None:
        self.last_feed = None      # timestamp of the most recent feed(); set by begin_watch()
        self.timeout = timeout     # seconds of starvation tolerated before barking
        self.bark_fn = bark_fn     # zero-argument callback fired on timeout
        self.interval = interval   # polling period in seconds
        self.msg = msg             # optional message printed right before barking
        self.kill_dog = False      # cooperative stop flag checked each cycle

    def watch(self):
        """Polling loop: exit when killed, or bark once when starved."""
        while not self.kill_dog:
            starved = (time.time() - self.last_feed) > self.timeout
            if starved:
                if self.msg:
                    print(self.msg)
                self.bark_fn()
                return
            time.sleep(self.interval)

    def begin_watch(self):
        """Record the first feed time and start the watcher as a daemon thread."""
        self.last_feed = time.time()
        watcher = threading.Thread(target=self.watch, daemon=True)
        watcher.start()

    def feed(self):
        """Reset the starvation timer (call periodically to keep the dog quiet)."""
        self.last_feed = time.time()
|
crazy_functions/chatglm微调工具.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import CatchException, update_ui, promote_file_to_downloadzone
|
| 2 |
+
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
|
| 3 |
+
import datetime, json
|
| 4 |
+
|
| 5 |
+
def fetch_items(list_of_items, batch_size):
    """Yield successive ``batch_size``-sized slices of *list_of_items*.

    The final slice may be shorter when the list length is not an exact
    multiple of the batch size. An empty list yields nothing.
    """
    total = len(list_of_items)
    start = 0
    while start < total:
        yield list_of_items[start:start + batch_size]
        start += batch_size
|
| 8 |
+
|
| 9 |
+
def string_to_options(arguments):
    """Parse a shell-style option string into an ``argparse.Namespace``.

    *arguments* is split with ``shlex`` (so quoted values survive) and fed
    to a parser that knows all fine-tuning options with their defaults.
    """
    import argparse
    import shlex

    parser = argparse.ArgumentParser()

    # (flag, type, help text, default) — help texts mirror the original
    # definitions, including the reused "System prompt" label on --batch.
    option_spec = [
        ("--llm_to_learn", str, "LLM model to learn", "gpt-3.5-turbo"),
        ("--prompt_prefix", str, "Prompt prefix", ''),
        ("--system_prompt", str, "System prompt", ''),
        ("--batch", int, "System prompt", 50),
        ("--pre_seq_len", int, "pre_seq_len", 50),
        ("--learning_rate", float, "learning_rate", 2e-2),
        ("--num_gpus", int, "num_gpus", 1),
        ("--json_dataset", str, "json_dataset", ""),
        ("--ptuning_directory", str, "ptuning_directory", ""),
    ]
    for flag, value_type, help_text, default in option_spec:
        parser.add_argument(flag, type=value_type, help=help_text, default=default)

    return parser.parse_args(shlex.split(arguments))
|
| 33 |
+
|
| 34 |
+
@CatchException
def 微调数据集生成(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Generate a fine-tuning dataset: read a JSONL file of "content" records,
    query a teacher LLM batch-by-batch, and write {"content", "summary"}
    pairs to ``<txt>.generated.json`` in the download zone.

    txt             user input from the text box; here, the path of the JSONL input file
    llm_kwargs      GPT model parameters (temperature, top_p, ...), usually passed through
    plugin_kwargs   plugin parameters; "advanced_arg" carries the CLI-style option string
    chatbot         chat display handle, used to show progress to the user
    history         chat history (cleared below to avoid input overflow)
    system_prompt   silent system prompt for GPT
    web_port        port the web app is currently running on
    """
    history = []    # clear history to avoid overflowing the model input
    chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成"))
    # an empty "advanced_arg" counts as absent
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    args = plugin_kwargs.get("advanced_arg", None)
    if args is None:
        chatbot.append(("没给定指令", "退出"))
        yield from update_ui(chatbot=chatbot, history=history); return
    else:
        arguments = string_to_options(arguments=args)

    # collect every "content" field from the JSONL input file
    dat = []
    with open(txt, 'r', encoding='utf8') as f:
        for line in f.readlines():
            json_dat = json.loads(line)
            dat.append(json_dat["content"])

    # query the teacher model one batch at a time
    llm_kwargs['llm_model'] = arguments.llm_to_learn
    for batch in fetch_items(dat, arguments.batch):
        res = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
            inputs_array=[f"{arguments.prompt_prefix}\n\n{b}" for b in (batch)],
            inputs_show_user_array=[f"Show Nothing" for _ in (batch)],
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history_array=[[] for _ in (batch)],
            sys_prompt_array=[arguments.system_prompt for _ in (batch)],
            max_workers=10 # maximum parallelism allowed by OpenAI
        )

        # res interleaves [input, answer, input, answer, ...] — answers sit at odd indices
        with open(txt+'.generated.json', 'a+', encoding='utf8') as f:
            for b, r in zip(batch, res[1::2]):
                f.write(json.dumps({"content":b, "summary":r}, ensure_ascii=False)+'\n')

    promote_file_to_downloadzone(txt+'.generated.json', rename_file='generated.json', chatbot=chatbot)
    return
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
@CatchException
def 启动微调(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Launch ChatGLM2-6B P-Tuning fine-tuning via ``torchrun`` in the
    configured ptuning directory, blocking for up to 24 hours.

    txt             user input from the text box
    llm_kwargs      GPT model parameters (temperature, top_p, ...), usually passed through
    plugin_kwargs   plugin parameters; "advanced_arg" carries the CLI-style option string
    chatbot         chat display handle, used to show progress to the user
    history         chat history (cleared below to avoid input overflow)
    system_prompt   silent system prompt for GPT
    web_port        port the web app is currently running on
    """
    import subprocess
    history = []    # clear history to avoid overflowing the model input
    chatbot.append(("这是什么功能?", "[Local Message] 微调数据集生成"))
    # an empty "advanced_arg" counts as absent
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    args = plugin_kwargs.get("advanced_arg", None)
    if args is None:
        chatbot.append(("没给定指令", "退出"))
        yield from update_ui(chatbot=chatbot, history=history); return
    else:
        arguments = string_to_options(arguments=args)

    pre_seq_len = arguments.pre_seq_len              # e.g. 128
    learning_rate = arguments.learning_rate          # e.g. 2e-2
    num_gpus = arguments.num_gpus                    # e.g. 1
    json_dataset = arguments.json_dataset            # e.g. 't_code.json'
    ptuning_directory = arguments.ptuning_directory  # e.g. '/home/hmp/ChatGLM2-6B/ptuning'

    # NOTE(review): the command is interpolated into a shell string and run with
    # shell=True, with values taken from user-supplied plugin arguments — this is a
    # potential shell-injection vector; confirm inputs are trusted before exposing.
    command = f"torchrun --standalone --nnodes=1 --nproc-per-node={num_gpus} main.py \
        --do_train \
        --train_file AdvertiseGen/{json_dataset} \
        --validation_file AdvertiseGen/{json_dataset} \
        --preprocessing_num_workers 20 \
        --prompt_column content \
        --response_column summary \
        --overwrite_cache \
        --model_name_or_path THUDM/chatglm2-6b \
        --output_dir output/clothgen-chatglm2-6b-pt-{pre_seq_len}-{learning_rate} \
        --overwrite_output_dir \
        --max_source_length 256 \
        --max_target_length 256 \
        --per_device_train_batch_size 1 \
        --per_device_eval_batch_size 1 \
        --gradient_accumulation_steps 16 \
        --predict_with_generate \
        --max_steps 100 \
        --logging_steps 10 \
        --save_steps 20 \
        --learning_rate {learning_rate} \
        --pre_seq_len {pre_seq_len} \
        --quantization_bit 4"

    process = subprocess.Popen(command, shell=True, cwd=ptuning_directory)
    try:
        process.communicate(timeout=3600*24)  # wait for completion, up to 24 hours
    except subprocess.TimeoutExpired:
        process.kill()  # training overran the budget — terminate it
    return
|
crazy_functions/crazy_utils.py
ADDED
|
@@ -0,0 +1,609 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import update_ui, get_conf, trimmed_format_exc, get_max_token, Singleton
|
| 2 |
+
import threading
|
| 3 |
+
import os
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
def input_clipping(inputs, history, max_token_limit):
    """
    Clip *inputs* and/or *history* so that their combined token count fits
    within ``max_token_limit``; returns the (possibly shortened)
    ``(inputs, history)`` pair.

    When the input itself uses less than half of the budget, only the
    history is clipped; otherwise input and history are clipped together.
    Token counts use the gpt-3.5-turbo tokenizer regardless of the model
    actually in use (an approximation).
    """
    import numpy as np
    from request_llms.bridge_all import model_info
    enc = model_info["gpt-3.5-turbo"]['tokenizer']
    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

    mode = 'input-and-history'
    # when the input's token share is under half the budget, clip history only
    input_token_num = get_token_num(inputs)
    if input_token_num < max_token_limit//2:
        mode = 'only-history'
        max_token_limit = max_token_limit - input_token_num  # remaining budget for history

    # everything[0] is the input (or a placeholder), the rest is history
    everything = [inputs] if mode == 'input-and-history' else ['']
    everything.extend(history)
    n_token = get_token_num('\n'.join(everything))
    everything_token = [get_token_num(e) for e in everything]
    delta = max(everything_token) // 16 # truncation granularity per round

    while n_token > max_token_limit:
        where = np.argmax(everything_token)  # always trim the longest piece
        encoded = enc.encode(everything[where], disallowed_special=())
        clipped_encoded = encoded[:len(encoded)-delta]
        everything[where] = enc.decode(clipped_encoded)[:-1]    # -1 to remove the may-be illegal char
        everything_token[where] = get_token_num(everything[where])
        n_token = get_token_num('\n'.join(everything))

    if mode == 'input-and-history':
        inputs = everything[0]
    else:
        pass
    history = everything[1:]
    return inputs, history
|
| 39 |
+
|
| 40 |
+
def request_gpt_model_in_new_thread_with_ui_alive(
        inputs, inputs_show_user, llm_kwargs,
        chatbot, history, sys_prompt, refresh_interval=0.2,
        handle_token_exceed=True,
        retry_times_at_unknown_error=2,
        ):
    """
    Request a GPT model in a worker thread while keeping the UI alive.

    Args:
        inputs (string): the real input sent to the model
        inputs_show_user (string): what is shown in the chat window instead of the
            (possibly verbose) real input, to keep the report readable
        llm_kwargs: model parameters (top_p, temperature, ...)
        chatbot: chat window handle, used to stream partial output to the user
        history (list): conversation history
        sys_prompt (string): system prompt (e.g. "you are a translator ...")
        refresh_interval (float, optional): UI refresh period (default 0.2;
            keep below 1, never above 3 — purely cosmetic)
        handle_token_exceed: auto-handle token overflow by hard truncation (default True)
        retry_times_at_unknown_error: number of retries on unknown failures

    Returns:
        the model's final answer (string)
    """
    import time
    from concurrent.futures import ThreadPoolExecutor
    from request_llms.bridge_all import predict_no_ui_long_connection
    # user feedback
    chatbot.append([inputs_show_user, ""])
    yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI
    executor = ThreadPoolExecutor(max_workers=16)
    # shared with the worker: [partial answer, watchdog timestamp, extra slot]
    mutable = ["", time.time(), ""]
    # watchdog patience in seconds
    watch_dog_patience = 5
    # worker task
    def _req_gpt(inputs, history, sys_prompt):
        retry_op = retry_times_at_unknown_error
        exceeded_cnt = 0
        while True:
            # watchdog: if the main generator stopped feeding mutable[1], abort
            if len(mutable) >= 2 and (time.time()-mutable[1]) > watch_dog_patience:
                raise RuntimeError("检测到程序终止。")
            try:
                # case 1: completed successfully
                result = predict_no_ui_long_connection(
                    inputs=inputs, llm_kwargs=llm_kwargs,
                    history=history, sys_prompt=sys_prompt, observe_window=mutable)
                return result
            except ConnectionAbortedError as token_exceeded_error:
                # case 2: token overflow
                if handle_token_exceed:
                    exceeded_cnt += 1
                    # chosen strategy: compute a ratio and keep as much text as possible
                    from toolbox import get_reduce_token_percent
                    p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
                    MAX_TOKEN = get_max_token(llm_kwargs)
                    EXCEED_ALLO = 512 + 512 * exceeded_cnt  # allowance grows each overflow
                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
                    mutable[0] += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
                    continue # retry
                else:
                    # chosen strategy: give up, returning the traceback as the answer
                    tb_str = '```\n' + trimmed_format_exc() + '```'
                    mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                    return mutable[0] # give up
            except:
                # case 3: any other error — retry a few times
                tb_str = '```\n' + trimmed_format_exc() + '```'
                print(tb_str)
                mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                if retry_op > 0:
                    retry_op -= 1
                    mutable[0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}:\n\n"
                    # rate-limit errors get an extra 30 s back-off
                    if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
                        time.sleep(30)
                    time.sleep(5)
                    continue # retry
                else:
                    time.sleep(5)
                    return mutable[0] # give up

    # submit the task
    future = executor.submit(_req_gpt, inputs, history, sys_prompt)
    while True:
        # yield once per interval to refresh the frontend
        time.sleep(refresh_interval)
        # "feed the dog": keep the worker's watchdog satisfied
        mutable[1] = time.time()
        if future.done():
            break
        chatbot[-1] = [chatbot[-1][0], mutable[0]]
        yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI

    final_result = future.result()
    chatbot[-1] = [chatbot[-1][0], final_result]
    yield from update_ui(chatbot=chatbot, history=[]) # on success, overwrite any interim error text
    return final_result
|
| 137 |
+
|
| 138 |
+
def can_multi_process(llm):
    """Return True if *llm* names a backend that tolerates concurrent requests.

    Remote API backends (OpenAI, api2d, Azure, Spark, Zhipu) can be queried
    in parallel; anything else (e.g. local chatglm models) must be
    serialized by the caller to avoid severe stalls.
    """
    # str.startswith accepts a tuple of prefixes — one call instead of a chain.
    return llm.startswith(('gpt-', 'api2d-', 'azure-', 'spark', 'zhipuai'))
|
| 145 |
+
|
| 146 |
+
def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array, inputs_show_user_array, llm_kwargs,
        chatbot, history_array, sys_prompt_array,
        refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
        handle_token_exceed=True, show_user_at_complete=False,
        retry_times_at_unknown_error=2,
        ):
    """
    Multi-threaded version of the GPT request helper. Features:
        - streams remote data to the UI in real time
        - uses a thread pool whose size can be tuned to avoid OpenAI rate limits
        - handles mid-run interruption via per-task watchdogs
        - on network failure, folds the traceback and any partial answer into the output

    Args (variables ending in _array are lists, one element per sub-task;
    each element is dispatched to its own worker thread):
        inputs_array (list): real input of each sub-task
        inputs_show_user_array (list): what to display for each sub-task, hiding
            verbose real inputs to keep the summary report readable
        llm_kwargs: model parameters
        chatbot: chat window handle for streaming visualization
        history_array (list): per-sub-task conversation history (list of lists)
        sys_prompt_array (list): per-sub-task system prompt
        refresh_interval (float, optional): UI refresh period (default 0.2;
            keep below 1, never above 3 — purely cosmetic)
        max_workers (int, optional): max threads (default: from config.py); caps
            request frequency when there are very many sub-tasks
        scroller_max_len (int, optional): how many trailing characters of each
            stream to show in the ticker (default 30, cosmetic only)
        handle_token_exceed (bool, optional): auto-truncate over-long input (default True)
        show_user_at_complete (bool, optional): append each full input/output
            pair to the chat window when everything finishes
        retry_times_at_unknown_error: per-sub-task retries on unknown failure

    Returns:
        list: flattened [shown_input_1, answer_1, shown_input_2, answer_2, ...];
        failed sub-tasks carry their traceback in the answer slot for debugging.
    """
    import time, random
    from concurrent.futures import ThreadPoolExecutor
    from request_llms.bridge_all import predict_no_ui_long_connection
    assert len(inputs_array) == len(history_array)
    assert len(inputs_array) == len(sys_prompt_array)
    if max_workers == -1: # read worker count from the config file
        try: max_workers = get_conf('DEFAULT_WORKER_NUM')
        except: max_workers = 8
    if max_workers <= 0: max_workers = 3
    # disable multithreading for backends such as chatglm — it can cause severe stalls
    if not can_multi_process(llm_kwargs['llm_model']):
        max_workers = 1

    executor = ThreadPoolExecutor(max_workers=max_workers)
    n_frag = len(inputs_array)
    # user feedback
    chatbot.append(["请开始多线程操作。", ""])
    yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI
    # cross-thread state: one [partial answer, watchdog timestamp, status] per sub-task
    mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]

    # watchdog patience in seconds
    watch_dog_patience = 5

    # worker task
    def _req_gpt(index, inputs, history, sys_prompt):
        gpt_say = ""
        retry_op = retry_times_at_unknown_error
        exceeded_cnt = 0
        mutable[index][2] = "执行中"
        # timed out when the main generator stopped refreshing mutable[index][1]
        detect_timeout = lambda: len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > watch_dog_patience
        while True:
            # watchdog error
            if detect_timeout(): raise RuntimeError("检测到程序终止。")
            try:
                # case 1: completed successfully
                gpt_say = predict_no_ui_long_connection(
                    inputs=inputs, llm_kwargs=llm_kwargs, history=history,
                    sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
                )
                mutable[index][2] = "已成功"
                return gpt_say
            except ConnectionAbortedError as token_exceeded_error:
                # case 2: token overflow
                if handle_token_exceed:
                    exceeded_cnt += 1
                    # chosen strategy: compute a ratio and keep as much text as possible
                    from toolbox import get_reduce_token_percent
                    p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
                    MAX_TOKEN = get_max_token(llm_kwargs)
                    EXCEED_ALLO = 512 + 512 * exceeded_cnt  # allowance grows each overflow
                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
                    gpt_say += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
                    mutable[index][2] = f"截断重试"
                    continue # retry
                else:
                    # chosen strategy: give up
                    tb_str = '```\n' + trimmed_format_exc() + '```'
                    gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                    if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
                    mutable[index][2] = "输入过长已放弃"
                    return gpt_say # give up
            except:
                # case 3: any other error
                if detect_timeout(): raise RuntimeError("检测到程序终止。")
                tb_str = '```\n' + trimmed_format_exc() + '```'
                print(tb_str)
                gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
                if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
                if retry_op > 0:
                    retry_op -= 1
                    wait = random.randint(5, 20)
                    if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
                        wait = wait * 3
                        fail_info = "OpenAI绑定信用卡可解除频率限制 "
                    else:
                        fail_info = ""
                    # perhaps after waiting a dozen seconds things will improve
                    for i in range(wait):
                        mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1)
                    # begin retry
                    if detect_timeout(): raise RuntimeError("检测到程序终止。")
                    mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
                    continue # retry
                else:
                    mutable[index][2] = "已失败"
                    wait = 5
                    time.sleep(5)
                    return gpt_say # give up

    # launch the asynchronous tasks
    futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
        range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
    cnt = 0
    while True:
        # yield once per interval to refresh the frontend
        time.sleep(refresh_interval)
        cnt += 1
        worker_done = [h.done() for h in futures]
        # nicer UI visual effect
        observe_win = []
        # every task must "feed the dog" (watchdog)
        for thread_index, _ in enumerate(worker_done):
            mutable[thread_index][1] = time.time()
        # build a live ticker of each task's stream tail
        for thread_index, _ in enumerate(worker_done):
            print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
                replace('\n', '').replace('`', '.').replace(
                    ' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
            observe_win.append(print_something_really_funny)
        # one status line per task: running tasks show their ticker, finished ones just the status
        stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
                            if not done else f'`{mutable[thread_index][2]}`\n\n'
                            for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
        # animated trailing dots so the user sees the UI is alive
        chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
        yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI
        if all(worker_done):
            executor.shutdown()
            break

    # all asynchronous tasks have ended — collect results
    gpt_response_collection = []
    for inputs_show_user, f in zip(inputs_show_user_array, futures):
        gpt_res = f.result()
        gpt_response_collection.extend([inputs_show_user, gpt_res])

    # optionally display the results in the chat window at completion
    if show_user_at_complete:
        for inputs_show_user, f in zip(inputs_show_user_array, futures):
            gpt_res = f.result()
            chatbot.append([inputs_show_user, gpt_res])
            yield from update_ui(chatbot=chatbot, history=[]) # refresh the UI
            time.sleep(0.5)
    return gpt_response_collection
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
def read_and_clean_pdf_text(fp):
    """
    Split and clean the text of a PDF. Uses many heuristic tricks; the
    logic is messy but the results are good.

    Input:
        fp: path of the PDF file to read and clean

    Output:
        meta_txt: cleaned text content (string)
        page_one_meta: cleaned text blocks of the first page (list)

    Cleaning rules:
        - extract the text of all blocks and merge it into one string
        - drop short blocks (fewer than 100 chars), replacing them with a newline
        - remove redundant blank lines
        - merge paragraph blocks that start with a lowercase letter, joining with a space
        - deduplicate consecutive newlines
        - replace each newline with two newlines so paragraphs are visibly separated
    """
    import fitz, copy
    import re
    import numpy as np
    from colorful import print亮黄, print亮绿
    fc = 0  # Index 0: text
    fs = 1  # Index 1: font size
    fb = 2  # Index 2: bounding box
    REMOVE_FOOT_NOTE = True # drop non-body content (smaller font: references, footnotes, captions)
    REMOVE_FOOT_FFSIZE_PERCENT = 0.95 # treated as non-body when font < body size * this ratio
    def primary_ffsize(l):
        """
        Dominant font size of a text line (weighted by character count).
        """
        fsize_statiscs = {}
        for wtf in l['spans']:
            if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0
            fsize_statiscs[wtf['size']] += len(wtf['text'])
        return max(fsize_statiscs, key=fsize_statiscs.get)

    def ffsize_same(a,b):
        """
        Whether two font sizes are approximately equal (within 2%).
        """
        return abs((a-b)/max(a,b)) < 0.02

    with fitz.open(fp) as doc:
        meta_txt = []
        meta_font = []

        meta_line = []
        meta_span = []
        ############################## <Step 1: gather raw info> ##################################
        for index, page in enumerate(doc):
            # file_content += page.get_text()
            text_areas = page.get_text("dict") # text info on this page
            for t in text_areas['blocks']:
                if 'lines' in t:
                    pf = 998
                    for l in t['lines']:
                        txt_line = "".join([wtf['text'] for wtf in l['spans']])
                        if len(txt_line) == 0: continue
                        pf = primary_ffsize(l)
                        meta_line.append([txt_line, pf, l['bbox'], l])
                        for wtf in l['spans']: # for l in t['lines']:
                            meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])])
                    # meta_line.append(["NEW_BLOCK", pf])
            # block extraction: join spans into lines, lines into blocks, un-hyphenating line breaks
            meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
                '- ', '') for t in text_areas['blocks'] if 'lines' in t])
            meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']])
                             for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t])
            if index == 0:
                page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
                    '- ', '') for t in text_areas['blocks'] if 'lines' in t]

        ############################## <Step 2: find the main body font size> ##################################
        try:
            fsize_statiscs = {}
            for span in meta_span:
                if span[1] not in fsize_statiscs: fsize_statiscs[span[1]] = 0
                fsize_statiscs[span[1]] += span[2]
            main_fsize = max(fsize_statiscs, key=fsize_statiscs.get)
            if REMOVE_FOOT_NOTE:
                give_up_fize_threshold = main_fsize * REMOVE_FOOT_FFSIZE_PERCENT
        except:
            raise RuntimeError(f'抱歉, 我们暂时无法解析此PDF文档: {fp}。')
        ############################## <Step 3: split and regroup> ##################################
        mega_sec = []
        sec = []
        for index, line in enumerate(meta_line):
            if index == 0:
                sec.append(line[fc])
                continue
            if REMOVE_FOOT_NOTE:
                # skip lines in fonts smaller than the body threshold (footnotes etc.)
                if meta_line[index][fs] <= give_up_fize_threshold:
                    continue
            if ffsize_same(meta_line[index][fs], meta_line[index-1][fs]):
                # same font as previous line — try to detect a paragraph end:
                # ends with '.' and is noticeably narrower than the previous line
                if meta_line[index][fc].endswith('.') and\
                    (meta_line[index-1][fc] != 'NEW_BLOCK') and \
                    (meta_line[index][fb][2] - meta_line[index][fb][0]) < (meta_line[index-1][fb][2] - meta_line[index-1][fb][0]) * 0.7:
                    sec[-1] += line[fc]
                    sec[-1] += "\n\n"
                else:
                    sec[-1] += " "
                    sec[-1] += line[fc]
            else:
                if (index+1 < len(meta_line)) and \
                    meta_line[index][fs] > main_fsize:
                    # single line + larger-than-body font → treat as a heading
                    mega_sec.append(copy.deepcopy(sec))
                    sec = []
                    sec.append("# " + line[fc])
                else:
                    # try to detect a section boundary
                    if meta_line[index-1][fs] > meta_line[index][fs]:
                        sec.append("\n" + line[fc])
                    else:
                        sec.append(line[fc])
        mega_sec.append(copy.deepcopy(sec))

        finals = []
        for ms in mega_sec:
            final = " ".join(ms)
            final = final.replace('- ', ' ')
            finals.append(final)
        meta_txt = finals

        ############################## <Step 4: miscellaneous post-processing> ##################################
        def 把字符太少的块清除为回车(meta_txt):
            # replace blocks shorter than 100 chars with a bare newline
            for index, block_txt in enumerate(meta_txt):
                if len(block_txt) < 100:
                    meta_txt[index] = '\n'
            return meta_txt
        meta_txt = 把字符太少的块清除为回车(meta_txt)

        def 清理多余的空行(meta_txt):
            # collapse consecutive newline-only blocks into one
            for index in reversed(range(1, len(meta_txt))):
                if meta_txt[index] == '\n' and meta_txt[index-1] == '\n':
                    meta_txt.pop(index)
            return meta_txt
        meta_txt = 清理多余的空行(meta_txt)

        def 合并小写开头的段落块(meta_txt):
            # merge blocks starting with a lowercase word into the previous block
            def starts_with_lowercase_word(s):
                pattern = r"^[a-z]+"
                match = re.match(pattern, s)
                if match:
                    return True
                else:
                    return False
            # some PDFs start the very first paragraph with a lowercase letter;
            # capitalize it to avoid indexing errors below
            if starts_with_lowercase_word(meta_txt[0]):
                meta_txt[0] = meta_txt[0].capitalize()
            for _ in range(100):
                for index, block_txt in enumerate(meta_txt):
                    if starts_with_lowercase_word(block_txt):
                        if meta_txt[index-1] != '\n':
                            meta_txt[index-1] += ' '
                        else:
                            meta_txt[index-1] = ''
                        meta_txt[index-1] += meta_txt[index]
                        meta_txt[index] = '\n'
            return meta_txt
        meta_txt = 合并小写开头的段落块(meta_txt)
        meta_txt = 清理多余的空行(meta_txt)

        meta_txt = '\n'.join(meta_txt)
        # deduplicate consecutive newlines
        for _ in range(5):
            meta_txt = meta_txt.replace('\n\n', '\n')

        # newline -> double newline (paragraph separation)
        meta_txt = meta_txt.replace('\n', '\n\n')

        ############################## <Step 5: show the split result> ##################################
        # for f in finals:
        #     print亮黄(f)
        #     print亮绿('***************************')

    return meta_txt, page_one_meta
|
| 499 |
+
|
| 500 |
+
|
| 501 |
+
def get_files_from_everything(txt, type): # type='.md'
    """
    Collect every file with the given extension from a local folder, a single
    local file, or a remote URL (which gets downloaded into the log folder).

    Args:
        txt: a folder path, a file path, or an http(s) URL.
        type: the file extension to look for, e.g. '.md'.

    Returns:
        (success, file_manifest, project_folder) where
        success        -- True when at least one matching file was obtained;
        file_manifest  -- list of matching file paths;
        project_folder -- folder containing the files (a log folder for URLs,
                          None when nothing usable was provided).
    """
    import glob, os

    if txt.startswith('http'):
        # Remote resource: download it into a dedicated log folder.
        import requests
        from toolbox import get_conf
        from toolbox import get_log_folder, gen_time_str
        proxies = get_conf('proxies')
        try:
            r = requests.get(txt, proxies=proxies)
        except:
            raise ConnectionRefusedError(f"无法下载资源{txt},请检查。")
        path = os.path.join(get_log_folder(plugin_name='web_download'), gen_time_str()+type)
        with open(path, 'wb+') as f:
            f.write(r.content)
        return True, [path], get_log_folder(plugin_name='web_download')

    if txt.endswith(type):
        # A single file was given directly.
        return True, [txt], os.path.dirname(txt)

    if os.path.exists(txt):
        # Local folder: search recursively for files with the extension.
        matches = [f for f in glob.glob(f'{txt}/**/*'+type, recursive=True)]
        return bool(matches), matches, txt

    # Neither a URL, a file, nor an existing folder.
    return False, [], None
|
| 547 |
+
|
| 548 |
+
|
| 549 |
+
|
| 550 |
+
@Singleton
class nougat_interface():
    """Singleton wrapper around the `nougat` PDF-to-markdown CLI.

    A single lock serializes parsing, because nougat is resource heavy and its
    output folder handling is not concurrency-safe.
    """
    def __init__(self):
        # Serializes access to the nougat CLI across threads.
        self.threadLock = threading.Lock()

    def nougat_with_timeout(self, command, cwd, timeout=3600):
        """Run `command` in a shell, killing it after `timeout` seconds.

        Args:
            command: the shell command line to execute.
            cwd: working directory for the subprocess.
            timeout: hard limit in seconds before the process is killed.

        Returns:
            True if the command finished on its own, False on timeout.
        """
        import subprocess
        from toolbox import ProxyNetworkActivate
        logging.info(f'正在执行命令 {command}')
        # Proxy context: nougat may need network access to fetch its weights.
        with ProxyNetworkActivate("Nougat_Download"):
            process = subprocess.Popen(command, shell=True, cwd=cwd, env=os.environ)
        try:
            stdout, stderr = process.communicate(timeout=timeout)
        except subprocess.TimeoutExpired:
            process.kill()
            stdout, stderr = process.communicate()
            print("Process timed out!")
            return False
        return True


    def NOUGAT_parse_pdf(self, fp, chatbot, history):
        """Parse the PDF at `fp` with nougat and return the resulting .mmd path.

        Yields UI updates while queueing/loading; raises RuntimeError when
        nougat produces no output file.
        """
        from toolbox import update_ui_lastest_msg

        yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在排队, 等待线程锁...",
                                         chatbot=chatbot, history=history, delay=0)
        self.threadLock.acquire()
        # Bug fix: release the lock even if any step below raises; previously a
        # single failure (e.g. os.makedirs or the nougat run) left the lock
        # held forever and deadlocked every later call.
        try:
            import glob, threading, os
            from toolbox import get_log_folder, gen_time_str
            dst = os.path.join(get_log_folder(plugin_name='nougat'), gen_time_str())
            os.makedirs(dst)

            yield from update_ui_lastest_msg("正在解析论文, 请稍候。进度:正在加载NOUGAT... (提示:首次运行需要花费较长时间下载NOUGAT参数)",
                                             chatbot=chatbot, history=history, delay=0)
            self.nougat_with_timeout(f'nougat --out "{os.path.abspath(dst)}" "{os.path.abspath(fp)}"', os.getcwd(), timeout=3600)
            res = glob.glob(os.path.join(dst,'*.mmd'))
            if len(res) == 0:
                raise RuntimeError("Nougat解析论文失败。")
        finally:
            self.threadLock.release()
        return res[0]
|
| 591 |
+
|
| 592 |
+
|
| 593 |
+
|
| 594 |
+
|
| 595 |
+
def try_install_deps(deps, reload_m=None):
    """pip-install packages into the user site, then reload modules.

    Args:
        deps: iterable of pip requirement strings to install.
        reload_m: optional list of already-imported module names to reload so
            the freshly installed versions take effect.
    """
    import subprocess, sys, importlib
    # Bug fix: avoid the shared mutable-default-argument pitfall (`reload_m=[]`).
    if reload_m is None:
        reload_m = []
    for dep in deps:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep])
    # Refresh the user site-packages path so new installs become importable.
    import site
    importlib.reload(site)
    for m in reload_m:
        importlib.reload(__import__(m))
|
| 603 |
+
|
| 604 |
+
|
| 605 |
+
def get_plugin_arg(plugin_kwargs, key, default):
    """Read a plugin argument, falling back to `default` when the argument is
    absent or was submitted as an empty string.

    Note: an empty-string entry is popped from `plugin_kwargs` as a side
    effect, matching the original contract.
    """
    # An empty string means "the user left the field blank" -> drop the entry.
    if plugin_kwargs.get(key, None) == "":
        plugin_kwargs.pop(key)
    # Normal path: a present, non-empty value wins; otherwise the default.
    return plugin_kwargs.get(key, default)
|
crazy_functions/gen_fns/gen_fns_shared.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time
|
| 2 |
+
import importlib
|
| 3 |
+
from toolbox import trimmed_format_exc, gen_time_str, get_log_folder
|
| 4 |
+
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, is_the_upload_folder
|
| 5 |
+
from toolbox import promote_file_to_downloadzone, get_log_folder, update_ui_lastest_msg
|
| 6 |
+
import multiprocessing
|
| 7 |
+
|
| 8 |
+
def get_class_name(class_string):
    """Return the name of the first `class Name(Base...):` found in source text."""
    import re
    # The class name is the identifier between `class ` and the opening paren.
    return re.search(r'class (\w+)\(', class_string).group(1)
|
| 13 |
+
|
| 14 |
+
def try_make_module(code, chatbot):
    """Write generated plugin `code` to disk and verify (in a subprocess, with
    a 10 s budget) that it imports and its class can be instantiated.

    Args:
        code: Python source text containing a `class Name(...)` definition.
        chatbot: UI handle used to promote the written file to the download zone.

    Returns:
        (success, traceback_text) — traceback_text is empty on success.
    """
    module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
    fn_path = f'{get_log_folder(plugin_name="gen_plugin_verify")}/{module_file}.py'
    with open(fn_path, 'w', encoding='utf8') as f: f.write(code)
    promote_file_to_downloadzone(fn_path, chatbot=chatbot)
    class_name = get_class_name(code)
    manager = multiprocessing.Manager()
    return_dict = manager.dict()
    p = multiprocessing.Process(target=is_function_successfully_generated, args=(fn_path, class_name, return_dict))
    # only has 10 seconds to run
    p.start(); p.join(timeout=10)
    if p.is_alive(): p.terminate(); p.join()
    p.close()
    # Bug fix: if the subprocess was killed (timeout/crash) before writing its
    # results, the managed dict may be missing the keys — report failure
    # instead of raising KeyError here.
    return (return_dict.get("success", False),
            return_dict.get('traceback', 'verification subprocess timed out or crashed'))
|
| 28 |
+
|
| 29 |
+
# check is_function_successfully_generated
|
| 30 |
+
def is_function_successfully_generated(fn_path, class_name, return_dict):
    """Subprocess target: import the module at `fn_path` and instantiate
    `class_name`, proving the generated plugin code at least loads.

    Results are reported through `return_dict` (typically a multiprocessing
    proxy): 'success' (bool) and 'traceback' (str, empty when successful).
    """
    return_dict['success'] = False
    return_dict['traceback'] = ""
    try:
        # Bug fix: `import importlib` alone does not guarantee the
        # `importlib.util` submodule is loaded; import it explicitly.
        import importlib.util
        # Create a spec for the module
        module_spec = importlib.util.spec_from_file_location('example_module', fn_path)
        # Load the module
        example_module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(example_module)
        # Now you can use the module
        some_class = getattr(example_module, class_name)
        # Now you can create an instance of the class
        instance = some_class()
        return_dict['success'] = True
        return
    except:
        # Deliberately broad: any failure raised by the generated code
        # (including SystemExit) must be captured and reported, not crash
        # the worker process.
        return_dict['traceback'] = trimmed_format_exc()
        return
|
| 48 |
+
|
| 49 |
+
def subprocess_worker(code, file_path, return_dict):
    """Subprocess target: materialize `code` as a module, instantiate its
    plugin class, and execute `instance.run(file_path)`.

    Reports through `return_dict`: 'result' (run() return value or None),
    'success' (bool) and 'traceback' (str, empty unless an error occurred).
    """
    return_dict['result'] = None
    return_dict['success'] = False
    return_dict['traceback'] = ""
    try:
        module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
        fn_path = f'{get_log_folder(plugin_name="gen_plugin_run")}/{module_file}.py'
        with open(fn_path, 'w', encoding='utf8') as f:
            f.write(code)
        plugin_class_name = get_class_name(code)
        # Import the freshly written module from its file location.
        module_spec = importlib.util.spec_from_file_location('example_module', fn_path)
        example_module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(example_module)
        # Instantiate the generated plugin class and run it on the input file.
        plugin_class = getattr(example_module, plugin_class_name)
        plugin = plugin_class()
        return_dict['result'] = plugin.run(file_path)
        return_dict['success'] = True
    except:
        return_dict['traceback'] = trimmed_format_exc()
|
crazy_functions/ipc_fns/mp.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import platform
|
| 2 |
+
import pickle
|
| 3 |
+
import multiprocessing
|
| 4 |
+
|
| 5 |
+
def run_in_subprocess_wrapper_func(v_args):
    """Child-process entry point: unpickle a packed call description and run it.

    `v_args` is pickle.dumps((func, args, kwargs, return_dict, exception_dict)).
    The call's result lands in return_dict['result']; on failure the
    sys.exc_info() triple is stored in exception_dict['exception'].
    """
    import sys
    func, args, kwargs, return_dict, exception_dict = pickle.loads(v_args)
    try:
        return_dict['result'] = func(*args, **kwargs)
    except Exception:
        exception_dict['exception'] = sys.exc_info()
| 15 |
+
def run_in_subprocess_with_timeout(func, timeout=60):
    """Wrap `func` so it executes in a child process with a hard time limit.

    On Linux the returned wrapper pickles the call, runs it via
    `run_in_subprocess_wrapper_func` in a new process, and re-raises any
    exception the child recorded; on other platforms `func` is returned
    unchanged (no isolation).

    NOTE(review): the child stores sys.exc_info(), whose traceback object is
    not picklable through a Manager dict — confirm the exception path actually
    round-trips on the target platform.
    """
    if platform.system() != 'Linux':
        # Process isolation only enabled on Linux; run inline elsewhere.
        return func

    def wrapper(*args, **kwargs):
        return_dict = multiprocessing.Manager().dict()
        exception_dict = multiprocessing.Manager().dict()
        packed = pickle.dumps((func, args, kwargs, return_dict, exception_dict))
        child = multiprocessing.Process(target=run_in_subprocess_wrapper_func,
                                        args=(packed,))
        child.start()
        child.join(timeout)
        if child.is_alive():
            child.terminate()
            raise TimeoutError(f'功能单元{str(func)}未能在规定时间内完成任务')
        child.close()
        if 'exception' in exception_dict:
            # ooops, the subprocess ran into an exception -> re-raise it here
            exc_info = exception_dict['exception']
            raise exc_info[1].with_traceback(exc_info[2])
        if 'result' in return_dict.keys():
            # The subprocess ran successfully: hand back its result.
            return return_dict['result']

    return wrapper
|
crazy_functions/json_fns/pydantic_io.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
https://github.com/langchain-ai/langchain/blob/master/docs/extras/modules/model_io/output_parsers/pydantic.ipynb
|
| 3 |
+
|
| 4 |
+
Example 1.
|
| 5 |
+
|
| 6 |
+
# Define your desired data structure.
|
| 7 |
+
class Joke(BaseModel):
|
| 8 |
+
setup: str = Field(description="question to set up a joke")
|
| 9 |
+
punchline: str = Field(description="answer to resolve the joke")
|
| 10 |
+
|
| 11 |
+
# You can add custom validation logic easily with Pydantic.
|
| 12 |
+
@validator("setup")
|
| 13 |
+
def question_ends_with_question_mark(cls, field):
|
| 14 |
+
if field[-1] != "?":
|
| 15 |
+
raise ValueError("Badly formed question!")
|
| 16 |
+
return field
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
Example 2.
|
| 20 |
+
|
| 21 |
+
# Here's another example, but with a compound typed field.
|
| 22 |
+
class Actor(BaseModel):
|
| 23 |
+
name: str = Field(description="name of an actor")
|
| 24 |
+
film_names: List[str] = Field(description="list of names of films they starred in")
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
import json, re, logging
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
PYDANTIC_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.

Here is the output schema:
```
{schema}
```"""


PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE = """The output should be formatted as a JSON instance that conforms to the JSON schema below.
```
{schema}
```"""

class JsonStringError(Exception):
    """Raised when a model reply cannot be parsed (or repaired) into valid JSON."""

class GptJsonIO():
    """Exchange structured JSON with an LLM using a pydantic schema.

    Typical flow:
        io = GptJsonIO(MyModel)
        prompt += io.format_instructions
        obj = io.generate_output_auto_repair(llm_reply, gpt_gen_fn)
    """

    def __init__(self, schema, example_instruction=True):
        # schema: a pydantic BaseModel subclass describing the expected JSON.
        # example_instruction: include a worked example in the instructions.
        self.pydantic_object = schema
        self.example_instruction = example_instruction
        self.format_instructions = self.generate_format_instructions()

    def generate_format_instructions(self):
        """Render the prompt snippet telling the model which JSON to emit."""
        schema = self.pydantic_object.schema()

        # Remove extraneous fields that would only distract the model.
        reduced_schema = schema
        if "title" in reduced_schema:
            del reduced_schema["title"]
        if "type" in reduced_schema:
            del reduced_schema["type"]
        # Ensure json in context is well-formed with double quotes.
        # Bug fix: schema_str was previously assigned only in the
        # example_instruction branch, so example_instruction=False raised
        # NameError; compute it once before branching.
        schema_str = json.dumps(reduced_schema)
        if self.example_instruction:
            return PYDANTIC_FORMAT_INSTRUCTIONS.format(schema=schema_str)
        else:
            return PYDANTIC_FORMAT_INSTRUCTIONS_SIMPLE.format(schema=schema_str)

    def generate_output(self, text):
        """Extract the first {...} candidate from `text` and parse it with the schema.

        Raises json decoding / pydantic validation errors on malformed input.
        """
        # Greedy search for 1st json candidate.
        match = re.search(
            r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL
        )
        json_str = ""
        if match: json_str = match.group()
        json_object = json.loads(json_str, strict=False)
        final_object = self.pydantic_object.parse_obj(json_object)
        return final_object

    def generate_repair_prompt(self, broken_json, error):
        """Build a prompt asking the model to fix `broken_json` given `error`."""
        prompt = "Fix a broken json string.\n\n" + \
                 "(1) The broken json string need to fix is: \n\n" + \
                 "```" + "\n" + \
                 broken_json + "\n" + \
                 "```" + "\n\n" + \
                 "(2) The error message is: \n\n" + \
                 error + "\n\n" + \
                 "Now, fix this json string. \n\n"
        return prompt

    def generate_output_auto_repair(self, response, gpt_gen_fn):
        """
        response: string containing candidate json
        gpt_gen_fn: gpt_gen_fn(inputs, sys_prompt) -> str; called once to
                    attempt a model-driven repair of broken JSON.

        Raises JsonStringError when even the repaired reply cannot be parsed.
        """
        try:
            result = self.generate_output(response)
        except Exception as e:
            try:
                logging.info(f'Repairing json:{response}')
                repair_prompt = self.generate_repair_prompt(broken_json = response, error=repr(e))
                result = self.generate_output(gpt_gen_fn(repair_prompt, self.format_instructions))
                logging.info('Repaire json success.')
            except Exception as e:
                # Out of options: give up on repairing.
                logging.info('Repaire json fail.')
                raise JsonStringError('Cannot repair json.', str(e))
        return result
|
crazy_functions/live_audio/aliyunASR.py
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import time, logging, json, sys, struct
|
| 2 |
+
import numpy as np
|
| 3 |
+
from scipy.io.wavfile import WAVE_FORMAT
|
| 4 |
+
|
| 5 |
+
def write_numpy_to_wave(filename, rate, data, add_header=False):
    """
    Write a NumPy array as a WAV file.

    Adapted from scipy.io.wavfile.write with one twist: when `add_header` is
    False, only the 'data' chunk (tag + length + samples) is emitted — no
    RIFF/fmt header — which suits streaming raw PCM chunks to a consumer that
    has already negotiated the format.

    Args:
        filename: a path, or a file-like object with a .write method.
        rate: sample rate in Hz (written into the fmt chunk when emitted).
        data: numpy array of samples; int/float kinds, or uint8 (1 byte).
            2-D arrays are interpreted as (frames, channels) — standard WAV
            layout; 1-D is mono.
        add_header: when True, write the full RIFF/WAVE header and patch the
            total size back in at the end.

    Raises:
        ValueError: for unsupported dtypes or data exceeding the 4 GiB
            WAV size limit.
    """
    def _array_tofile(fid, data):
        # ravel gives a c-contiguous buffer
        fid.write(data.ravel().view('b').data)

    # Accept either an open file-like object or a path to create.
    if hasattr(filename, 'write'):
        fid = filename
    else:
        fid = open(filename, 'wb')

    fs = rate

    try:
        dkind = data.dtype.kind
        # Only signed ints, floats, and 8-bit unsigned are representable in WAV.
        if not (dkind == 'i' or dkind == 'f' or (dkind == 'u' and
                                                 data.dtype.itemsize == 1)):
            raise ValueError("Unsupported data type '%s'" % data.dtype)

        header_data = b''

        # RIFF container header; the total-size field is patched at the end.
        header_data += b'RIFF'
        header_data += b'\x00\x00\x00\x00'
        header_data += b'WAVE'

        # fmt chunk
        header_data += b'fmt '
        if dkind == 'f':
            format_tag = WAVE_FORMAT.IEEE_FLOAT
        else:
            format_tag = WAVE_FORMAT.PCM
        if data.ndim == 1:
            channels = 1
        else:
            channels = data.shape[1]
        bit_depth = data.dtype.itemsize * 8
        bytes_per_second = fs*(bit_depth // 8)*channels
        block_align = channels * (bit_depth // 8)

        # <HHIIHH: format tag, channels, sample rate, byte rate, block align, bits.
        fmt_chunk_data = struct.pack('<HHIIHH', format_tag, channels, fs,
                                     bytes_per_second, block_align, bit_depth)
        if not (dkind == 'i' or dkind == 'u'):
            # add cbSize field for non-PCM files
            fmt_chunk_data += b'\x00\x00'

        header_data += struct.pack('<I', len(fmt_chunk_data))
        header_data += fmt_chunk_data

        # fact chunk (non-PCM files)
        if not (dkind == 'i' or dkind == 'u'):
            header_data += b'fact'
            header_data += struct.pack('<II', 4, data.shape[0])

        # check data size (needs to be immediately before the data chunk)
        if ((len(header_data)-4-4) + (4+4+data.nbytes)) > 0xFFFFFFFF:
            raise ValueError("Data exceeds wave file size limit")
        if add_header:
            fid.write(header_data)
        # data chunk
        fid.write(b'data')
        fid.write(struct.pack('<I', data.nbytes))
        # WAV is little-endian; byteswap big-endian data before writing.
        if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and
                                           sys.byteorder == 'big'):
            data = data.byteswap()
        _array_tofile(fid, data)

        if add_header:
            # Determine file size and place it in correct
            # position at start of the file.
            size = fid.tell()
            fid.seek(4)
            fid.write(struct.pack('<I', size-8))

    finally:
        # Close files we opened; rewind caller-supplied streams so the caller
        # can read back what was just written.
        if not hasattr(filename, 'write'):
            fid.close()
        else:
            fid.seek(0)
| 86 |
+
def is_speaker_speaking(vad, data, sample_rate):
    """Detect whether any 30 ms frame of `data` contains speech.

    Args:
        vad: a webrtcvad.Vad-like object exposing is_speech(frame, sample_rate).
            The WebRTC VAD only accepts 16-bit mono PCM audio, sampled at
            8000, 16000, 32000 or 48000 Hz; a frame must be 10, 20 or 30 ms.
        data: raw 16-bit PCM bytes.
        sample_rate: sampling rate in Hz.

    Returns:
        (speaking, info): speaking is True when any frame contains speech;
        info is a short '^'/'.' string visualizing the first 10 frame verdicts.
    """
    frame_duration = 30
    n_bit_each = int(sample_rate * frame_duration / 1000) * 2  # x2 because audio is 16 bit (2 bytes)
    # Perf fix: step directly from frame boundary to frame boundary instead of
    # iterating every byte index and testing `t % n_bit_each == 0`.
    res_list = [vad.is_speech(data[t - n_bit_each:t], sample_rate)
                for t in range(n_bit_each, len(data), n_bit_each)]

    info = ''.join(['^' if r else '.' for r in res_list])
    info = info[:10]
    return any(res_list), info
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
class AliyunASR():
    """Callback mixin driving Aliyun's realtime speech-to-text (NLS) service.

    NOTE(review): this class reads/writes attributes it does not define here
    (self.stop, self.stop_msg, self.parsed_text, self.parsed_sentence,
    self.audio_shape, self.event_on_result_chg, self.event_on_entence_end) —
    presumably supplied by the class that mixes this in; confirm against callers.
    """

    def test_on_sentence_begin(self, message, *args):
        # NLS callback: a new sentence started; nothing to do.
        # print("test_on_sentence_begin:{}".format(message))
        pass

    def test_on_sentence_end(self, message, *args):
        # NLS callback: a complete sentence was recognized — publish the text
        # and wake any waiter.
        # print("test_on_sentence_end:{}".format(message))
        message = json.loads(message)
        self.parsed_sentence = message['payload']['result']
        # NOTE(review): "entence" looks like a typo, kept because the attribute
        # is presumably named like this where it is created — confirm.
        self.event_on_entence_end.set()
        # print(self.parsed_sentence)

    def test_on_start(self, message, *args):
        # NLS callback: transcription session opened.
        # print("test_on_start:{}".format(message))
        pass

    def test_on_error(self, message, *args):
        # NLS callback: server-side error; log and continue.
        logging.error("on_error args=>{}".format(args))
        pass

    def test_on_close(self, *args):
        # NLS callback: connection closed -> flag the service as unavailable
        # so the capture loop below shuts down.
        self.aliyun_service_ok = False
        pass

    def test_on_result_chg(self, message, *args):
        # NLS callback: intermediate recognition result changed.
        # print("test_on_chg:{}".format(message))
        message = json.loads(message)
        self.parsed_text = message['payload']['result']
        self.event_on_result_chg.set()

    def test_on_completed(self, message, *args):
        # NLS callback: transcription finished.
        # print("on_completed:args=>{} message=>{}".format(args, message))
        pass

    def audio_convertion_thread(self, uuid):
        """Background-thread loop: pull captured audio for this session,
        run voice-activity detection, and stream speech frames to the
        Aliyun transcriber until self.stop is set."""
        import nls  # pip install git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
        import tempfile
        from scipy import io
        from toolbox import get_conf
        from .audio_io import change_sample_rate
        from .audio_io import RealtimeAudioDistribution
        NEW_SAMPLERATE = 16000  # Aliyun NLS expects 16 kHz PCM
        rad = RealtimeAudioDistribution()
        rad.clean_up()
        temp_folder = tempfile.gettempdir()
        TOKEN, APPKEY = get_conf('ALIYUN_TOKEN', 'ALIYUN_APPKEY')
        if len(TOKEN) == 0:
            # No static token configured -> request a temporary one.
            TOKEN = self.get_token()
        self.aliyun_service_ok = True
        URL="wss://nls-gateway.aliyuncs.com/ws/v1"
        sr = nls.NlsSpeechTranscriber(
                    url=URL,
                    token=TOKEN,
                    appkey=APPKEY,
                    on_sentence_begin=self.test_on_sentence_begin,
                    on_sentence_end=self.test_on_sentence_end,
                    on_start=self.test_on_start,
                    on_result_changed=self.test_on_result_chg,
                    on_completed=self.test_on_completed,
                    on_error=self.test_on_error,
                    on_close=self.test_on_close,
                    callback_args=[uuid.hex]
                )
        timeout_limit_second = 20
        r = sr.start(aformat="pcm",
                timeout=timeout_limit_second,
                enable_intermediate_result=True,
                enable_punctuation_prediction=True,
                enable_inverse_text_normalization=True)

        import webrtcvad
        vad = webrtcvad.Vad()
        vad.set_mode(1)  # VAD aggressiveness: 1 = mildly aggressive

        is_previous_frame_transmitted = False  # whether the previous chunk was sent upstream
        # NOTE(review): previous_frame_data is never updated after this —
        # it looks like it was meant to hold the last silent chunk; confirm.
        previous_frame_data = None
        echo_cnt = 0      # after speech stops, keep sending this many more chunks
        echo_cnt_max = 4  # size of that trailing-echo window
        keep_alive_last_send_time = time.time()
        while not self.stop:
            # time.sleep(self.capture_interval)
            audio = rad.read(uuid.hex)
            if audio is not None:
                # convert to pcm file
                temp_file = f'{temp_folder}/{uuid.hex}.pcm' #
                dsdata = change_sample_rate(audio, rad.rate, NEW_SAMPLERATE) # 48000 --> 16000
                write_numpy_to_wave(temp_file, NEW_SAMPLERATE, dsdata)
                # read pcm binary
                with open(temp_file, "rb") as f: data = f.read()
                is_speaking, info = is_speaker_speaking(vad, data, NEW_SAMPLERATE)

                if is_speaking or echo_cnt > 0:
                    # Microphone active, or still inside the trailing-echo window.
                    echo_cnt -= 1
                    if not is_previous_frame_transmitted:  # previous chunk had no speech: send it along too
                        if previous_frame_data is not None: data = previous_frame_data + data
                    if is_speaking:
                        echo_cnt = echo_cnt_max
                    slices = zip(*(iter(data),) * 640)  # ship the audio in 640-byte groups
                    for i in slices: sr.send_audio(bytes(i))
                    keep_alive_last_send_time = time.time()
                    is_previous_frame_transmitted = True
                else:
                    is_previous_frame_transmitted = False
                    echo_cnt = 0
                    # Keep the link alive: even in silence, ship some audio
                    # before the server-side timeout fires.
                    if time.time() - keep_alive_last_send_time > timeout_limit_second/2:
                        slices = zip(*(iter(data),) * 640)  # ship the audio in 640-byte groups
                        for i in slices: sr.send_audio(bytes(i))
                        keep_alive_last_send_time = time.time()
                        is_previous_frame_transmitted = True
                self.audio_shape = info
            else:
                time.sleep(0.1)

            if not self.aliyun_service_ok:
                self.stop = True
                self.stop_msg = 'Aliyun音频服务异常,请检查ALIYUN_TOKEN和ALIYUN_APPKEY是否过期。'
        r = sr.stop()

    def get_token(self):
        """Request a temporary Aliyun NLS access token via the OpenAPI."""
        from toolbox import get_conf
        import json
        from aliyunsdkcore.request import CommonRequest
        from aliyunsdkcore.client import AcsClient
        AccessKey_ID, AccessKey_secret = get_conf('ALIYUN_ACCESSKEY', 'ALIYUN_SECRET')

        # Create an AcsClient instance for the Shanghai region endpoint.
        client = AcsClient(
            AccessKey_ID,
            AccessKey_secret,
            "cn-shanghai"
        )

        # Build the CreateToken request.
        request = CommonRequest()
        request.set_method('POST')
        request.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
        request.set_version('2019-02-28')
        request.set_action_name('CreateToken')

        try:
            response = client.do_action_with_exception(request)
            print(response)
            jss = json.loads(response)
            if 'Token' in jss and 'Id' in jss['Token']:
                token = jss['Token']['Id']
                expireTime = jss['Token']['ExpireTime']
                print("token = " + token)
                print("expireTime = " + str(expireTime))
        except Exception as e:
            print(e)

        # NOTE(review): if the request fails or the payload lacks Token.Id,
        # `token` is unbound here and a NameError is raised — consider raising
        # a descriptive error instead.
        return token
|
crazy_functions/live_audio/audio_io.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from scipy import interpolate
|
| 3 |
+
|
| 4 |
+
def Singleton(cls):
    """Class decorator: every construction of `cls` yields one shared instance.

    The first call's constructor arguments are used; later arguments are
    ignored because the cached instance is returned as-is.
    """
    _instances = {}

    def _get_instance(*args, **kargs):
        if cls not in _instances:
            _instances[cls] = cls(*args, **kargs)
        return _instances[cls]

    return _get_instance
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@Singleton
class RealtimeAudioDistribution():
    """Process-wide buffer fanning captured audio out per session uuid.

    Producers call feed(uuid, (rate, samples)); the consumer drains a
    session's accumulated samples with read(uuid).
    """
    def __init__(self) -> None:
        self.data = {}            # uuid -> pending numpy sample buffer
        self.max_len = 1024*1024  # cap per-session backlog
        self.rate = 48000         # read-only: samples per second

    def clean_up(self):
        """Drop all buffered audio for every session."""
        self.data = {}

    def feed(self, uuid, audio):
        """Append a (rate, samples) chunk to the session's buffer, trimming
        the oldest samples once the backlog exceeds max_len."""
        self.rate, samples = audio
        if uuid not in self.data:
            self.data[uuid] = samples
        else:
            merged = np.concatenate((self.data[uuid], samples))
            if len(merged) > self.max_len:
                merged = merged[-self.max_len:]
            self.data[uuid] = merged

    def read(self, uuid):
        """Pop and return everything buffered for `uuid`, or None if empty."""
        return self.data.pop(uuid, None)
|
| 42 |
+
|
| 43 |
+
def change_sample_rate(audio, old_sr, new_sr):
    """Resample `audio` from old_sr to new_sr by linear interpolation.

    Args:
        audio: numpy array of samples; the first axis is time.
        old_sr: original sample rate in Hz.
        new_sr: target sample rate in Hz.

    Returns:
        The resampled signal as an int16 numpy array.
    """
    duration = audio.shape[0] / old_sr

    # Place old and new sample instants on a common time axis.
    old_timeline = np.linspace(0, duration, audio.shape[0])
    new_timeline = np.linspace(0, duration, int(audio.shape[0] * new_sr / old_sr))

    resampled = interpolate.interp1d(old_timeline, audio.T)(new_timeline).T
    return resampled.astype(np.int16)
|
crazy_functions/multi_stage/multi_stage_utils.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from typing import List
|
| 3 |
+
from toolbox import update_ui_lastest_msg, disable_auto_promotion
|
| 4 |
+
from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder
|
| 5 |
+
from request_llms.bridge_all import predict_no_ui_long_connection
|
| 6 |
+
from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
|
| 7 |
+
import time
|
| 8 |
+
import pickle
|
| 9 |
+
|
| 10 |
+
def have_any_recent_upload_files(chatbot):
    """Return True iff this chat session recorded a file upload within 5 minutes."""
    five_minutes = 5 * 60
    if not chatbot:
        return False  # no chatbot session object at all
    upload_record = chatbot._cookies.get("most_recent_uploaded", None)
    if not upload_record:
        return False  # nothing was ever uploaded
    # Fresh enough only if the recorded timestamp is within the window.
    return (time.time() - upload_record["time"]) < five_minutes
|
| 17 |
+
|
| 18 |
+
class GptAcademicState():
    """Base class for plugin state that is pickled into the chatbot cookies."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Hook for subclasses to (re)initialize their fields."""
        pass

    def dump_state(self, chatbot):
        """Persist this state object into the chatbot cookies."""
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    def set_state(self, chatbot, key, value):
        """Set an attribute and immediately persist the whole state."""
        setattr(self, key, value)
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    # BUG FIX: this was a plain function inside the class body (no `self`,
    # no decorator). Calling it through an instance bound the instance as
    # `chatbot` and shifted every argument. Marking it @staticmethod keeps
    # the existing class-level call sites working and fixes instance calls.
    @staticmethod
    def get_state(chatbot, cls=None):
        """Load the pickled state from cookies, or build a fresh `cls()` /
        GptAcademicState() when none exists; attaches `chatbot` on the result."""
        state = chatbot._cookies.get('plugin_state', None)
        if state is not None:
            state = pickle.loads(state)
        elif cls is not None:
            state = cls()
        else:
            state = GptAcademicState()
        state.chatbot = chatbot
        return state
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class GptAcademicGameBaseState():
    """
    Base class for stateful "game" plugins.

    State is pickled into the chatbot cookies under a per-plugin key
    (`plugin_state/<plugin_name>`), and the UI can be locked onto a callback
    plugin while the game is running. Lifecycle: first run builds a fresh
    state via `sync_state` -> `init_game`; subsequent turns reload the pickle
    and drive `continue_game`.
    """
    def init_game(self, chatbot, lock_plugin):
        # First-time initialization of a fresh game state.
        # NOTE(review): the `lock_plugin` parameter is unused here and shadows
        # the method of the same name below — confirm intent.
        self.plugin_name = None
        self.callback_fn = None
        self.delete_game = False
        self.step_cnt = 0

    def lock_plugin(self, chatbot):
        # Route all subsequent user input to this game's callback plugin.
        if self.callback_fn is None:
            raise ValueError("callback_fn is None")
        chatbot._cookies['lock_plugin'] = self.callback_fn
        self.dump_state(chatbot)

    def get_plugin_name(self):
        # The plugin name must be set (by sync_state) before any persistence.
        if self.plugin_name is None:
            raise ValueError("plugin_name is None")
        return self.plugin_name

    def dump_state(self, chatbot):
        # Persist this state under its plugin-specific cookie key.
        chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self)

    def set_state(self, chatbot, key, value):
        # Set an attribute and persist the whole state immediately.
        setattr(self, key, value)
        chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = pickle.dumps(self)

    @staticmethod
    def sync_state(chatbot, llm_kwargs, cls, plugin_name, callback_fn, lock_plugin=True):
        # Load an existing game state from cookies, or create a fresh one.
        state = chatbot._cookies.get(f'plugin_state/{plugin_name}', None)
        if state is not None:
            state = pickle.loads(state)
        else:
            state = cls()
            state.init_game(chatbot, lock_plugin)
        # Re-attach per-request context that is not (or should not be) pickled.
        state.plugin_name = plugin_name
        state.llm_kwargs = llm_kwargs
        state.chatbot = chatbot
        state.callback_fn = callback_fn
        return state

    def continue_game(self, prompt, chatbot, history):
        # Game main body (subclass implements `step`).
        yield from self.step(prompt, chatbot, history)
        self.step_cnt += 1
        # Save state and wrap up this turn.
        self.dump_state(chatbot)
        # If the game has ended, release the UI lock and clear stored state.
        if self.delete_game:
            chatbot._cookies['lock_plugin'] = None
            chatbot._cookies[f'plugin_state/{self.get_plugin_name()}'] = None
        yield from update_ui(chatbot=chatbot, history=history)
|
crazy_functions/pdf_fns/breakdown_txt.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from crazy_functions.ipc_fns.mp import run_in_subprocess_with_timeout
|
| 2 |
+
|
| 3 |
+
def force_breakdown(txt, limit, get_token_fn):
    """Brute-force split used when no punctuation/blank-line cut point exists:
    returns (prefix, remainder) where the prefix is the longest head of `txt`
    whose token count is below `limit`."""
    for cut_at in reversed(range(len(txt))):
        head = txt[:cut_at]
        if get_token_fn(head) < limit:
            return head, txt[cut_at:]
    # No prefix fits (should not happen with a sane tokenizer).
    return "Tiktoken未知错误", "Tiktoken未知错误"
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage):
    """Speed trick for huge inputs: keep the working text between _min and
    _max characters. Excess beyond _max is parked at the front of the storage
    string; when the working text drops below _min, storage is folded back in.
    Returns the updated (working_text, storage) pair."""
    _min, _max = int(5e4), int(1e5)
    # Refill the working window from storage when it runs low.
    if len(remain_txt_to_cut) < _min and remain_txt_to_cut_storage:
        remain_txt_to_cut += remain_txt_to_cut_storage
        remain_txt_to_cut_storage = ""
    # Park anything beyond the cap back into storage.
    if len(remain_txt_to_cut) > _max:
        remain_txt_to_cut_storage = remain_txt_to_cut[_max:] + remain_txt_to_cut_storage
        remain_txt_to_cut = remain_txt_to_cut[:_max]
    return remain_txt_to_cut, remain_txt_to_cut_storage
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=False):
    """Split `txt_tocut` into fragments whose token count (per `get_token_fn`)
    stays within `limit`.

    must_break_at_empty_line: only blank lines (paragraph breaks) qualify as
        split points.
    break_anyway: when no line-based split point exists, fall back to a brute
        character-level split instead of raising RuntimeError.
    """
    res = []
    total_len = len(txt_tocut)
    fin_len = 0
    remain_txt_to_cut = txt_tocut
    remain_txt_to_cut_storage = ""
    # Speed trick: keep only a bounded working window in `remain_txt_to_cut`,
    # parking the rest in `remain_txt_to_cut_storage` (see maintain_storage).
    remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)

    while True:
        if get_token_fn(remain_txt_to_cut) <= limit:
            # Remaining text already fits — emit it and stop.
            res.append(remain_txt_to_cut); fin_len+=len(remain_txt_to_cut)
            break
        else:
            # Over the limit: look for a line-based split point.
            lines = remain_txt_to_cut.split('\n')

            # Estimate a split line index proportional to the token budget.
            estimated_line_cut = limit / get_token_fn(remain_txt_to_cut) * len(lines)
            estimated_line_cut = int(estimated_line_cut)

            # Scan backwards from the estimate for a valid split offset (cnt).
            cnt = 0
            for cnt in reversed(range(estimated_line_cut)):
                if must_break_at_empty_line:
                    # Only blank lines (paragraph boundaries) qualify.
                    if lines[cnt] != "":
                        continue
                prev = "\n".join(lines[:cnt])
                post = "\n".join(lines[cnt:])
                if get_token_fn(prev) < limit:
                    break

            if cnt == 0:
                # NOTE(review): cnt == 0 is treated as "no split point found",
                # but the scan above can also legitimately break AT cnt == 0;
                # in that corner case a valid (prev, post) pair is discarded —
                # confirm this is acceptable.
                if break_anyway:
                    # Brute-force character-level split is allowed.
                    prev, post = force_breakdown(remain_txt_to_cut, limit, get_token_fn)
                else:
                    # Not allowed — report the unsplittable text.
                    raise RuntimeError(f"存在一行极长的文本!{remain_txt_to_cut}")

            # Record the finished fragment.
            res.append(prev); fin_len+=len(prev)
            # Prepare the next iteration.
            remain_txt_to_cut = post
            remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)
            process = fin_len/total_len
            print(f'正在文本切分 {int(process*100)}%')
            if len(remain_txt_to_cut.strip()) == 0:
                break
    return res
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def breakdown_text_to_satisfy_token_limit_(txt, limit, llm_model="gpt-3.5-turbo"):
    """Try progressively more aggressive strategies to split `txt` into
    fragments that each fit within `limit` tokens for the given model."""
    from request_llms.bridge_all import model_info
    enc = model_info[llm_model]['tokenizer']
    def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))
    try:
        # Attempt 1: split only at double newlines (paragraph boundaries).
        return cut(limit, get_token_fn, txt, must_break_at_empty_line=True)
    except RuntimeError:
        try:
            # Attempt 2: split at single newlines.
            return cut(limit, get_token_fn, txt, must_break_at_empty_line=False)
        except RuntimeError:
            try:
                # Attempt 3: split at English periods. The Chinese full stop is
                # deliberately inserted as a temporary marker, then restored.
                res = cut(limit, get_token_fn, txt.replace('.', '。\n'), must_break_at_empty_line=False)
                return [r.replace('。\n', '.') for r in res]
            except RuntimeError as e:
                try:
                    # Attempt 4: split at Chinese full stops (marker doubled so
                    # it cannot collide with attempt 3, then restored).
                    res = cut(limit, get_token_fn, txt.replace('。', '。。\n'), must_break_at_empty_line=False)
                    return [r.replace('。。\n', '。') for r in res]
                except RuntimeError as e:
                    # Attempt 5: last resort — brute-force split anywhere.
                    return cut(limit, get_token_fn, txt, must_break_at_empty_line=False, break_anyway=True)

# Run the splitter in a watchdog subprocess so a pathological input cannot hang the UI.
breakdown_text_to_satisfy_token_limit = run_in_subprocess_with_timeout(breakdown_text_to_satisfy_token_limit_, timeout=60)

if __name__ == '__main__':
    # Manual smoke test: split a large PDF's text into ~2500-token fragments.
    from crazy_functions.crazy_utils import read_and_clean_pdf_text
    file_content, page_one = read_and_clean_pdf_text("build/assets/at.pdf")

    from request_llms.bridge_all import model_info
    for i in range(5):
        # Double the text five times to stress the splitter.
        file_content += file_content

    print(len(file_content))
    TOKEN_LIMIT_PER_FRAGMENT = 2500
    res = breakdown_text_to_satisfy_token_limit(file_content, TOKEN_LIMIT_PER_FRAGMENT)
|
| 125 |
+
|
crazy_functions/pdf_fns/parse_pdf.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from functools import lru_cache
|
| 2 |
+
from toolbox import gen_time_str
|
| 3 |
+
from toolbox import promote_file_to_downloadzone
|
| 4 |
+
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
| 5 |
+
from toolbox import get_conf
|
| 6 |
+
from toolbox import ProxyNetworkActivate
|
| 7 |
+
from colorful import *
|
| 8 |
+
import requests
|
| 9 |
+
import random
|
| 10 |
+
import copy
|
| 11 |
+
import os
|
| 12 |
+
import math
|
| 13 |
+
|
| 14 |
+
class GROBID_OFFLINE_EXCEPTION(Exception): pass  # raised when the GROBID service is unreachable
|
| 15 |
+
|
| 16 |
+
def get_avail_grobid_url():
    """Pick a random GROBID endpoint from config and return it if it responds
    to the /api/isalive probe; return None when none is configured or the
    probe fails."""
    GROBID_URLS = get_conf('GROBID_URLS')
    if len(GROBID_URLS) == 0: return None
    try:
        _grobid_url = random.choice(GROBID_URLS)  # 随机负载均衡 (random load balancing)
        _grobid_url = _grobid_url.rstrip('/')     # normalize a trailing slash (rstrip is a no-op otherwise)
        with ProxyNetworkActivate('Connect_Grobid'):
            res = requests.get(_grobid_url + '/api/isalive')
        if res.text == 'true':
            return _grobid_url
        return None
    except Exception:
        # FIX: was a bare `except:` which also swallowed KeyboardInterrupt /
        # SystemExit. Network failure or a bad config entry → endpoint unusable.
        return None
|
| 28 |
+
|
| 29 |
+
@lru_cache(maxsize=32)
def parse_pdf(pdf_path, grobid_url):
    """Parse a PDF into a structured article dict via a GROBID service.

    Results are cached per (pdf_path, grobid_url). Raises
    GROBID_OFFLINE_EXCEPTION when the service is down, RuntimeError when
    parsing itself fails.
    """
    import scipdf # pip install scipdf_parser
    grobid_url = grobid_url.rstrip('/')  # normalize trailing slash (rstrip is a no-op otherwise)
    try:
        with ProxyNetworkActivate('Connect_Grobid'):
            article_dict = scipdf.parse_pdf_to_dict(pdf_path, grobid_url=grobid_url)
    except GROBID_OFFLINE_EXCEPTION:
        raise GROBID_OFFLINE_EXCEPTION("GROBID服务不可用,请修改config中的GROBID_URL,可修改成本地GROBID服务。")
    except Exception as e:
        # FIX: was a bare `except:` that also caught KeyboardInterrupt and
        # discarded the original traceback; chain the cause for debuggability.
        raise RuntimeError("解析PDF失败,请检查PDF是否损坏。") from e
    return article_dict
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chatbot, fp, generated_conclusion_files):
    """Write two markdown reports from the translation results and promote
    both to the download zone:
      1. original and translation interleaved;
      2. translation only (section headings emitted once).
    Returns the path of file (2). `fp` is unused here."""
    # -=-=-=-=-=-=-=-= file 1: original + translation interleaved -=-=-=-=-=-=-=-=
    res_path = write_history_to_file(meta + ["# Meta Translation" , paper_meta_info] + gpt_response_collection, file_basename=f"{gen_time_str()}translated_and_original.md", file_fullname=None)
    promote_file_to_downloadzone(res_path, rename_file=os.path.basename(res_path)+'.md', chatbot=chatbot)
    generated_conclusion_files.append(res_path)

    # -=-=-=-=-=-=-=-= file 2: translated text only -=-=-=-=-=-=-=-=
    translated_res_array = []
    # Track the current top-level section heading so it is emitted only once.
    last_section_name = ""
    for index, value in enumerate(gpt_response_collection):
        # Odd indices hold the model replies (even indices hold the prompts).
        if index % 2 != 0:
            # Recover the English section heading from the paired prompt,
            # stripping any " Part-N" suffix.
            cur_section_name = gpt_response_collection[index-1].split('\n')[0].split(" Part")[0]
            # Emit the heading only when it changes from the previous fragment.
            if cur_section_name != last_section_name:
                cur_value = cur_section_name + '\n'
                last_section_name = copy.deepcopy(cur_section_name)
            else:
                cur_value = ""
            # Keep the (English) heading, then append the translated body.
            cur_value += value
            translated_res_array.append(cur_value)
    res_path = write_history_to_file(meta + ["# Meta Translation" , paper_meta_info] + translated_res_array,
                                     file_basename = f"{gen_time_str()}-translated_only.md",
                                     file_fullname = None,
                                     auto_caption = False)
    promote_file_to_downloadzone(res_path, rename_file=os.path.basename(res_path)+'.md', chatbot=chatbot)
    generated_conclusion_files.append(res_path)
    return res_path
|
| 74 |
+
|
| 75 |
+
def translate_pdf(article_dict, llm_kwargs, chatbot, fp, generated_conclusion_files, TOKEN_LIMIT_PER_FRAGMENT, DST_LANG):
    """Generator: translate a GROBID-parsed article dict section by section
    into DST_LANG, then emit a markdown report and an HTML side-by-side
    report, appending the produced files to `generated_conclusion_files`."""
    from crazy_functions.pdf_fns.report_gen_html import construct_html
    from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
    from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
    from crazy_functions.crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency

    prompt = "以下是一篇学术论文的基本信息:\n"
    # title
    title = article_dict.get('title', '无法获取 title'); prompt += f'title:{title}\n\n'
    # authors (truncated to 100 chars)
    authors = article_dict.get('authors', '无法获取 authors')[:100]; prompt += f'authors:{authors}\n\n'
    # abstract
    abstract = article_dict.get('abstract', '无法获取 abstract'); prompt += f'abstract:{abstract}\n\n'
    # translation command
    prompt += f"请将题目和摘要翻译为{DST_LANG}。"
    meta = [f'# Title:\n\n', title, f'# Abstract:\n\n', abstract ]

    # Single thread: translate the paper's meta information (title + abstract).
    paper_meta_info = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=prompt,
        inputs_show_user=prompt,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot, history=[],
        sys_prompt="You are an academic paper reader。",
    )

    # Multi-threaded: translate the body sections.
    inputs_array = []
    inputs_show_user_array = []

    # Tokenizer for the selected model.
    from request_llms.bridge_all import model_info
    enc = model_info[llm_kwargs['llm_model']]['tokenizer']
    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

    def break_down(txt):
        # Split a section into fragments that fit the per-fragment token limit.
        raw_token_num = get_token_num(txt)
        if raw_token_num <= TOKEN_LIMIT_PER_FRAGMENT:
            return [txt]
        else:
            # raw_token_num > TOKEN_LIMIT_PER_FRAGMENT:
            # pick a smoothed limit so fragments come out roughly even.
            count = int(math.ceil(raw_token_num / TOKEN_LIMIT_PER_FRAGMENT))
            token_limit_smooth = raw_token_num // count + count
            return breakdown_text_to_satisfy_token_limit(txt, limit=token_limit_smooth, llm_model=llm_kwargs['llm_model'])

    for section in article_dict.get('sections'):
        if len(section['text']) == 0: continue
        section_frags = break_down(section['text'])
        for i, fragment in enumerate(section_frags):
            heading = section['heading']
            # Disambiguate multi-fragment sections with a Part suffix.
            if len(section_frags) > 1: heading += f' Part-{i+1}'
            inputs_array.append(
                f"你需要翻译{heading}章节,内容如下: \n\n{fragment}"
            )
            inputs_show_user_array.append(
                f"# {heading}\n\n{fragment}"
            )

    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=inputs_array,
        inputs_show_user_array=inputs_show_user_array,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history_array=[meta for _ in inputs_array],
        sys_prompt_array=[
            "请你作为一个学术翻译,负责把学术论文准确翻译成中文。注意文章中的每一句话都要翻译。" for _ in inputs_array],
    )
    # -=-=-=-=-=-=-=-= write the markdown report -=-=-=-=-=-=-=-=
    produce_report_markdown(gpt_response_collection, meta, paper_meta_info, chatbot, fp, generated_conclusion_files)

    # -=-=-=-=-=-=-=-= write the HTML side-by-side report -=-=-=-=-=-=-=-=
    ch = construct_html()
    orig = ""
    trans = ""
    gpt_response_collection_html = copy.deepcopy(gpt_response_collection)
    for i,k in enumerate(gpt_response_collection_html):
        if i%2==0:
            # Even entries: show the original fragment (with heading).
            gpt_response_collection_html[i] = inputs_show_user_array[i//2]
        else:
            # Odd entries: prepend the English section heading to the translation.
            cur_section_name = gpt_response_collection[i-1].split('\n')[0].split(" Part")[0]
            cur_value = cur_section_name + "\n" + gpt_response_collection_html[i]
            gpt_response_collection_html[i] = cur_value

    final = ["", "", "一、论文概况", "", "Abstract", paper_meta_info, "二、论文翻译", ""]
    final.extend(gpt_response_collection_html)
    # Pair up (original, translated) entries into table rows.
    for i, k in enumerate(final):
        if i%2==0:
            orig = k
        if i%2==1:
            trans = k
            ch.add_row(a=orig, b=trans)
    create_report_file_name = f"{os.path.basename(fp)}.trans.html"
    html_file = ch.save_file(create_report_file_name)
    generated_conclusion_files.append(html_file)
    promote_file_to_downloadzone(html_file, rename_file=os.path.basename(html_file), chatbot=chatbot)
|
crazy_functions/pdf_fns/report_gen_html.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import update_ui, get_conf, trimmed_format_exc, get_log_folder
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class construct_html():
    """Accumulates (original, translated) row pairs as a JS-array fragment and
    renders them into report_template.html."""

    def __init__(self) -> None:
        # Concatenated JS object literals, one per added row.
        self.html_string = ""

    def add_row(self, a, b):
        """Append one side-by-side row; `a` is the primary (original) markdown,
        `b` the secondary (translated) markdown."""
        from toolbox import markdown_convertion
        template = """
            {
                primary_col: {
                    header: String.raw`__PRIMARY_HEADER__`,
                    msg: String.raw`__PRIMARY_MSG__`,
                },
                secondary_rol: {
                    header: String.raw`__SECONDARY_HEADER__`,
                    msg: String.raw`__SECONDARY_MSG__`,
                }
            },
        """
        def std(str):
            # NOTE(review): as written this replace is a no-op (backtick ->
            # backtick); it was presumably meant to escape backticks (e.g. to
            # an HTML entity) so they cannot terminate the String.raw`...`
            # literal — confirm against the original source.
            str = str.replace(r'`',r'`')
            # A trailing backslash, brace or dollar could break the JS
            # template literal; pad with a space.
            if str.endswith("\\"): str += ' '
            if str.endswith("}"): str += ' '
            if str.endswith("$"): str += ' '
            return str

        template_ = template
        a_lines = a.split('\n')
        b_lines = b.split('\n')

        # Single-line (or very long first line) input: use a truncated prefix
        # as the header and render the whole text as the message.
        if len(a_lines) == 1 or len(a_lines[0]) > 50:
            template_ = template_.replace("__PRIMARY_HEADER__", std(a[:20]))
            template_ = template_.replace("__PRIMARY_MSG__", std(markdown_convertion(a)))
        else:
            # Otherwise the first line becomes the header, the rest the message.
            template_ = template_.replace("__PRIMARY_HEADER__", std(a_lines[0]))
            template_ = template_.replace("__PRIMARY_MSG__", std(markdown_convertion('\n'.join(a_lines[1:]))))

        if len(b_lines) == 1 or len(b_lines[0]) > 50:
            template_ = template_.replace("__SECONDARY_HEADER__", std(b[:20]))
            template_ = template_.replace("__SECONDARY_MSG__", std(markdown_convertion(b)))
        else:
            template_ = template_.replace("__SECONDARY_HEADER__", std(b_lines[0]))
            template_ = template_.replace("__SECONDARY_MSG__", std(markdown_convertion('\n'.join(b_lines[1:]))))
        self.html_string += template_

    def save_file(self, file_name):
        """Render all accumulated rows into the HTML template and write the
        report into the log folder; returns the written file's path."""
        from toolbox import get_log_folder
        with open('crazy_functions/pdf_fns/report_template.html', 'r', encoding='utf8') as f:
            html_template = f.read()
        html_template = html_template.replace("__TF_ARR__", self.html_string)
        with open(os.path.join(get_log_folder(), file_name), 'w', encoding='utf8') as f:
            # Drop characters that cannot round-trip through utf-8.
            f.write(html_template.encode('utf-8', 'ignore').decode())
        return os.path.join(get_log_folder(), file_name)
|
crazy_functions/pdf_fns/report_template.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
crazy_functions/vt_fns/vt_call_plugin.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from typing import List
|
| 3 |
+
from toolbox import update_ui_lastest_msg, disable_auto_promotion
|
| 4 |
+
from request_llms.bridge_all import predict_no_ui_long_connection
|
| 5 |
+
from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
|
| 6 |
+
import copy, json, pickle, os, sys, time
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def read_avail_plugin_enum():
    """Collect every registered plugin that carries an 'Info' description and
    build: (1) a JSON prompt enumerating them as F_0001..F_nnnn, (2) a dict
    mapping those ids to plugin entries, and (3) a lenient lookup dict that
    additionally accepts un-padded ids like "F_1"."""
    from crazy_functional import get_crazy_functions
    plugin_arr = get_crazy_functions()
    # Remove plugins without an explanation ('Info') — the LLM cannot choose them.
    plugin_arr = {k:v for k, v in plugin_arr.items() if 'Info' in v}
    plugin_arr_info = {"F_{:04d}".format(i):v["Info"] for i, v in enumerate(plugin_arr.values(), start=1)}
    plugin_arr_dict = {"F_{:04d}".format(i):v for i, v in enumerate(plugin_arr.values(), start=1)}
    plugin_arr_dict_parse = {"F_{:04d}".format(i):v for i, v in enumerate(plugin_arr.values(), start=1)}
    # Also accept ids without zero padding (e.g. "F_12") from sloppy model output.
    plugin_arr_dict_parse.update({f"F_{i}":v for i, v in enumerate(plugin_arr.values(), start=1)})
    prompt = json.dumps(plugin_arr_info, ensure_ascii=False, indent=2)
    prompt = "\n\nThe defination of PluginEnum:\nPluginEnum=" + prompt
    return prompt, plugin_arr_dict, plugin_arr_dict_parse
|
| 21 |
+
|
| 22 |
+
def wrap_code(txt):
    """Strip embedded triple-backticks from `txt` and wrap it in a fenced code block."""
    sanitized = txt.replace('```', '')
    return "\n```\n" + sanitized + "\n```\n"
|
| 25 |
+
|
| 26 |
+
def have_any_recent_upload_files(chatbot):
    """Return True when the session's most recent file upload happened less
    than five minutes ago."""
    window_seconds = 5 * 60
    if not chatbot:
        return False  # no session object available
    record = chatbot._cookies.get("most_recent_uploaded", None)
    if not record:
        return False  # no upload on record
    return time.time() - record["time"] < window_seconds
|
| 33 |
+
|
| 34 |
+
def get_recent_file_prompt_support(chatbot):
    """Build a prompt snippet informing the model of the user's most recently
    uploaded file, so plugins that take a path argument can use it."""
    most_recent_uploaded = chatbot._cookies.get("most_recent_uploaded", None)
    path = most_recent_uploaded['path']
    prompt = "\nAdditional Information:\n"
    # BUG FIX: the next line previously reassigned `prompt` with `=` instead
    # of appending with `+=`, silently dropping the header above.
    prompt += "In case that this plugin requires a path or a file as argument,"
    prompt += f"it is important for you to know that the user has recently uploaded a file, located at: `{path}`"
    prompt += f"Only use it when necessary, otherwise, you can ignore this file."
    return prompt
|
| 42 |
+
|
| 43 |
+
def get_inputs_show_user(inputs, plugin_arr_enum_prompt):
    """Produce a display-friendly version of `inputs`: the long plugin-enum
    prompt is removed and replaced by a 200-char preview plus ellipses."""
    shown = inputs.replace(plugin_arr_enum_prompt, "")
    shown += plugin_arr_enum_prompt[:200] + '...'
    shown += '\n...\n'
    shown += '...\n'
    shown += '...}'
    return shown
|
| 51 |
+
|
| 52 |
+
def execute_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
    """Generator: let the LLM pick the best-matching plugin for the user's
    request `txt`, extract its argument, then run it. Three stages:
    (1) plugin selection, (2) argument extraction, (3) execution."""
    plugin_arr_enum_prompt, plugin_arr_dict, plugin_arr_dict_parse = read_avail_plugin_enum()
    class Plugin(BaseModel):
        plugin_selection: str = Field(description="The most related plugin from one of the PluginEnum.", default="F_0000")
        reason_of_selection: str = Field(description="The reason why you should select this plugin.", default="This plugin satisfy user requirement most")
    # ⭐ ⭐ ⭐ Stage 1: select a plugin.
    yield from update_ui_lastest_msg(lastmsg=f"正在执行任务: {txt}\n\n查找可用插件中...", chatbot=chatbot, history=history, delay=0)
    gpt_json_io = GptJsonIO(Plugin)
    gpt_json_io.format_instructions = "The format of your output should be a json that can be parsed by json.loads.\n"
    gpt_json_io.format_instructions += """Output example: {"plugin_selection":"F_1234", "reason_of_selection":"F_1234 plugin satisfy user requirement most"}\n"""
    gpt_json_io.format_instructions += "The plugins you are authorized to use are listed below:\n"
    gpt_json_io.format_instructions += plugin_arr_enum_prompt
    inputs = "Choose the correct plugin according to user requirements, the user requirement is: \n\n" + \
             ">> " + txt.rstrip('\n').replace('\n','\n>> ') + '\n\n' + gpt_json_io.format_instructions

    run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
        inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
    try:
        gpt_reply = run_gpt_fn(inputs, "")
        plugin_sel = gpt_json_io.generate_output_auto_repair(gpt_reply, run_gpt_fn)
    except JsonStringError:
        # The model's JSON could not be parsed even after auto-repair.
        msg = f"抱歉, {llm_kwargs['llm_model']}无法理解您的需求。"
        msg += "请求的Prompt为:\n" + wrap_code(get_inputs_show_user(inputs, plugin_arr_enum_prompt))
        msg += "语言模型回复为:\n" + wrap_code(gpt_reply)
        msg += "\n但您可以尝试再试一次\n"
        yield from update_ui_lastest_msg(lastmsg=msg, chatbot=chatbot, history=history, delay=2)
        return
    if plugin_sel.plugin_selection not in plugin_arr_dict_parse:
        # The model invented a plugin id that does not exist.
        msg = f"抱歉, 找不到合适插件执行该任务, 或者{llm_kwargs['llm_model']}无法理解您的需求。"
        msg += f"语言模型{llm_kwargs['llm_model']}选择了不存在的插件:\n" + wrap_code(gpt_reply)
        msg += "\n但您可以尝试再试一次\n"
        yield from update_ui_lastest_msg(lastmsg=msg, chatbot=chatbot, history=history, delay=2)
        return

    # ⭐ ⭐ ⭐ Stage 2: confirm the plugin argument.
    # If a file was uploaded recently, tell the model where it lives.
    if not have_any_recent_upload_files(chatbot):
        appendix_info = ""
    else:
        appendix_info = get_recent_file_prompt_support(chatbot)

    plugin = plugin_arr_dict_parse[plugin_sel.plugin_selection]
    yield from update_ui_lastest_msg(lastmsg=f"正在执行任务: {txt}\n\n提取插件参数...", chatbot=chatbot, history=history, delay=0)
    class PluginExplicit(BaseModel):
        plugin_selection: str = plugin_sel.plugin_selection
        plugin_arg: str = Field(description="The argument of the plugin.", default="")
    gpt_json_io = GptJsonIO(PluginExplicit)
    gpt_json_io.format_instructions += "The information about this plugin is:" + plugin["Info"]
    inputs = f"A plugin named {plugin_sel.plugin_selection} is selected, " + \
             "you should extract plugin_arg from the user requirement, the user requirement is: \n\n" + \
             ">> " + (txt + appendix_info).rstrip('\n').replace('\n','\n>> ') + '\n\n' + \
             gpt_json_io.format_instructions
    run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
        inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
    plugin_sel = gpt_json_io.generate_output_auto_repair(run_gpt_fn(inputs, ""), run_gpt_fn)


    # ⭐ ⭐ ⭐ Stage 3: execute the plugin.
    fn = plugin['Function']
    fn_name = fn.__name__
    msg = f'{llm_kwargs["llm_model"]}为您选择了插件: `{fn_name}`\n\n插件说明:{plugin["Info"]}\n\n插件参数:{plugin_sel.plugin_arg}\n\n假如偏离了您的要求,按停止键终止。'
    yield from update_ui_lastest_msg(lastmsg=msg, chatbot=chatbot, history=history, delay=2)
    yield from fn(plugin_sel.plugin_arg, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, -1)
    return
|
crazy_functions/vt_fns/vt_modify_config.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel, Field
|
| 2 |
+
from typing import List
|
| 3 |
+
from toolbox import update_ui_lastest_msg, get_conf
|
| 4 |
+
from request_llms.bridge_all import predict_no_ui_long_connection
|
| 5 |
+
from crazy_functions.json_fns.pydantic_io import GptJsonIO
|
| 6 |
+
import copy, json, pickle, os, sys
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def modify_configuration_hot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
    """Hot-apply a configuration change requested in natural language.

    Maps the user's free-form request onto one of the options declared in
    config.py using the LLM (via GptJsonIO), then applies it immediately with
    toolbox.set_conf — no restart. Guarded by the ALLOW_RESET_CONFIG switch.

    This is a generator (yields UI refreshes); the `user_intention` parameter
    is overwritten internally with the parsed intention object.
    """
    ALLOW_RESET_CONFIG = get_conf('ALLOW_RESET_CONFIG')
    if not ALLOW_RESET_CONFIG:
        # Configuration modification is disabled by default; bail out with a hint.
        yield from update_ui_lastest_msg(
            lastmsg=f"当前配置不允许被修改!如需激活本功能,请在config.py中设置ALLOW_RESET_CONFIG=True后重启软件。",
            chatbot=chatbot, history=history, delay=2
        )
        return

    # ⭐ ⭐ ⭐ Collect the names of all configurable entries from config.py
    names = {}
    from enum import Enum
    import config
    for k, v in config.__dict__.items():
        if k.startswith('__'): continue
        names.update({k:k})
        # if len(names) > 20: break # Cap the option count if the list grows too long for the LLM to digest

    # Build an Enum on the fly so pydantic restricts the LLM to valid option names.
    ConfigOptions = Enum('ConfigOptions', names)
    class ModifyConfigurationIntention(BaseModel):
        which_config_to_modify: ConfigOptions = Field(description="the name of the configuration to modify, you must choose from one of the ConfigOptions enum.", default=None)
        new_option_value: str = Field(description="the new value of the option", default=None)

    # ⭐ ⭐ ⭐ Analyze the user's intention with the LLM
    yield from update_ui_lastest_msg(lastmsg=f"正在执行任务: {txt}\n\n读取新配置中", chatbot=chatbot, history=history, delay=0)
    gpt_json_io = GptJsonIO(ModifyConfigurationIntention)
    inputs = "Analyze how to change configuration according to following user input, answer me with json: \n\n" + \
             ">> " + txt.rstrip('\n').replace('\n','\n>> ') + '\n\n' + \
             gpt_json_io.format_instructions

    run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
        inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
    user_intention = gpt_json_io.generate_output_auto_repair(run_gpt_fn(inputs, ""), run_gpt_fn)

    explicit_conf = user_intention.which_config_to_modify.value

    # Safety check: only proceed when the option name literally appears in the
    # user's message, so the LLM cannot silently change an unrelated option.
    ok = (explicit_conf in txt)
    if ok:
        yield from update_ui_lastest_msg(
            lastmsg=f"正在执行任务: {txt}\n\n新配置{explicit_conf}={user_intention.new_option_value}",
            chatbot=chatbot, history=history, delay=1
        )
        yield from update_ui_lastest_msg(
            lastmsg=f"正在执行任务: {txt}\n\n新配置{explicit_conf}={user_intention.new_option_value}\n\n正在修改配置中",
            chatbot=chatbot, history=history, delay=2
        )

        # ⭐ ⭐ ⭐ Apply the new configuration immediately
        from toolbox import set_conf
        set_conf(explicit_conf, user_intention.new_option_value)

        yield from update_ui_lastest_msg(
            lastmsg=f"正在执行任务: {txt}\n\n配置修改完成,重新页面即可生效。", chatbot=chatbot, history=history, delay=1
        )
    else:
        yield from update_ui_lastest_msg(
            lastmsg=f"失败,如果需要配置{explicit_conf},您需要明确说明并在指令中提到它。", chatbot=chatbot, history=history, delay=5
        )
+
def modify_configuration_reboot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
    """Apply a configuration change, then restart the whole process so that
    settings read only at startup take effect.

    Delegates the parsing/validation/apply steps to modify_configuration_hot,
    then re-execs the current Python interpreter with the same argv.
    """
    ALLOW_RESET_CONFIG = get_conf('ALLOW_RESET_CONFIG')
    if not ALLOW_RESET_CONFIG:
        yield from update_ui_lastest_msg(
            lastmsg=f"当前配置不允许被修改!如需激活本功能,请在config.py中设置ALLOW_RESET_CONFIG=True后重启软件。",
            chatbot=chatbot, history=history, delay=2
        )
        return

    yield from modify_configuration_hot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)
    yield from update_ui_lastest_msg(
        lastmsg=f"正在执行任务: {txt}\n\n配置修改完成,五秒后即将重启!若出现报错请无视即可。", chatbot=chatbot, history=history, delay=5
    )
    # Replace the running process with a fresh copy of itself (never returns).
    os.execl(sys.executable, sys.executable, *sys.argv)
|
crazy_functions/vt_fns/vt_state.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pickle
|
| 2 |
+
|
| 3 |
+
class VoidTerminalState():
    """Pickle-serializable plugin state for the Void Terminal, persisted inside
    the chatbot cookie dict (`chatbot._cookies`).

    The 'lock_plugin' cookie routes all subsequent user input back into the
    虚空终端 plugin; 'plugin_state' carries this object, pickled.
    """

    def __init__(self):
        self.reset_state()

    def reset_state(self):
        # NOTE: the attribute keeps the historical misspelling ("explaination")
        # because other modules read it under this exact name.
        self.has_provided_explaination = False

    def lock_plugin(self, chatbot):
        """Lock the UI onto the Void Terminal plugin and persist this state."""
        chatbot._cookies['lock_plugin'] = 'crazy_functions.虚空终端->虚空终端'
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    def unlock_plugin(self, chatbot):
        """Release the plugin lock and persist a freshly-reset state."""
        self.reset_state()
        chatbot._cookies['lock_plugin'] = None
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    def set_state(self, chatbot, key, value):
        """Set a single attribute and immediately re-persist the whole state."""
        setattr(self, key, value)
        chatbot._cookies['plugin_state'] = pickle.dumps(self)

    @staticmethod
    def get_state(chatbot):
        """Load the persisted state from the chatbot cookies (or create a fresh
        one), attach the chatbot handle, and return it.

        fix: was a class-body function without `self`/`@staticmethod`; calling it
        on an instance would have misbound the instance as `chatbot`. The
        decorator keeps the existing `VoidTerminalState.get_state(chatbot)`
        call form working unchanged.
        """
        state = chatbot._cookies.get('plugin_state', None)
        if state is not None: state = pickle.loads(state)
        else: state = VoidTerminalState()
        state.chatbot = chatbot
        return state
|
crazy_functions/命令行助手.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import CatchException, update_ui, gen_time_str
|
| 2 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
| 3 |
+
from .crazy_utils import input_clipping
|
| 4 |
+
import copy, json
|
| 5 |
+
|
| 6 |
+
@CatchException
def 命令行助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Turn a natural-language request into a (preferably one-line) bash command.

    txt            user input from the text box
    llm_kwargs     LLM parameters (temperature, top_p, ...), passed through
    plugin_kwargs  plugin parameters (unused here)
    chatbot        chat display handle
    history        chat history (cleared here to avoid prompt overflow)
    system_prompt  silent system prompt for the LLM
    web_port       port the app is running on
    """
    # Start from an empty history so a long previous session cannot overflow the prompt.
    history = []

    prompt = "请写bash命令实现以下功能:" + txt
    response = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=prompt,
        inputs_show_user=txt,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history=[],
        sys_prompt="你是一个Linux大师级用户。注意,当我要求你写bash命令时,尽可能地仅用一行命令解决我的要求。",
    )
    yield from update_ui(chatbot=chatbot, history=history)
| 30 |
+
|
| 31 |
+
|
crazy_functions/对话历史存档.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import CatchException, update_ui, promote_file_to_downloadzone, get_log_folder, get_user
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
f_prefix = 'GPT-Academic对话存档'
|
| 5 |
+
|
| 6 |
+
def write_chat_to_file(chatbot, history=None, file_name=None):
    """Write the chat (rendered HTML rounds plus the raw context) to an HTML
    file in the user's log folder, promote it to the download zone, and return
    a human-readable status string containing the path.

    If `file_name` is None a timestamped name is generated.
    """
    import os
    import time
    if history is None:
        history = []  # fix: with the default, `for h in history` raised TypeError (None is not iterable)
    if file_name is None:
        file_name = f_prefix + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html'
    fp = os.path.join(get_log_folder(get_user(chatbot), plugin_name='chat_history'), file_name)
    with open(fp, 'w', encoding='utf8') as f:
        from themes.theme import advanced_css
        f.write(f'<!DOCTYPE html><head><meta charset="utf-8"><title>对话历史</title><style>{advanced_css}</style></head>')
        for i, contents in enumerate(chatbot):
            for j, content in enumerate(contents):
                try:  # trigger condition of this bug was never found; coerce defensively
                    if type(content) != str: content = str(content)
                except Exception:
                    continue
                f.write(content)
                if j == 0:
                    # dotted rule separates question from answer within one round
                    f.write('<hr style="border-top: dotted 3px #ccc;">')
            # red rule separates rounds (read_file_to_chat splits on this marker)
            f.write('<hr color="red"> \n\n')
        # blue rule separates the rendered HTML from the raw context
        f.write('<hr color="blue"> \n\n raw chat context:\n')
        f.write('<code>')
        for h in history:
            f.write("\n>>>" + h)
        f.write('</code>')
    promote_file_to_downloadzone(fp, rename_file=file_name, chatbot=chatbot)
    return '对话历史写入:' + fp
|
| 35 |
+
|
| 36 |
+
def gen_file_preview(file_name):
    """Return a short (<=100 char) preview of the first raw-context entry of an
    archived chat HTML file, or "" when the file is missing or malformed.
    """
    try:
        with open(file_name, 'r', encoding='utf8') as f:
            file_content = f.read()
        # pattern to match the text between <head> and </head>
        pattern = re.compile(r'<head>.*?</head>', flags=re.DOTALL)
        file_content = re.sub(pattern, '', file_content)
        html, history = file_content.split('<hr color="blue"> \n\n raw chat context:\n')
        # NOTE: str.strip removes a *character set*, not the literal tag text;
        # kept as-is because this preview is best-effort anyway.
        history = history.strip('<code>')
        history = history.strip('</code>')
        history = history.split("\n>>>")
        return list(filter(lambda x:x!="", history))[0][:100]
    except Exception:
        # fix: was a bare `except:` which also swallowed KeyboardInterrupt/SystemExit
        return ""
|
| 50 |
+
|
| 51 |
+
def read_file_to_chat(chatbot, history, file_name):
    """Restore a chat session from an archived HTML file produced by
    write_chat_to_file: refill `chatbot` with the Q/A rounds and return the
    (chatbot, history) pair.
    """
    with open(file_name, 'r', encoding='utf8') as f:
        raw = f.read()
    # Drop the <head>...</head> section (CSS/meta only).
    head_pattern = re.compile(r'<head>.*?</head>', flags=re.DOTALL)
    raw = re.sub(head_pattern, '', raw)
    # The archive stores the rendered HTML first, then the raw context.
    html, history = raw.split('<hr color="blue"> \n\n raw chat context:\n')
    history = history.strip('<code>').strip('</code>')
    history = [entry for entry in history.split("\n>>>") if entry != ""]
    rounds = [seg for seg in html.split('<hr color="red"> \n\n') if seg != ""]
    chatbot.clear()
    for seg in rounds:
        # Each round is "question<dotted rule>answer".
        question, answer = seg.split('<hr style="border-top: dotted 3px #ccc;">')
        chatbot.append([question, answer])
    chatbot.append([f"存档文件详情?", f"[Local Message] 载入对话{len(rounds)}条,上下文{len(history)}条。"])
    return chatbot, history
|
| 70 |
+
|
| 71 |
+
@CatchException
def 对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Save the current conversation to an HTML archive and tell the user how
    to restore it.

    txt            user input from the text box (unused here)
    llm_kwargs     LLM parameters, passed through
    plugin_kwargs  plugin parameters (unused)
    chatbot        chat display handle
    history        chat history
    system_prompt  silent system prompt (unused)
    web_port       port the app is running on (unused)
    """
    chatbot.append(("保存当前对话",
        f"[Local Message] {write_chat_to_file(chatbot, history)},您可以调用下拉菜单中的“载入对话历史存档”还原当下的对话。"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI promptly; the GPT request may take a while
|
| 86 |
+
|
| 87 |
+
def hide_cwd(text):
    """Replace the current working directory prefix in *text* with '.' so that
    absolute log paths are not leaked to the UI.

    fix: the parameter was named `str`, shadowing the builtin. All in-file
    callers pass it positionally.
    """
    import os
    current_path = os.getcwd()
    replace_path = "."
    return text.replace(current_path, replace_path)
|
| 92 |
+
|
| 93 |
+
@CatchException
def 载入对话历史存档(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Load a previously-archived chat (HTML) back into the chatbot.

    txt            user input: path/URL of an archive, or empty
    llm_kwargs     LLM parameters, passed through
    plugin_kwargs  plugin parameters (unused)
    chatbot        chat display handle
    history        chat history (replaced on successful load)
    system_prompt  silent system prompt (unused)
    web_port       port the app is running on (unused)
    """
    from .crazy_utils import get_files_from_everything
    success, file_manifest, _ = get_files_from_everything(txt, type='.html')

    if not success:
        # No usable .html found: list the locally stored archives (with a short
        # preview each) so the user can paste one of the paths and retry.
        if txt == "": txt = '空空如也的输入栏'
        import glob
        local_history = "<br/>".join([
            "`"+hide_cwd(f)+f" ({gen_file_preview(f)})"+"`"
            for f in glob.glob(
                f'{get_log_folder(get_user(chatbot), plugin_name="chat_history")}/**/{f_prefix}*.html',
                recursive=True
            )])
        chatbot.append([f"正在查找对话历史文件(html格式): {txt}", f"找不到任何html文件: {txt}。但本地存储了以下历史文件,您可以将任意一个文件路径粘贴到输入区,然后重试:<br/>{local_history}"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return

    try:
        # Only the first matching archive is loaded.
        chatbot, history = read_file_to_chat(chatbot, history, file_manifest[0])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
    except:
        # Malformed archive (e.g. markers missing) — report instead of crashing.
        chatbot.append([f"载入对话历史文件", f"对话历史文件损坏!"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
|
| 127 |
+
|
| 128 |
+
@CatchException
def 删除所有本地对话历史记录(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Delete every locally stored chat-archive HTML file for the current user
    and report the removed paths.

    txt            user input (unused)
    llm_kwargs     LLM parameters (unused)
    plugin_kwargs  plugin parameters (unused)
    chatbot        chat display handle
    history        chat history
    system_prompt  silent system prompt (unused)
    web_port       port the app is running on (unused)
    """
    import glob, os
    # fix: the same glob was evaluated twice (once for the report, once for the
    # deletion loop); compute the file list a single time.
    files = glob.glob(
        f'{get_log_folder(get_user(chatbot), plugin_name="chat_history")}/**/{f_prefix}*.html',
        recursive=True
    )
    local_history = "<br/>".join(["`"+hide_cwd(f)+"`" for f in files])
    for f in files:
        os.remove(f)
    chatbot.append([f"删除所有历史对话文件", f"已删除<br/>{local_history}"])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
    return
|
| 151 |
+
|
| 152 |
+
|
crazy_functions/生成函数注释.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import update_ui
|
| 2 |
+
from toolbox import CatchException, report_exception
|
| 3 |
+
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
| 4 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
| 5 |
+
fast_debug = False
|
| 6 |
+
|
| 7 |
+
def 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
    """For each file in `file_manifest`, ask the LLM for a file overview plus a
    markdown table of per-function annotations, then write the accumulated
    history to a downloadable report.
    """
    import time, os
    print('begin analysis on:', file_manifest)
    for index, fp in enumerate(file_manifest):
        with open(fp, 'r', encoding='utf-8', errors='replace') as f:
            file_content = f.read()

        # Full prompt contains the whole file; the user-visible line only shows the path.
        i_say = f'请对下面的程序文件做一个概述,并对文件中的所有函数生成注释,使用markdown表格输出结果,文件名是{os.path.relpath(fp, project_folder)},文件内容是 ```{file_content}```'
        i_say_show_user = f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述,并对文件中的所有函数生成注释: {os.path.abspath(fp)}'
        chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

        if not fast_debug:
            msg = '正常'
            # ** gpt request ** (with timeout countdown)
            gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
                i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt)

            chatbot[-1] = (i_say_show_user, gpt_say)
            history.append(i_say_show_user); history.append(gpt_say)
            yield from update_ui(chatbot=chatbot, history=history, msg=msg)  # refresh UI
        if not fast_debug: time.sleep(2)

    if not fast_debug:
        # NOTE(review): `msg` is only bound inside the loop above; this final
        # update would raise NameError if `file_manifest` were empty — callers
        # appear to guard against an empty manifest. TODO confirm.
        res = write_history_to_file(history)
        promote_file_to_downloadzone(res, chatbot=chatbot)
        chatbot.append(("完成了吗?", res))
        yield from update_ui(chatbot=chatbot, history=history, msg=msg)  # refresh UI
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@CatchException
def 批量生成函数注释(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Entry point: collect all .py and .cpp files under the project folder
    given in `txt` and generate per-function annotations for each of them.
    """
    history = []    # clear history to avoid prompt overflow
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.py', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)]

    if len(file_manifest) == 0:
        # fix: the message previously said ".tex", but this plugin searches for .py/.cpp files
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.py或.cpp文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 生成函数注释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
crazy_functions/联网的ChatGPT.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import CatchException, update_ui
|
| 2 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
|
| 3 |
+
import requests
|
| 4 |
+
from bs4 import BeautifulSoup
|
| 5 |
+
from request_llms.bridge_all import model_info
|
| 6 |
+
|
| 7 |
+
def google(query, proxies):
    """Query Google's HTML search page and return a list of result dicts of the
    form {'title': ..., 'link': ...}. `proxies` is a requests-style mapping.
    """
    search_url = f"https://www.google.com/search?q={query}"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
    page = requests.get(search_url, headers=headers, proxies=proxies)
    parsed = BeautifulSoup(page.content, 'html.parser')
    hits = []
    for entry in parsed.find_all('div', class_='g'):
        links = entry.find_all('a')
        if not links:
            continue
        href = links[0]['href']
        # Google sometimes wraps targets as a relative redirect: /url?q=<target>
        if href.startswith('/url?q='):
            href = href[7:]
        if not href.startswith('http'):
            continue
        hits.append({'title': entry.find('h3').text, 'link': href})

    for hit in hits:
        print(hit['link'])
    return hits
|
| 29 |
+
|
| 30 |
+
def scrape_text(url, proxies) -> str:
    """Scrape visible text from a webpage.

    Args:
        url (str): The URL to scrape text from
        proxies: requests-style proxy mapping (or None)

    Returns:
        str: The scraped text, or a Chinese error message when the page
        cannot be fetched.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
        'Content-Type': 'text/plain',
    }
    try:
        response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
        # requests defaults to ISO-8859-1 when no charset header is present;
        # prefer the sniffed encoding to avoid mojibake.
        if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
    except Exception:
        # fix: was a bare `except:` which also swallowed SystemExit/KeyboardInterrupt
        return "无法连接到该网页"
    soup = BeautifulSoup(response.text, "html.parser")
    for script in soup(["script", "style"]):
        script.extract()
    text = soup.get_text()
    lines = (line.strip() for line in text.splitlines())
    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
    text = "\n".join(chunk for chunk in chunks if chunk)
    return text
|
| 56 |
+
|
| 57 |
+
@CatchException
def 连接网络回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Answer a question using live Google results: search, scrape the top
    pages, then let the LLM synthesize an answer.

    txt            user question from the text box
    llm_kwargs     LLM parameters (temperature, top_p, ...), passed through
    plugin_kwargs  plugin parameters (unused)
    chatbot        chat display handle
    history        chat history (cleared, then repopulated with search results)
    system_prompt  silent system prompt (unused here)
    web_port       port the app is running on (unused)
    """
    history = []    # clear history to avoid prompt overflow
    chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
                    "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。您若希望分享新的功能模组,请不吝PR!"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI promptly; the GPT request may take a while

    # ------------- < Step 1: scrape the search engine results > -------------
    from toolbox import get_conf
    proxies = get_conf('proxies')
    urls = google(txt, proxies)
    history = []
    if len(urls) == 0:
        chatbot.append((f"结论:{txt}",
                        "[Local Message] 受到google限制,无法从google获取信息!"))
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    # ------------- < Step 2: visit each result page > -------------
    max_search_result = 5  # at most this many pages are included
    for index, url in enumerate(urls[:max_search_result]):
        res = scrape_text(url['link'], proxies)
        history.extend([f"第{index}份搜索结果:", res])
        chatbot.append([f"第{index}份搜索结果:", res[:500]+"......"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # ------------- < Step 3: let the LLM synthesize an answer > -------------
    i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
    i_say, history = input_clipping( # clip the longest entries first so the prompt stays within the token budget
        inputs=i_say,
        history=history,
        max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
    )
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=i_say,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
        sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
    )
    chatbot[-1] = (i_say, gpt_say)
    history.append(i_say);history.append(gpt_say)
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
|
| 106 |
+
|
crazy_functions/联网的ChatGPT_bing版.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import CatchException, update_ui
|
| 2 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
|
| 3 |
+
import requests
|
| 4 |
+
from bs4 import BeautifulSoup
|
| 5 |
+
from request_llms.bridge_all import model_info
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def bing_search(query, proxies=None):
    """Query Bing's HTML search page (cn.bing.com) and return a list of result
    dicts of the form {'title': ..., 'link': ...}.
    """
    search_url = f"https://cn.bing.com/search?q={query}"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
    page = requests.get(search_url, headers=headers, proxies=proxies)
    parsed = BeautifulSoup(page.content, 'html.parser')
    hits = []
    for entry in parsed.find_all('li', class_='b_algo'):
        links = entry.find_all('a')
        if not links:
            continue
        href = links[0]['href']
        if not href.startswith('http'):
            continue
        hits.append({'title': entry.find('h2').text, 'link': href})

    for hit in hits:
        print(hit['link'])
    return hits
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def scrape_text(url, proxies) -> str:
    """Scrape visible text from a webpage.

    Args:
        url (str): The URL to scrape text from
        proxies: requests-style proxy mapping (or None)

    Returns:
        str: The scraped text, or a Chinese error message when the page
        cannot be fetched.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
        'Content-Type': 'text/plain',
    }
    try:
        response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
        # requests defaults to ISO-8859-1 when no charset header is present;
        # prefer the sniffed encoding to avoid mojibake.
        if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
    except Exception:
        # fix: was a bare `except:` which also swallowed SystemExit/KeyboardInterrupt
        return "无法连接到该网页"
    soup = BeautifulSoup(response.text, "html.parser")
    for script in soup(["script", "style"]):
        script.extract()
    text = soup.get_text()
    lines = (line.strip() for line in text.splitlines())
    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
    text = "\n".join(chunk for chunk in chunks if chunk)
    return text
|
| 56 |
+
|
| 57 |
+
@CatchException
def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Answer a question using live Bing results: search, scrape the top pages,
    then let the LLM synthesize an answer.

    txt            user question from the text box
    llm_kwargs     LLM parameters (temperature, top_p, ...), passed through
    plugin_kwargs  plugin parameters (unused)
    chatbot        chat display handle
    history        chat history (cleared, then repopulated with search results)
    system_prompt  silent system prompt (unused here)
    web_port       port the app is running on (unused)
    """
    history = []    # clear history to avoid prompt overflow
    chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
                    "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。您若希望分享新的功能模组,请不吝PR!"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI promptly; the GPT request may take a while

    # ------------- < Step 1: scrape the search engine results > -------------
    from toolbox import get_conf
    proxies = get_conf('proxies')
    urls = bing_search(txt, proxies)
    history = []
    if len(urls) == 0:
        chatbot.append((f"结论:{txt}",
                        "[Local Message] 受到bing限制,无法从bing获取信息!"))
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    # ------------- < Step 2: visit each result page > -------------
    max_search_result = 8  # at most this many pages are included
    for index, url in enumerate(urls[:max_search_result]):
        res = scrape_text(url['link'], proxies)
        history.extend([f"第{index}份搜索结果:", res])
        chatbot.append([f"第{index}份搜索结果:", res[:500]+"......"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # ------------- < Step 3: let the LLM synthesize an answer > -------------
    i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
    i_say, history = input_clipping( # clip the longest entries first so the prompt stays within the token budget
        inputs=i_say,
        history=history,
        max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
    )
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=i_say,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
        # fix: this runtime string contained mojibake ("进行���结"); restored the
        # intended text to match the sibling Google plugin.
        sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
    )
    chatbot[-1] = (i_say, gpt_say)
    history.append(i_say);history.append(gpt_say)
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
|
| 106 |
+
|
crazy_functions/虚空终端.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Explanation of the Void Terminal Plugin:
|
| 3 |
+
|
| 4 |
+
Please describe in natural language what you want to do.
|
| 5 |
+
|
| 6 |
+
1. You can open the plugin's dropdown menu to explore various capabilities of this project, and then describe your needs in natural language, for example:
|
| 7 |
+
- "Please call the plugin to translate a PDF paper for me. I just uploaded the paper to the upload area."
|
| 8 |
+
- "Please use the plugin to translate a PDF paper, with the address being https://www.nature.com/articles/s41586-019-1724-z.pdf."
|
| 9 |
+
- "Generate an image with blooming flowers and lush green grass using the plugin."
|
| 10 |
+
- "Translate the README using the plugin. The GitHub URL is https://github.com/facebookresearch/co-tracker."
|
| 11 |
+
- "Translate an Arxiv paper for me. The Arxiv ID is 1812.10695. Remember to use the plugin and don't do it manually!"
|
| 12 |
+
- "I don't like the current interface color. Modify the configuration and change the theme to THEME="High-Contrast"."
|
| 13 |
+
- "Could you please explain the structure of the Transformer network?"
|
| 14 |
+
|
| 15 |
+
2. If you use keywords like "call the plugin xxx", "modify the configuration xxx", "please", etc., your intention can be recognized more accurately.
|
| 16 |
+
|
| 17 |
+
3. Your intention can be recognized more accurately when using powerful models like GPT4. This plugin is relatively new, so please feel free to provide feedback on GitHub.
|
| 18 |
+
|
| 19 |
+
4. Now, if you need to process a file, please upload the file (drag the file to the file upload area) or describe the path to the file.
|
| 20 |
+
|
| 21 |
+
5. If you don't need to upload a file, you can simply repeat your command again.
|
| 22 |
+
"""
|
| 23 |
+
explain_msg = """
|
| 24 |
+
## 虚空终端插件说明:
|
| 25 |
+
|
| 26 |
+
1. 请用**自然语言**描述您需要做什么。例如:
|
| 27 |
+
- 「请调用插件,为我翻译PDF论文,论文我刚刚放到上传区了」
|
| 28 |
+
- 「请调用插件翻译PDF论文,地址为https://openreview.net/pdf?id=rJl0r3R9KX」
|
| 29 |
+
- 「把Arxiv论文翻译成中文PDF,arxiv论文的ID是1812.10695,记得用插件!」
|
| 30 |
+
- 「生成一张图片,图中鲜花怒放,绿草如茵,用插件实现」
|
| 31 |
+
- 「用插件翻译README,Github网址是https://github.com/facebookresearch/co-tracker」
|
| 32 |
+
- 「我不喜欢当前的界面颜色,修改配置,把主题THEME更换为THEME="High-Contrast"」
|
| 33 |
+
- 「请调用插件,解析python源代码项目,代码我刚刚打包拖到上传区了」
|
| 34 |
+
- 「请问Transformer网络的结构是怎样的?」
|
| 35 |
+
|
| 36 |
+
2. 您可以打开插件下拉菜单以了解本项目的各种能力。
|
| 37 |
+
|
| 38 |
+
3. 如果您使用「调用插件xxx」、「修改配置xxx」、「请问」等关键词,您的意图可以被识别的更准确。
|
| 39 |
+
|
| 40 |
+
4. 建议使用 GPT3.5 或更强的模型,弱模型可能无法理解您的想法。该插件诞生时间不长,欢迎您前往Github反馈问题。
|
| 41 |
+
|
| 42 |
+
5. 现在,如果需要处理文件,请您上传文件(将文件拖动到文件上传区),或者描述文件所在的路径。
|
| 43 |
+
|
| 44 |
+
6. 如果不需要上传文件,现在您只需要再次重复一次您的指令即可。
|
| 45 |
+
"""
|
| 46 |
+
|
| 47 |
+
from pydantic import BaseModel, Field
|
| 48 |
+
from typing import List
|
| 49 |
+
from toolbox import CatchException, update_ui, is_the_upload_folder
|
| 50 |
+
from toolbox import update_ui_lastest_msg, disable_auto_promotion
|
| 51 |
+
from request_llms.bridge_all import predict_no_ui_long_connection
|
| 52 |
+
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
| 53 |
+
from crazy_functions.crazy_utils import input_clipping
|
| 54 |
+
from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
|
| 55 |
+
from crazy_functions.vt_fns.vt_state import VoidTerminalState
|
| 56 |
+
from crazy_functions.vt_fns.vt_modify_config import modify_configuration_hot
|
| 57 |
+
from crazy_functions.vt_fns.vt_modify_config import modify_configuration_reboot
|
| 58 |
+
from crazy_functions.vt_fns.vt_call_plugin import execute_plugin
|
| 59 |
+
|
| 60 |
+
class UserIntention(BaseModel):
    """Schema the LLM is asked to fill in when classifying a user request.

    Produced either by simple keyword rules or by GptJsonIO-driven JSON
    extraction from a model response.
    """
    user_prompt: str = Field(description="the content of user input", default="")
    intention_type: str = Field(description="the type of user intention, choose from ['ModifyConfiguration', 'ExecutePlugin', 'Chat']", default="ExecutePlugin")
    user_provide_file: bool = Field(description="whether the user provides a path to a file", default=False)
    user_provide_url: bool = Field(description="whether the user provides a url", default=False)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def chat(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention):
    """Plain-chat fallback: forward the user's input to the LLM with an empty
    history, then record the exchange in both the chatbot view and history."""
    reply = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=txt, inputs_show_user=txt,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
        sys_prompt=system_prompt
    )
    chatbot[-1] = [txt, reply]
    history.extend([txt, reply])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# Human-readable (Chinese) labels used when echoing the classified intention
# back to the user in the UI; keys mirror UserIntention.intention_type.
explain_intention_to_user = {
    'Chat': "聊天对话",
    'ExecutePlugin': "调用插件",
    'ModifyConfiguration': "修改配置",
}
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def analyze_intention_with_simple_rules(txt):
    """Keyword-based fast path for intent classification.

    Returns (is_certain, UserIntention). is_certain is True when any trigger
    keyword appears in txt; when several match, a later keyword in the table
    overrides an earlier one (same precedence as the original if-chain).
    """
    intention = UserIntention()
    intention.user_prompt = txt
    matched = False

    keyword_table = (
        ('请问', 'Chat'),
        ('用插件', 'ExecutePlugin'),
        ('修改配置', 'ModifyConfiguration'),
    )
    for keyword, intent_type in keyword_table:
        if keyword in txt:
            matched = True
            intention.intention_type = intent_type

    return matched, intention
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
@CatchException
def 虚空终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Entry point of the "Void Terminal" plugin.

    First shows a usage explanation (and locks the plugin) when the user's
    intent is ambiguous; once intent is recognizable by keyword rules, or an
    explanation was already shown, forwards the request to 虚空终端主路由.
    """
    disable_auto_promotion(chatbot=chatbot)
    # Fetch the void-terminal state attached to this chatbot session
    state = VoidTerminalState.get_state(chatbot)
    appendix_msg = ""

    # Detect user intention with simple keyword rules
    is_certain, _ = analyze_intention_with_simple_rules(txt)
    if is_the_upload_folder(txt):
        # A file was just uploaded: reset the flag so the user is prompted to describe the task
        state.set_state(chatbot=chatbot, key='has_provided_explaination', value=False)
        appendix_msg = "\n\n**很好,您已经上传了文件**,现在请您描述您的需求。"

    if is_certain or (state.has_provided_explaination):
        # Intent is clear (or help already shown): skip the explanation step and route the request
        state.set_state(chatbot=chatbot, key='has_provided_explaination', value=True)
        state.unlock_plugin(chatbot=chatbot)
        yield from update_ui(chatbot=chatbot, history=history)
        yield from 虚空终端主路由(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
        return
    else:
        # Intent is ambiguous: show the usage explanation and wait for the next input
        state.set_state(chatbot=chatbot, key='has_provided_explaination', value=True)
        state.lock_plugin(chatbot=chatbot)
        chatbot.append(("虚空终端状态:", explain_msg+appendix_msg))
        yield from update_ui(chatbot=chatbot, history=history)
        return
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def 虚空终端主路由(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Dispatch a void-terminal request.

    Classifies the user's intent (keyword rules first, then an LLM-backed JSON
    extraction when the rules are not certain) and routes to configuration
    modification, plugin execution, or plain chat.
    """
    history = []
    chatbot.append(("虚空终端状态: ", f"正在执行任务: {txt}"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # ⭐ ⭐ ⭐ Analyze the user's intention
    is_certain, user_intention = analyze_intention_with_simple_rules(txt)
    if not is_certain:
        yield from update_ui_lastest_msg(
            lastmsg=f"正在执行任务: {txt}\n\n分析用户意图中", chatbot=chatbot, history=history, delay=0)
        gpt_json_io = GptJsonIO(UserIntention)
        rf_req = "\nchoose from ['ModifyConfiguration', 'ExecutePlugin', 'Chat']"
        inputs = "Analyze the intention of the user according to following user input: \n\n" + \
            ">> " + (txt+rf_req).rstrip('\n').replace('\n','\n>> ') + '\n\n' + gpt_json_io.format_instructions
        run_gpt_fn = lambda inputs, sys_prompt: predict_no_ui_long_connection(
            inputs=inputs, llm_kwargs=llm_kwargs, history=[], sys_prompt=sys_prompt, observe_window=[])
        analyze_res = run_gpt_fn(inputs, "")
        try:
            user_intention = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
            # Fix: removed a stray `lastmsg = f"...",` statement here — it assigned a
            # never-used one-element tuple (dead copy-paste leftover of the
            # update_ui_lastest_msg call below).
        except JsonStringError:
            # The model could not be coerced into valid JSON: report and abort
            yield from update_ui_lastest_msg(
                lastmsg=f"正在执行任务: {txt}\n\n用户意图理解: 失败 当前语言模型({llm_kwargs['llm_model']})不能理解您的意图", chatbot=chatbot, history=history, delay=0)
            return

    yield from update_ui_lastest_msg(
        lastmsg=f"正在执行任务: {txt}\n\n用户意图理解: 意图={explain_intention_to_user[user_intention.intention_type]}",
        chatbot=chatbot, history=history, delay=0)

    # User intention: modify this project's configuration
    if user_intention.intention_type == 'ModifyConfiguration':
        yield from modify_configuration_reboot(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)

    # User intention: dispatch to a plugin
    if user_intention.intention_type == 'ExecutePlugin':
        yield from execute_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)

    # User intention: plain chat
    if user_intention.intention_type == 'Chat':
        yield from chat(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_intention)

    return
|
| 180 |
+
|
crazy_functions/解析JupyterNotebook.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import update_ui
|
| 2 |
+
from toolbox import CatchException, report_exception
|
| 3 |
+
from toolbox import write_history_to_file, promote_file_to_downloadzone
|
| 4 |
+
fast_debug = True
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class PaperFileGroup():
    """Collects source files and splits over-long contents into fragments that
    fit a token budget before they are sent to the LLM."""
    def __init__(self):
        self.file_paths = []        # path of each input file
        self.file_contents = []     # raw text of each input file
        self.sp_file_contents = []  # fragments after splitting
        self.sp_file_index = []     # index into file_paths for each fragment
        self.sp_file_tag = []       # display tag (path, possibly with a part suffix)

    def get_token_num(self, txt):
        """Estimate the token count of txt.

        Fix: run_file_split calls self.get_token_num, but this version of the
        class never defined it, so splitting any file raised AttributeError.
        Uses the project's tokenizer when importable, otherwise a crude
        characters/4 heuristic.
        """
        try:
            from request_llms.bridge_all import model_info
            enc = model_info["gpt-3.5-turbo"]['tokenizer']
            return len(enc.encode(txt, disallowed_special=()))
        except Exception:
            return len(txt) // 4  # rough approximation: ~4 characters per token

    def run_file_split(self, max_token_limit=1900):
        """Split each file content so that no fragment exceeds max_token_limit tokens."""
        for index, file_content in enumerate(self.file_contents):
            if self.get_token_num(file_content) < max_token_limit:
                # Short enough: keep the file whole
                self.sp_file_contents.append(file_content)
                self.sp_file_index.append(index)
                self.sp_file_tag.append(self.file_paths[index])
            else:
                # Too long: break into token-bounded segments, tagged part by part
                from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
                segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
                for j, segment in enumerate(segments):
                    self.sp_file_contents.append(segment)
                    self.sp_file_index.append(index)
                    self.sp_file_tag.append(
                        self.file_paths[index] + f".part-{j}.txt")
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def parseNotebook(filename, enable_markdown=1):
    """Flatten a Jupyter notebook into one annotated text blob.

    Code cells (and, when enable_markdown is truthy, markdown cells prefixed
    with "Markdown:") are stripped of blank lines and concatenated, each
    preceded by a numbered header line.
    """
    import json

    blocks = []
    with open(filename, 'r', encoding='utf-8', errors='replace') as fh:
        notebook = json.load(fh)
        for cell in notebook['cells']:
            if not cell['source']:
                continue  # skip cells with no content at all
            non_blank = [ln for ln in cell['source'] if ln.strip() != '']
            cell['source'] = non_blank
            if cell['cell_type'] == 'code':
                blocks.append("".join(non_blank))
            elif enable_markdown and cell['cell_type'] == 'markdown':
                blocks.append("Markdown:" + "".join(non_blank))

    pieces = []
    for idx, code in enumerate(blocks):
        pieces.append(f"This is {idx+1}th code block: \n")
        pieces.append(code + "\n")
    return "".join(pieces)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
    """Explain a set of Jupyter notebooks block-by-block with the LLM.

    Each notebook is flattened by parseNotebook, split into token-bounded
    fragments, analyzed concurrently, and the concatenated result is shown in
    the chatbot and written to a downloadable report file.
    """
    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency

    # advanced_arg toggles markdown-cell inclusion; an empty string means "not provided"
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
    enable_markdown = plugin_kwargs.get("advanced_arg", "1")
    try:
        enable_markdown = int(enable_markdown)
    except ValueError:
        # Non-numeric argument: fall back to including markdown cells
        enable_markdown = 1

    pfg = PaperFileGroup()

    # Parse every notebook into one flat text per file
    for fp in file_manifest:
        file_content = parseNotebook(fp, enable_markdown=enable_markdown)
        pfg.file_paths.append(fp)
        pfg.file_contents.append(file_content)

    # <-------- split over-long notebook texts ---------->
    pfg.run_file_split(max_token_limit=1024)
    n_split = len(pfg.sp_file_contents)

    # One prompt per fragment (note: "ipynbipynb" typo is in the original prompt text;
    # left untouched here because prompts are runtime strings)
    inputs_array = [r"This is a Jupyter Notebook file, tell me about Each Block in Chinese. Focus Just On Code." +
                    r"If a block starts with `Markdown` which means it's a markdown block in ipynbipynb. " +
                    r"Start a new line for a block and block num use Chinese." +
                    f"\n\n{frag}" for frag in pfg.sp_file_contents]
    inputs_show_user_array = [f"{f}的分析如下" for f in pfg.sp_file_tag]
    sys_prompt_array = ["You are a professional programmer."] * n_split

    # Fan out one LLM request per fragment, multi-threaded
    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=inputs_array,
        inputs_show_user_array=inputs_show_user_array,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history_array=[[""] for _ in range(n_split)],
        sys_prompt_array=sys_prompt_array,
        # max_workers=5,  # maximum parallelism allowed by OpenAI
        scroller_max_len=80
    )

    # <-------- collate results ---------->
    block_result = " \n".join(gpt_response_collection)
    chatbot.append(("解析的结果如下", block_result))
    history.extend(["解析的结果如下", block_result])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    # <-------- write report file and finish ---------->
    res = write_history_to_file(history)
    promote_file_to_downloadzone(res, chatbot=chatbot)
    chatbot.append(("完成了吗?", res))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
|
| 110 |
+
|
| 111 |
+
@CatchException
def 解析ipynb文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry: analyze a single .ipynb file or every notebook under a folder.

    txt is either a path to a notebook, a project folder, or empty/invalid
    (reported as an error). Delegates the actual work to ipynb解释.
    """
    chatbot.append([
        "函数插件功能?",
        "对IPynb文件进行解析。Contributor: codycjy."])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    history = []  # clear history to avoid input overflow
    import glob
    import os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "":
            txt = '空空如也的输入栏'
        report_exception(chatbot, history,
                         a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    if txt.endswith('.ipynb'):
        # A single notebook was given directly
        file_manifest = [txt]
    else:
        # Otherwise search the folder recursively for notebooks
        file_manifest = [f for f in glob.glob(
            f'{project_folder}/**/*.ipynb', recursive=True)]
    if len(file_manifest) == 0:
        report_exception(chatbot, history,
                         a=f"解析项目: {txt}", b=f"找不到任何.ipynb文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, )
|
crazy_functions/解析项目源代码.py
ADDED
|
@@ -0,0 +1,371 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import update_ui, promote_file_to_downloadzone, disable_auto_promotion
|
| 2 |
+
from toolbox import CatchException, report_exception, write_history_to_file
|
| 3 |
+
from .crazy_utils import input_clipping
|
| 4 |
+
|
| 5 |
+
def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
    """Analyze a source-code project in two stages:

    1. Summarize each file concurrently (one LLM request per file).
    2. Condense the per-file summaries batch-by-batch (16 files per batch,
       single-threaded, iterative) into an overall project overview.

    Results are streamed to the chatbot UI and written to downloadable reports.
    """
    import os, copy
    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
    from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
    disable_auto_promotion(chatbot=chatbot)

    summary_batch_isolation = True  # True: each summary batch only looks at its own files
    inputs_array = []
    inputs_show_user_array = []
    history_array = []
    sys_prompt_array = []
    report_part_1 = []

    # NOTE(review): the asserted limit (2048) disagrees with the message text (512) — confirm intended cap
    assert len(file_manifest) <= 2048, "源文件太多(超过512个), 请缩减输入文件的数量。或者,您也可以选择删除此行警告,并修改代码拆分file_manifest列表,从而实现分批次处理。"
    ############################## <Stage 1: analyze each file, multi-threaded> ##################################
    for index, fp in enumerate(file_manifest):
        # Read the file; undecodable bytes are replaced rather than raising
        with open(fp, 'r', encoding='utf-8', errors='replace') as f:
            file_content = f.read()
        prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
        i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)},文件代码是 ```{file_content}```'
        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {fp}'
        # Queue the request payloads for the multi-threaded dispatcher
        inputs_array.append(i_say)
        inputs_show_user_array.append(i_say_show_user)
        history_array.append([])
        sys_prompt_array.append("你是一个程序架构分析师,正在分析一个源代码项目。你的回答必须简单明了。")

    # All files read; dispatch one analysis request per file to the LLM
    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array = inputs_array,
        inputs_show_user_array = inputs_show_user_array,
        history_array = history_array,
        sys_prompt_array = sys_prompt_array,
        llm_kwargs = llm_kwargs,
        chatbot = chatbot,
        show_user_at_complete = True
    )

    # Stage 1 finished: persist intermediate results before the aggregation pass
    report_part_1 = copy.deepcopy(gpt_response_collection)
    history_to_return = report_part_1
    res = write_history_to_file(report_part_1)
    promote_file_to_downloadzone(res, chatbot=chatbot)
    chatbot.append(("完成?", "逐个文件分析已完成。" + res + "\n\n正在开始汇总。"))
    yield from update_ui(chatbot=chatbot, history=history_to_return)  # refresh UI

    ############################## <Stage 2: aggregate, single-threaded, batched + iterative> ##################################
    batchsize = 16  # files per aggregation batch
    report_part_2 = []
    previous_iteration_files = []
    last_iteration_result = ""
    while True:
        if len(file_manifest) == 0: break
        this_iteration_file_manifest = file_manifest[:batchsize]
        # gpt_response_collection interleaves [prompt, answer, prompt, answer, ...]
        this_iteration_gpt_response_collection = gpt_response_collection[:batchsize*2]
        file_rel_path = [os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)]
        # Replace each verbose prompt with just the file name to save tokens
        for index, content in enumerate(this_iteration_gpt_response_collection):
            if index%2==0: this_iteration_gpt_response_collection[index] = f"{file_rel_path[index//2]}" # keep only the file name to save tokens
        this_iteration_files = [os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)]
        previous_iteration_files.extend(this_iteration_files)
        previous_iteration_files_string = ', '.join(previous_iteration_files)
        current_iteration_focus = ', '.join(this_iteration_files)
        if summary_batch_isolation: focus = current_iteration_focus
        else: focus = previous_iteration_files_string
        i_say = f'用一张Markdown表格简要描述以下文件的功能:{focus}。根据以上分析,用一句话概括程序的整体功能。'
        if last_iteration_result != "":
            # Carry the previous batch's summary forward as extra system context
            sys_prompt_additional = "已知某些代码的局部作用是:" + last_iteration_result + "\n请继续分析其他源代码,从而更全面地理解项目的整体功能。"
        else:
            sys_prompt_additional = ""
        inputs_show_user = f'根据以上分析,对程序的整体功能和构架重新做出概括,由于输入长度限制,可能需要分组处理,本组文件为 {current_iteration_focus} + 已经汇总的文件组。'
        this_iteration_history = copy.deepcopy(this_iteration_gpt_response_collection)
        this_iteration_history.append(last_iteration_result)
        # Clip the input so it fits the model's context window
        inputs, this_iteration_history_feed = input_clipping(inputs=i_say, history=this_iteration_history, max_token_limit=2560)
        result = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=inputs, inputs_show_user=inputs_show_user, llm_kwargs=llm_kwargs, chatbot=chatbot,
            history=this_iteration_history_feed,  # analysis from previous iterations
            sys_prompt="你是一个程序架构分析师,正在分析一个项目的源代码。" + sys_prompt_additional)

        summary = "请用一句话概括这些文件的整体功能"
        summary_result = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=summary,
            inputs_show_user=summary,
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history=[i_say, result],  # analysis from previous iterations
            sys_prompt="你是一个程序架构分析师,正在分析一个项目的源代码。" + sys_prompt_additional)

        report_part_2.extend([i_say, result])
        last_iteration_result = summary_result
        # Consume this batch and advance to the next
        file_manifest = file_manifest[batchsize:]
        gpt_response_collection = gpt_response_collection[batchsize*2:]

    ############################## <END> ##################################
    history_to_return.extend(report_part_2)
    res = write_history_to_file(history_to_return)
    promote_file_to_downloadzone(res, chatbot=chatbot)
    chatbot.append(("完成了吗?", res))
    yield from update_ui(chatbot=chatbot, history=history_to_return)  # refresh UI
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
@CatchException
def 解析项目本身(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Analyze this project's own Python sources (top level plus one directory deep)."""
    history = []  # reset history so previous turns cannot overflow the input
    import glob
    file_manifest = glob.glob('./*.py') + glob.glob('./*/*.py')
    project_folder = './'
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 120 |
+
|
| 121 |
+
@CatchException
def 解析一个Python项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Recursively collect every .py file under the folder given in txt and analyze them."""
    history = []  # reset history so previous turns cannot overflow the input
    import glob, os
    if not os.path.exists(txt):
        shown = txt if txt != "" else '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {shown}", b = f"找不到本地项目或无权访问: {shown}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    project_folder = txt
    file_manifest = glob.glob(f'{project_folder}/**/*.py', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 138 |
+
|
| 139 |
+
@CatchException
def 解析一个Matlab项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Recursively collect every .m file under the folder given in txt and analyze them."""
    history = []  # reset history so previous turns cannot overflow the input
    import glob, os
    if not os.path.exists(txt):
        shown = txt if txt != "" else '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析Matlab项目: {shown}", b = f"找不到本地项目或无权访问: {shown}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    project_folder = txt
    file_manifest = glob.glob(f'{project_folder}/**/*.m', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析Matlab项目: {txt}", b = f"找不到任何`.m`源文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 156 |
+
|
| 157 |
+
@CatchException
def 解析一个C项目的头文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Analyze only the header files (*.h / *.hpp) of a C/C++ project located at txt.

    Delegates the actual analysis to 解析源代码新. The .c glob is deliberately
    commented out — use 解析一个C项目 for full-source analysis.
    """
    history = []  # clear history to avoid input overflow
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.h', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.hpp', recursive=True)] #+ \
                    # [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.h头文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 176 |
+
|
| 177 |
+
@CatchException
def 解析一个C项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Analyze a C/C++ project: collects *.h, *.cpp, *.hpp and *.c files under txt
    and delegates to 解析源代码新."""
    history = []  # clear history to avoid input overflow
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.h', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.hpp', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
    if len(file_manifest) == 0:
        # Fix: previous message claimed "找不到任何.h头文件" (copy-pasted from the
        # header-only variant) although this function also searches .c/.cpp/.hpp.
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何C/C++源文件(.h/.c/.cpp/.hpp): {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
@CatchException
def 解析一个Java项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Collect Java sources plus build/config artifacts (*.jar/*.xml/*.sh) under txt and analyze them."""
    history = []  # reset history so previous turns cannot overflow the input
    import glob, os
    if not os.path.exists(txt):
        shown = txt if txt != "" else '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {shown}", b=f"找不到本地项目或无权访问: {shown}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    project_folder = txt
    file_manifest = []
    for pattern in ('*.java', '*.jar', '*.xml', '*.sh'):
        file_manifest += glob.glob(f'{project_folder}/**/{pattern}', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何java文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
@CatchException
def 解析一个前端项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Collect front-end sources (ts/tsx/json/js/vue/less/sass/wxml/wxss/css/jsx) under txt and analyze them."""
    history = []  # reset history so previous turns cannot overflow the input
    import glob, os
    if not os.path.exists(txt):
        shown = txt if txt != "" else '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {shown}", b=f"找不到本地项目或无权访问: {shown}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    project_folder = txt
    # Patterns kept in the original concatenation order
    patterns = ('*.ts', '*.tsx', '*.json', '*.js', '*.vue', '*.less',
                '*.sass', '*.wxml', '*.wxss', '*.css', '*.jsx')
    file_manifest = []
    for pattern in patterns:
        file_manifest += glob.glob(f'{project_folder}/**/{pattern}', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何前端相关文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
@CatchException
def 解析一个Golang项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Collect Go sources plus module metadata (go.mod/go.sum/go.work) under txt and analyze them."""
    history = []  # reset history so previous turns cannot overflow the input
    import glob, os
    if not os.path.exists(txt):
        shown = txt if txt != "" else '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {shown}", b=f"找不到本地项目或无权访问: {shown}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    project_folder = txt
    file_manifest = []
    for pattern in ('*.go', 'go.mod', 'go.sum', 'go.work'):
        file_manifest += glob.glob(f'{project_folder}/**/{pattern}', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何golang文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 270 |
+
|
| 271 |
+
@CatchException
def 解析一个Rust项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Analyze a Rust project: collects *.rs, *.toml and *.lock files under txt
    and delegates to 解析源代码新."""
    history = []  # clear history to avoid input overflow
    import glob, os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.rs', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.toml', recursive=True)] + \
                    [f for f in glob.glob(f'{project_folder}/**/*.lock', recursive=True)]
    if len(file_manifest) == 0:
        # Fix: message previously said "找不到任何golang文件" — copy-paste bug from the Golang plugin.
        report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何Rust相关文件(.rs/.toml/.lock): {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 290 |
+
|
| 291 |
+
@CatchException
def 解析一个Lua项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Analyze a Lua project: gather *.lua/*.xml/*.json/*.toml files below `txt`
    and delegate to 解析源代码新 (generator plugin contract)."""
    import glob, os
    history = []  # reset chat history so the prompt does not overflow
    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    project_folder = txt
    # Lua sources plus the data/config formats commonly shipped alongside them
    file_manifest = []
    for suffix in ('*.lua', '*.xml', '*.json', '*.toml'):
        file_manifest += glob.glob(f'{project_folder}/**/{suffix}', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何lua文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
@CatchException
def 解析一个CSharp项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Analyze a C# project: gather *.cs and *.csproj files below `txt`
    and delegate to 解析源代码新 (generator plugin contract)."""
    import glob, os
    history = []  # reset chat history so the prompt does not overflow
    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    project_folder = txt
    file_manifest = []
    for suffix in ('*.cs', '*.csproj'):
        file_manifest += glob.glob(f'{project_folder}/**/{suffix}', recursive=True)
    if not file_manifest:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何CSharp文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
@CatchException
def 解析任意code项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Analyze an arbitrary code project.

    `plugin_kwargs["advanced_arg"]` holds the match specification, e.g.
    "*.c, *.cpp, ^*.py, ^README.md": plain patterns are included,
    "^*.ext" excludes a file suffix, "^name" excludes a file name.
    Matching files are forwarded to 解析源代码新.
    """
    txt_pattern = plugin_kwargs.get("advanced_arg")
    txt_pattern = txt_pattern.replace(",", ",")  # normalize full-width commas
    # patterns to include (e.g. *.c, *.cpp, *.py, config.toml)
    pattern_include = [_.lstrip(" ,").rstrip(" ,") for _ in txt_pattern.split(",") if _ != "" and not _.strip().startswith("^")]
    if not pattern_include: pattern_include = ["*"]  # empty input matches everything
    # file suffixes to exclude (e.g. ^*.c ^*.cpp ^*.py)
    # NOTE(review): exclusions are split on spaces while inclusions are split on
    # commas — looks intentional since trailing commas are stripped; verify against UI help
    pattern_except_suffix = [_.lstrip(" ^*.,").rstrip(" ,") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^*.")]
    pattern_except_suffix += ['zip', 'rar', '7z', 'tar', 'gz']  # never parse archives
    # file names to exclude (e.g. ^README.md); escape dots for the regex below
    pattern_except_name = [_.lstrip(" ^*,").rstrip(" ,").replace(".", r"\.") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^") and not _.strip().startswith("^*.")]
    # build the exclusion regex — BUGFIX: use raw strings; "\." inside a plain
    # string literal is an invalid escape sequence (SyntaxWarning on modern Python)
    pattern_except = r'/[^/]+\.(' + "|".join(pattern_except_suffix) + ')$'
    pattern_except += '|/(' + "|".join(pattern_except_name) + ')$' if pattern_except_name != [] else ''

    history.clear()  # clear history to avoid prompt overflow
    import glob, os, re
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    # if an archive was uploaded, descend into the extracted ".extract" folder so
    # the archive itself is never parsed
    maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)]
    if len(maybe_dir) > 0 and maybe_dir[0].endswith('.extract'):
        extract_folder_path = maybe_dir[0]
    else:
        extract_folder_path = project_folder
    # collect files matching the include patterns, dropping excluded ones;
    # a file survives exclusion when the include pattern explicitly names its suffix
    file_manifest = [f for pattern in pattern_include for f in glob.glob(f'{extract_folder_path}/**/{pattern}', recursive=True) if "" != extract_folder_path and \
                     os.path.isfile(f) and (not re.search(pattern_except, f) or pattern.endswith('.' + re.search(pattern_except, f).group().split('.')[-1]))]
    if len(file_manifest) == 0:
        report_exception(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
|
crazy_functions/谷歌检索小助手.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
| 2 |
+
from toolbox import CatchException, report_exception, promote_file_to_downloadzone
|
| 3 |
+
from toolbox import update_ui, update_ui_lastest_msg, disable_auto_promotion, write_history_to_file
|
| 4 |
+
import logging
|
| 5 |
+
import requests
|
| 6 |
+
import time
|
| 7 |
+
import random
|
| 8 |
+
|
| 9 |
+
ENABLE_ALL_VERSION_SEARCH = True
|
| 10 |
+
|
| 11 |
+
def get_meta_information(url, chatbot, history):
    """Scrape one Google Scholar results page and return paper metadata.

    For every result entry (CSS class ``.gs_ri``) the title, author line,
    citation count and abstract snippet are read from the HTML.  The title is
    then searched on arxiv; when a close title match (>0.90 similarity) is
    found, the full arxiv abstract replaces the truncated Scholar snippet.
    Yields UI updates per paper (generator), finally returns a list of dicts
    with keys: title / author / citation / abstract / is_paper_in_arxiv.
    """
    import arxiv
    import difflib
    import re
    from bs4 import BeautifulSoup
    from toolbox import get_conf
    from urllib.parse import urlparse
    session = requests.session()

    proxies = get_conf('proxies')
    # browser-like headers to reduce the chance of being served a bot page
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
        'Cache-Control':'max-age=0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Connection': 'keep-alive'
    }
    try:
        session.proxies.update(proxies)
    except:
        # proxy config unavailable — warn and fall back to a direct connection
        report_exception(chatbot, history,
                         a=f"获取代理失败 无代理状态下很可能无法访问OpenAI家族的模型及谷歌学术 建议:检查USE_PROXY选项是否修改。",
                         b=f"尝试直接连接")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
    session.headers.update(headers)

    response = session.get(url)
    # parse the results page
    soup = BeautifulSoup(response.text, "html.parser")

    def string_similar(s1, s2):
        # quick (upper-bound) similarity ratio, used for title matching below
        return difflib.SequenceMatcher(None, s1, s2).quick_ratio()

    if ENABLE_ALL_VERSION_SEARCH:
        def search_all_version(url):
            # fetch the "all versions" cluster page and try each listed link
            # until one resolves to an arxiv paper; returns that paper or None
            time.sleep(random.randint(1,5)) # brief random sleep to avoid triggering Google's anti-crawler
            response = session.get(url)
            soup = BeautifulSoup(response.text, "html.parser")

            for result in soup.select(".gs_ri"):
                try:
                    url = result.select_one(".gs_rt").a['href']
                except:
                    continue
                arxiv_id = extract_arxiv_id(url)
                if not arxiv_id:
                    continue
                search = arxiv.Search(
                    id_list=[arxiv_id],
                    max_results=1,
                    sort_by=arxiv.SortCriterion.Relevance,
                )
                # NOTE: returns on the first entry with an arxiv id, even if the
                # lookup itself fails (paper may be None)
                try: paper = next(search.results())
                except: paper = None
                return paper

            return None

        def extract_arxiv_id(url):
            # arxiv_id parsed from the url, or None when it does not match
            pattern = r'arxiv.org/abs/([^/]+)'
            match = re.search(pattern, url)
            if match:
                return match.group(1)
            else:
                return None

    profile = []
    # extract title/author/citation/abstract for every result on the page
    for result in soup.select(".gs_ri"):
        title = result.a.text.replace('\n', ' ').replace('  ', ' ')
        author = result.select_one(".gs_a").text
        try:
            citation = result.select_one(".gs_fl > a[href*='cites']").text # citation count is the link text
        except:
            citation = 'cited by 0'
        abstract = result.select_one(".gs_rs").text.strip() # truncated abstract snippet from the page

        # first, search arxiv by title to try to fetch the full abstract
        search = arxiv.Search(
            query = title,
            max_results = 1,
            sort_by = arxiv.SortCriterion.Relevance,
        )
        try: paper = next(search.results())
        except: paper = None

        is_match = paper is not None and string_similar(title, paper.title) > 0.90

        # if the direct match failed, look through the paper's other versions
        if not is_match and ENABLE_ALL_VERSION_SEARCH:
            other_versions_page_url = [tag['href'] for tag in result.select_one('.gs_flb').select('.gs_nph') if 'cluster' in tag['href']]
            if len(other_versions_page_url) > 0:
                other_versions_page_url = other_versions_page_url[0]
                paper = search_all_version('http://' + urlparse(url).netloc + other_versions_page_url)
                is_match = paper is not None and string_similar(title, paper.title) > 0.90

        if is_match:
            # same paper — take the full arxiv summary
            abstract = paper.summary.replace('\n', ' ')
            is_paper_in_arxiv = True
        else:
            # different paper — keep the Scholar snippet (no-op kept verbatim)
            abstract = abstract
            is_paper_in_arxiv = False

        logging.info('[title]:' + title)
        logging.info('[author]:' + author)
        logging.info('[citation]:' + citation)

        profile.append({
            'title': title,
            'author': author,
            'citation': citation,
            'abstract': abstract,
            'is_paper_in_arxiv': is_paper_in_arxiv,
        })

        # show incremental progress in the chat window
        chatbot[-1] = [chatbot[-1][0], title + f'\n\n是否在arxiv中(不在arxiv中无法获取完整摘要):{is_paper_in_arxiv}\n\n' + abstract]
        yield from update_ui(chatbot=chatbot, history=[]) # refresh UI
    return profile
|
| 133 |
+
|
| 134 |
+
@CatchException
def 谷歌检索小助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Google Scholar assistant plugin entry point.

    `txt` is expected to be a Google Scholar search-results URL.  Metadata for
    every paper on the page is scraped via get_meta_information, then summarized
    by the LLM in batches of 5 as Markdown tables; the transcript is finally
    written to a downloadable file.  Generator: yields UI updates throughout.
    """
    disable_auto_promotion(chatbot=chatbot)
    # basic info: purpose and contributor
    chatbot.append([
        "函数插件功能?",
        "分析用户提供的谷歌学术(google scholar)搜索页面中,出现的所有文章: binary-husky,插件初始化中..."])
    yield from update_ui(chatbot=chatbot, history=history) # refresh UI

    # try importing dependencies; on failure, suggest the install command
    try:
        import arxiv
        import math
        from bs4 import BeautifulSoup
    except:
        report_exception(chatbot, history,
                         a = f"解析项目: {txt}",
                         b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4 arxiv```。")
        yield from update_ui(chatbot=chatbot, history=history) # refresh UI
        return

    # clear history to avoid input overflow
    history = []
    meta_paper_info_list = yield from get_meta_information(txt, chatbot, history)
    if len(meta_paper_info_list) == 0:
        # empty scrape usually means Google's anti-crawler kicked in
        yield from update_ui_lastest_msg(lastmsg='获取文献失败,可能触发了google反爬虫机制。',chatbot=chatbot, history=history, delay=0)
        return
    batchsize = 5
    # process papers in batches; the list is consumed from the front each pass
    for batch in range(math.ceil(len(meta_paper_info_list)/batchsize)):
        if len(meta_paper_info_list[:batchsize]) > 0:
            i_say = "下面是一些学术文献的数据,提取出以下内容:" + \
                    "1、英文题目;2、中文题目翻译;3、作者;4、arxiv公开(is_paper_in_arxiv);4、引用数量(cite);5、中文摘要翻译。" + \
                    f"以下是信息源:{str(meta_paper_info_list[:batchsize])}"

            inputs_show_user = f"请分析此页面中出现的所有文章:{txt},这是第{batch+1}批"
            gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
                inputs=i_say, inputs_show_user=inputs_show_user,
                llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
                sys_prompt="你是一个学术翻译,请从数据中提取信息。你必须使用Markdown表格。你必须逐个文献进行处理。"
            )

            history.extend([ f"第{batch+1}批", gpt_say ])
        meta_paper_info_list = meta_paper_info_list[batchsize:]

    chatbot.append(["状态?",
                    "已经全部完成,您可以试试让AI写一个Related Works,例如您可以继续输入Write a \"Related Works\" section about \"你搜索的研究领域\" for me."])
    msg = '正常'
    yield from update_ui(chatbot=chatbot, history=history, msg=msg) # refresh UI
    # persist the transcript and expose it in the download zone
    path = write_history_to_file(history)
    promote_file_to_downloadzone(path, chatbot=chatbot)
    chatbot.append(("完成了吗?", path));
    yield from update_ui(chatbot=chatbot, history=history, msg=msg) # refresh UI
|
crazy_functions/辅助功能.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# encoding: utf-8
|
| 2 |
+
# @Time : 2023/4/19
|
| 3 |
+
# @Author : Spike
|
| 4 |
+
# @Descr :
|
| 5 |
+
from toolbox import update_ui, get_conf, get_user
|
| 6 |
+
from toolbox import CatchException
|
| 7 |
+
from toolbox import default_user_name
|
| 8 |
+
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
| 9 |
+
import shutil
|
| 10 |
+
import os
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@CatchException
def 猜你想问(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Answer the user's question (or analyze the latest answer when the input
    box is empty) and then have the model suggest three likely follow-up
    questions. Generator: yields UI updates."""
    if txt:
        # fresh question typed: answer it, then append the three suggestions
        show_say = txt
        prompt = txt + '\n回答完问题后,再列出用户可能提出的三个问题。'
    else:
        # empty input: analyze the most recent reply in history instead
        show_say = '分析上述回答,再列出用户可能提出的三个问题。'
        prompt = history[-1] + "\n分析上述回答,再列出用户可能提出的三个问题。"
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=prompt,
        inputs_show_user=show_say,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history=history,
        sys_prompt=system_prompt)
    # overwrite the placeholder row, then record the exchange
    chatbot[-1] = (show_say, gpt_say)
    history.extend([show_say, gpt_say])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@CatchException
def 清除缓存(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Delete the current user's log and upload folders, reporting progress in
    the chat UI. Generator: yields UI updates before and after deletion."""
    chatbot.append(['清除本地缓存数据', '执行中. 删除数据'])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI

    def _log_dir(user=default_user_name):
        # the log folder is created on demand before being wiped
        path = os.path.join(get_conf('PATH_LOGGING'), user)
        if not os.path.exists(path): os.makedirs(path)
        return path

    def _upload_dir(user=default_user_name):
        return os.path.join(get_conf('PATH_PRIVATE_UPLOAD'), user)

    # best-effort removal: missing folders are silently ignored
    shutil.rmtree(_log_dir(get_user(chatbot)), ignore_errors=True)
    shutil.rmtree(_upload_dir(get_user(chatbot)), ignore_errors=True)

    chatbot.append(['清除本地缓存数据', '执行完成'])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI
|
crazy_functions/高级功能函数模板.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from toolbox import CatchException, update_ui
|
| 2 |
+
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
| 3 |
+
import datetime
|
| 4 |
+
@CatchException
def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Demo plugin: for each of the next five days, ask the model which
    historical events happened on that date, with Markdown images.

    Args:
        txt: user input from the text box (e.g. text to translate, or a path).
        llm_kwargs: LLM parameters (temperature, top_p, ...), passed through.
        plugin_kwargs: extra plugin parameters.
        chatbot: chat display handle, used to show output to the user.
        history: chat history (context).
        system_prompt: silent system prompt for the model.
        web_port: port the app is running on.
    """
    history = []  # clear history to avoid prompt overflow
    chatbot.append(("这是什么功能?", "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板(该函数只有20多行代码)。此外我们也提供可同步处理大量文件的多线程Demo供您参考。您若希望分享新的功能模组,请不吝PR!"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh UI early — the GPT request below takes a while
    for i in range(5):
        # BUGFIX: compute the date once per iteration; the original called
        # datetime.date.today() twice, so month and day could come from
        # different days if the loop ran across midnight
        target_date = datetime.date.today() + datetime.timedelta(days=i)
        i_say = f'历史中哪些事件发生在{target_date.month}月{target_date.day}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。'
        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=i_say, inputs_show_user=i_say,
            llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
            sys_prompt="当你想发送一张照片时,请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。"
        )
        chatbot[-1] = (i_say, gpt_say)
        history.append(i_say)
        history.append(gpt_say)
        yield from update_ui(chatbot=chatbot, history=history)  # refresh UI after each day