gordonchan commited on
Commit
170f370
·
verified ·
1 Parent(s): 7b052bb

Upload 32 files

Browse files
request_llms/README.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ P.S. 如果您按照以下步骤成功接入了新的大模型,欢迎发Pull Requests(如果您在自己接入新模型的过程中遇到困难,欢迎加README底部QQ群联系群主)
2
+
3
+
4
+ # 如何接入其他本地大语言模型
5
+
6
+ 1. 复制`request_llms/bridge_llama2.py`,重命名为你喜欢的名字
7
+
8
+ 2. 修改`load_model_and_tokenizer`方法,加载你的模型和分词器(去该模型官网找demo,复制粘贴即可)
9
+
10
+ 3. 修改`llm_stream_generator`方法,定义推理模型(去该模型官网找demo,复制粘贴即可)
11
+
12
+ 4. 命令行测试
13
+ - 修改`tests/test_llms.py`(聪慧如您,只需要看一眼该文件就明白怎么修改了)
14
+ - 运行`python tests/test_llms.py`
15
+
16
+ 5. 测试通过后,在`request_llms/bridge_all.py`中做最后的修改,把你的模型完全接入到框架中(聪慧如您,只需要看一眼该文件就明白怎么修改了)
17
+
18
+ 6. 修改`LLM_MODEL`配置,然后运行`python main.py`,测试最后的效果
19
+
20
+
21
+ # 如何接入其他在线大语言模型
22
+
23
+ 1. 复制`request_llms/bridge_zhipu.py`,重命名为你喜欢的名字
24
+
25
+ 2. 修改`predict_no_ui_long_connection`
26
+
27
+ 3. 修改`predict`
28
+
29
+ 4. 命令行测试
30
+ - 修改`tests/test_llms.py`(聪慧如您,只需要看一眼该文件就明白怎么修改了)
31
+ - 运行`python tests/test_llms.py`
32
+
33
+ 5. 测试通过后,在`request_llms/bridge_all.py`中做最后的修改,把你的模型完全接入到框架中(聪慧如您,只需要看一眼该文件就明白怎么修改了)
34
+
35
+ 6. 修改`LLM_MODEL`配置,然后运行`python main.py`,测试最后的效果
request_llms/bridge_all.py ADDED
@@ -0,0 +1,729 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """
3
+ 该文件中主要包含2个函数,是所有LLM的通用接口,它们会继续向下调用更底层的LLM模型,处理多模型并行等细节
4
+
5
+ 不具备多线程能力的函数:正常对话时使用,具备完备的交互功能,不可多线程
6
+ 1. predict(...)
7
+
8
+ 具备多线程调用能力的函数:在函数插件中被调用,灵活而简洁
9
+ 2. predict_no_ui_long_connection(...)
10
+ """
11
+ import tiktoken, copy
12
+ from functools import lru_cache
13
+ from concurrent.futures import ThreadPoolExecutor
14
+ from toolbox import get_conf, trimmed_format_exc
15
+
16
+ from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui
17
+ from .bridge_chatgpt import predict as chatgpt_ui
18
+
19
+ from .bridge_chatgpt_vision import predict_no_ui_long_connection as chatgpt_vision_noui
20
+ from .bridge_chatgpt_vision import predict as chatgpt_vision_ui
21
+
22
+ from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
23
+ from .bridge_chatglm import predict as chatglm_ui
24
+
25
+ from .bridge_chatglm3 import predict_no_ui_long_connection as chatglm3_noui
26
+ from .bridge_chatglm3 import predict as chatglm3_ui
27
+
28
+ from .bridge_qianfan import predict_no_ui_long_connection as qianfan_noui
29
+ from .bridge_qianfan import predict as qianfan_ui
30
+
31
+ from .bridge_google_gemini import predict as genai_ui
32
+ from .bridge_google_gemini import predict_no_ui_long_connection as genai_noui
33
+
34
+ colors = ['#FF00FF', '#00FFFF', '#FF0000', '#990099', '#009999', '#990044']
35
+
36
class LazyloadTiktoken(object):
    """
    Thin wrapper around tiktoken that defers the (potentially slow, network-bound)
    encoder construction until the first encode/decode call. The encoder is cached
    per model name, so repeated wrappers for the same model share one encoder.
    """

    def __init__(self, model):
        # Only remember the model name; nothing is downloaded yet.
        self.model = model

    @staticmethod
    @lru_cache(maxsize=128)
    def get_encoder(model):
        # First call per model triggers the tiktoken download/construction.
        print('正在加载tokenizer,如果是第一次运行,可能需要一点时间下载参数')
        tmp = tiktoken.encoding_for_model(model)
        print('加载tokenizer完毕')
        return tmp

    def encode(self, *args, **kwargs):
        # Resolve the cached encoder lazily, then delegate.
        return self.get_encoder(self.model).encode(*args, **kwargs)

    def decode(self, *args, **kwargs):
        return self.get_encoder(self.model).decode(*args, **kwargs)
55
+
56
# Endpoint redirection
API_URL_REDIRECT, AZURE_ENDPOINT, AZURE_ENGINE = get_conf("API_URL_REDIRECT", "AZURE_ENDPOINT", "AZURE_ENGINE")
openai_endpoint = "https://api.openai.com/v1/chat/completions"
api2d_endpoint = "https://openai.api2d.net/v1/chat/completions"
newbing_endpoint = "wss://sydney.bing.com/sydney/ChatHub"
# Normalize the Azure endpoint so the deployment path below concatenates correctly.
if not AZURE_ENDPOINT.endswith('/'): AZURE_ENDPOINT += '/'
azure_endpoint = AZURE_ENDPOINT + f'openai/deployments/{AZURE_ENGINE}/chat/completions?api-version=2023-05-15'
# Backward compatibility with the deprecated API_URL option
try:
    API_URL = get_conf("API_URL")
    if API_URL != "https://api.openai.com/v1/chat/completions":
        openai_endpoint = API_URL
        print("警告!API_URL配置选项将被弃用,请更换为API_URL_REDIRECT配置")
except Exception:
    # API_URL is simply absent in new-style configs; that is expected.
    # (Fixed: was a bare `except:` which would also swallow KeyboardInterrupt/SystemExit.)
    pass
# New-style redirect configuration: allow each known endpoint to be remapped.
if openai_endpoint in API_URL_REDIRECT: openai_endpoint = API_URL_REDIRECT[openai_endpoint]
if api2d_endpoint in API_URL_REDIRECT: api2d_endpoint = API_URL_REDIRECT[api2d_endpoint]
if newbing_endpoint in API_URL_REDIRECT: newbing_endpoint = API_URL_REDIRECT[newbing_endpoint]


# Tokenizers (lazily constructed on first use)
tokenizer_gpt35 = LazyloadTiktoken("gpt-3.5-turbo")
tokenizer_gpt4 = LazyloadTiktoken("gpt-4")
get_token_num_gpt35 = lambda txt: len(tokenizer_gpt35.encode(txt, disallowed_special=()))
get_token_num_gpt4 = lambda txt: len(tokenizer_gpt4.encode(txt, disallowed_special=()))
82
+
83
+
84
# Begin model initialisation
AVAIL_LLM_MODELS, LLM_MODEL = get_conf("AVAIL_LLM_MODELS", "LLM_MODEL")
AVAIL_LLM_MODELS = AVAIL_LLM_MODELS + [LLM_MODEL]
# -=-=-=-=-=-=- the earliest and most stable models -=-=-=-=-=-=-
# Schema of each model_info entry:
#   fn_with_ui:    streaming predict function used by the chat UI
#   fn_without_ui: blocking predict function used by function plugins
#   endpoint:      HTTP(S)/WS endpoint, or None for local/SDK-driven models
#   max_token:     context window size used for truncation decisions
#   tokenizer:     lazy tiktoken wrapper (for non-OpenAI models this is only a
#                  rough token estimator, not the model's real tokenizer)
#   token_cnt:     callable mapping text -> estimated token count
model_info = {
    # openai
    "gpt-3.5-turbo": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 4096,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },

    "gpt-3.5-turbo-16k": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 16385,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },

    "gpt-3.5-turbo-0613": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 4096,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },

    "gpt-3.5-turbo-16k-0613": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 16385,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },

    "gpt-3.5-turbo-1106": {  # 16k context
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 16385,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },

    "gpt-4": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 8192,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
    },

    "gpt-4-32k": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 32768,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
    },

    "gpt-4-1106-preview": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 128000,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
    },

    "gpt-3.5-random": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": openai_endpoint,
        "max_token": 4096,
        # NOTE(review): uses the gpt-4 tokenizer despite the 3.5 name — confirm intentional
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
    },

    "gpt-4-vision-preview": {
        "fn_with_ui": chatgpt_vision_ui,
        "fn_without_ui": chatgpt_vision_noui,
        "endpoint": openai_endpoint,
        "max_token": 4096,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
    },


    # azure openai
    "azure-gpt-3.5":{
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": azure_endpoint,
        "max_token": 4096,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },

    "azure-gpt-4":{
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": azure_endpoint,
        "max_token": 8192,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
    },

    # api_2d (further api2d entries need not be added here — the alias loop below adds them automatically)
    "api2d-gpt-3.5-turbo": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": api2d_endpoint,
        "max_token": 4096,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },

    "api2d-gpt-4": {
        "fn_with_ui": chatgpt_ui,
        "fn_without_ui": chatgpt_noui,
        "endpoint": api2d_endpoint,
        "max_token": 8192,
        "tokenizer": tokenizer_gpt4,
        "token_cnt": get_token_num_gpt4,
    },

    # "chatglm" is aliased directly to chatglm2
    "chatglm": {
        "fn_with_ui": chatglm_ui,
        "fn_without_ui": chatglm_noui,
        "endpoint": None,
        "max_token": 1024,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "chatglm2": {
        "fn_with_ui": chatglm_ui,
        "fn_without_ui": chatglm_noui,
        "endpoint": None,
        "max_token": 1024,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "chatglm3": {
        "fn_with_ui": chatglm3_ui,
        "fn_without_ui": chatglm3_noui,
        "endpoint": None,
        "max_token": 8192,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "qianfan": {
        "fn_with_ui": qianfan_ui,
        "fn_without_ui": qianfan_noui,
        "endpoint": None,
        "max_token": 2000,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "gemini-pro": {
        "fn_with_ui": genai_ui,
        "fn_without_ui": genai_noui,
        "endpoint": None,
        "max_token": 1024 * 32,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
    "gemini-pro-vision": {
        "fn_with_ui": genai_ui,
        "fn_without_ui": genai_noui,
        "endpoint": None,
        "max_token": 1024 * 32,
        "tokenizer": tokenizer_gpt35,
        "token_cnt": get_token_num_gpt35,
    },
}
269
+
270
# -=-=-=-=-=-=- api2d aliasing support -=-=-=-=-=-=-
# Any "api2d-<model>" in AVAIL_LLM_MODELS reuses <model>'s config, retargeted at the api2d endpoint.
for model in AVAIL_LLM_MODELS:
    if model.startswith('api2d-') and (model.replace('api2d-','') in model_info.keys()):
        mi = copy.deepcopy(model_info[model.replace('api2d-','')])
        mi.update({"endpoint": api2d_endpoint})
        model_info.update({model: mi})

# -=-=-=-=-=-=- azure aliasing support -=-=-=-=-=-=-
for model in AVAIL_LLM_MODELS:
    if model.startswith('azure-') and (model.replace('azure-','') in model_info.keys()):
        mi = copy.deepcopy(model_info[model.replace('azure-','')])
        mi.update({"endpoint": azure_endpoint})
        model_info.update({model: mi})

# -=-=-=-=-=-=- models added later, possibly carrying extra dependencies -=-=-=-=-=-=-
# Each optional model is imported/registered best-effort: a failed import is
# reported but does not prevent the rest of the framework from starting.
# Fixed: these blocks previously used bare `except:` clauses, which would also
# swallow SystemExit/KeyboardInterrupt; narrowed to `except Exception:`.
if "claude-1-100k" in AVAIL_LLM_MODELS or "claude-2" in AVAIL_LLM_MODELS:
    from .bridge_claude import predict_no_ui_long_connection as claude_noui
    from .bridge_claude import predict as claude_ui
    model_info.update({
        "claude-1-100k": {
            "fn_with_ui": claude_ui,
            "fn_without_ui": claude_noui,
            "endpoint": None,
            "max_token": 8196,  # NOTE(review): 8196 looks like a typo for 8192 — confirm before changing
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        },
    })
    model_info.update({
        "claude-2": {
            "fn_with_ui": claude_ui,
            "fn_without_ui": claude_noui,
            "endpoint": None,
            "max_token": 8196,
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        },
    })
if "jittorllms_rwkv" in AVAIL_LLM_MODELS:
    from .bridge_jittorllms_rwkv import predict_no_ui_long_connection as rwkv_noui
    from .bridge_jittorllms_rwkv import predict as rwkv_ui
    model_info.update({
        "jittorllms_rwkv": {
            "fn_with_ui": rwkv_ui,
            "fn_without_ui": rwkv_noui,
            "endpoint": None,
            "max_token": 1024,
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        },
    })
if "jittorllms_llama" in AVAIL_LLM_MODELS:
    from .bridge_jittorllms_llama import predict_no_ui_long_connection as llama_noui
    from .bridge_jittorllms_llama import predict as llama_ui
    model_info.update({
        "jittorllms_llama": {
            "fn_with_ui": llama_ui,
            "fn_without_ui": llama_noui,
            "endpoint": None,
            "max_token": 1024,
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        },
    })
if "jittorllms_pangualpha" in AVAIL_LLM_MODELS:
    from .bridge_jittorllms_pangualpha import predict_no_ui_long_connection as pangualpha_noui
    from .bridge_jittorllms_pangualpha import predict as pangualpha_ui
    model_info.update({
        "jittorllms_pangualpha": {
            "fn_with_ui": pangualpha_ui,
            "fn_without_ui": pangualpha_noui,
            "endpoint": None,
            "max_token": 1024,
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        },
    })
if "moss" in AVAIL_LLM_MODELS:
    from .bridge_moss import predict_no_ui_long_connection as moss_noui
    from .bridge_moss import predict as moss_ui
    model_info.update({
        "moss": {
            "fn_with_ui": moss_ui,
            "fn_without_ui": moss_noui,
            "endpoint": None,
            "max_token": 1024,
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        },
    })
if "stack-claude" in AVAIL_LLM_MODELS:
    from .bridge_stackclaude import predict_no_ui_long_connection as claude_noui
    from .bridge_stackclaude import predict as claude_ui
    model_info.update({
        "stack-claude": {
            "fn_with_ui": claude_ui,
            "fn_without_ui": claude_noui,
            "endpoint": None,
            "max_token": 8192,
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        }
    })
if "newbing-free" in AVAIL_LLM_MODELS:
    try:
        from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
        from .bridge_newbingfree import predict as newbingfree_ui
        model_info.update({
            "newbing-free": {
                "fn_with_ui": newbingfree_ui,
                "fn_without_ui": newbingfree_noui,
                "endpoint": newbing_endpoint,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "newbing" in AVAIL_LLM_MODELS:  # same bridge as newbing-free
    try:
        from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
        from .bridge_newbingfree import predict as newbingfree_ui
        model_info.update({
            "newbing": {
                "fn_with_ui": newbingfree_ui,
                "fn_without_ui": newbingfree_noui,
                "endpoint": newbing_endpoint,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "chatglmft" in AVAIL_LLM_MODELS:  # fine-tuned local ChatGLM (comment fixed: was a copy-paste of "same with newbing-free")
    try:
        from .bridge_chatglmft import predict_no_ui_long_connection as chatglmft_noui
        from .bridge_chatglmft import predict as chatglmft_ui
        model_info.update({
            "chatglmft": {
                "fn_with_ui": chatglmft_ui,
                "fn_without_ui": chatglmft_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "internlm" in AVAIL_LLM_MODELS:
    try:
        from .bridge_internlm import predict_no_ui_long_connection as internlm_noui
        from .bridge_internlm import predict as internlm_ui
        model_info.update({
            "internlm": {
                "fn_with_ui": internlm_ui,
                "fn_without_ui": internlm_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "chatglm_onnx" in AVAIL_LLM_MODELS:
    try:
        from .bridge_chatglmonnx import predict_no_ui_long_connection as chatglm_onnx_noui
        from .bridge_chatglmonnx import predict as chatglm_onnx_ui
        model_info.update({
            "chatglm_onnx": {
                "fn_with_ui": chatglm_onnx_ui,
                "fn_without_ui": chatglm_onnx_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "qwen-local" in AVAIL_LLM_MODELS:
    try:
        from .bridge_qwen_local import predict_no_ui_long_connection as qwen_local_noui
        from .bridge_qwen_local import predict as qwen_local_ui
        model_info.update({
            "qwen-local": {
                "fn_with_ui": qwen_local_ui,
                "fn_without_ui": qwen_local_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "qwen-turbo" in AVAIL_LLM_MODELS or "qwen-plus" in AVAIL_LLM_MODELS or "qwen-max" in AVAIL_LLM_MODELS:  # Alibaba Qwen (comment fixed: was a copy-paste of "zhipuai")
    try:
        from .bridge_qwen import predict_no_ui_long_connection as qwen_noui
        from .bridge_qwen import predict as qwen_ui
        model_info.update({
            "qwen-turbo": {
                "fn_with_ui": qwen_ui,
                "fn_without_ui": qwen_noui,
                "endpoint": None,
                "max_token": 6144,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            },
            "qwen-plus": {
                "fn_with_ui": qwen_ui,
                "fn_without_ui": qwen_noui,
                "endpoint": None,
                "max_token": 30720,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            },
            "qwen-max": {
                "fn_with_ui": qwen_ui,
                "fn_without_ui": qwen_noui,
                "endpoint": None,
                "max_token": 28672,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "spark" in AVAIL_LLM_MODELS:  # iFlyTek Spark LLM
    try:
        from .bridge_spark import predict_no_ui_long_connection as spark_noui
        from .bridge_spark import predict as spark_ui
        model_info.update({
            "spark": {
                "fn_with_ui": spark_ui,
                "fn_without_ui": spark_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "sparkv2" in AVAIL_LLM_MODELS:  # iFlyTek Spark LLM v2
    try:
        from .bridge_spark import predict_no_ui_long_connection as spark_noui
        from .bridge_spark import predict as spark_ui
        model_info.update({
            "sparkv2": {
                "fn_with_ui": spark_ui,
                "fn_without_ui": spark_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "sparkv3" in AVAIL_LLM_MODELS:  # iFlyTek Spark LLM v3
    try:
        from .bridge_spark import predict_no_ui_long_connection as spark_noui
        from .bridge_spark import predict as spark_ui
        model_info.update({
            "sparkv3": {
                "fn_with_ui": spark_ui,
                "fn_without_ui": spark_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "llama2" in AVAIL_LLM_MODELS:  # llama2
    try:
        from .bridge_llama2 import predict_no_ui_long_connection as llama2_noui
        from .bridge_llama2 import predict as llama2_ui
        model_info.update({
            "llama2": {
                "fn_with_ui": llama2_ui,
                "fn_without_ui": llama2_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "zhipuai" in AVAIL_LLM_MODELS:  # zhipuai
    try:
        from .bridge_zhipu import predict_no_ui_long_connection as zhipu_noui
        from .bridge_zhipu import predict as zhipu_ui
        model_info.update({
            "zhipuai": {
                "fn_with_ui": zhipu_ui,
                "fn_without_ui": zhipu_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
if "deepseekcoder" in AVAIL_LLM_MODELS:  # deepseekcoder
    try:
        from .bridge_deepseekcoder import predict_no_ui_long_connection as deepseekcoder_noui
        from .bridge_deepseekcoder import predict as deepseekcoder_ui
        model_info.update({
            "deepseekcoder": {
                "fn_with_ui": deepseekcoder_ui,
                "fn_without_ui": deepseekcoder_noui,
                "endpoint": None,
                "max_token": 2048,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except Exception:
        print(trimmed_format_exc())
597
+
598
# <-- defines and switches between multiple Azure model deployments -->
AZURE_CFG_ARRAY = get_conf("AZURE_CFG_ARRAY")
if len(AZURE_CFG_ARRAY) > 0:
    for azure_model_name, azure_cfg_dict in AZURE_CFG_ARRAY.items():
        # May overwrite an earlier entry with the same name; this is intentional.
        if not azure_model_name.startswith('azure'):
            raise ValueError("AZURE_CFG_ARRAY中配置的模型必须以azure开头")
        # NOTE(review): unlike the global AZURE_ENDPOINT handling earlier in this
        # file, no trailing-'/' normalization is applied here — confirm configs
        # always end the endpoint with '/'.
        endpoint_ = azure_cfg_dict["AZURE_ENDPOINT"] + \
            f'openai/deployments/{azure_cfg_dict["AZURE_ENGINE"]}/chat/completions?api-version=2023-05-15'
        model_info.update({
            azure_model_name: {
                "fn_with_ui": chatgpt_ui,
                "fn_without_ui": chatgpt_noui,
                "endpoint": endpoint_,
                "azure_api_key": azure_cfg_dict["AZURE_API_KEY"],
                "max_token": azure_cfg_dict["AZURE_MODEL_MAX_TOKEN"],
                "tokenizer": tokenizer_gpt35,  # tokenizer is only used for a rough token estimate
                "token_cnt": get_token_num_gpt35,
            }
        })
        if azure_model_name not in AVAIL_LLM_MODELS:
            AVAIL_LLM_MODELS += [azure_model_name]
620
+
621
+
622
+
623
+
624
def LLM_CATCH_EXCEPTION(f):
    """
    Decorator for the plugin-facing LLM call: if the wrapped function raises,
    format the traceback as a fenced code block, publish it to the observation
    window, and return it as the result instead of propagating the exception.
    """
    def decorated(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience):
        try:
            result = f(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience)
        except Exception:
            # Surface the error through the same channel a normal reply would use.
            formatted = '\n```\n' + trimmed_format_exc() + '\n```\n'
            observe_window[0] = formatted
            result = formatted
        return result
    return decorated
636
+
637
+
638
def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window=[], console_slience=False):
    """
    Send a request to the LLM and wait for the complete reply in one call, without
    showing intermediate progress. Internally it still streams, to avoid the
    connection being dropped mid-way.

    inputs:
        the query of this request
    sys_prompt:
        the silent system prompt
    llm_kwargs:
        internal tuning parameters for the LLM
    history:
        the list of previous conversation turns
    observe_window:
        used to pass partially generated output across threads; mostly for visual
        effect and may be left empty. observe_window[0]: observation-window text.
        observe_window[1]: watchdog timestamp.
        NOTE(review): the default is a shared mutable list ([]) — safe only as long
        as no caller relies on the default being fresh per call; confirm callers.
    """
    import threading, time, copy

    model = llm_kwargs['llm_model']
    n_model = 1
    if '&' not in model:
        # No '&' in the model name: querying a single LLM.
        assert not model.startswith("tgui"), "TGUI不支持函数插件的实现"

        # Dispatch to the model's non-UI (plugin-safe) predict function.
        method = model_info[model]["fn_without_ui"]
        return method(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience)
    else:

        # Querying several LLMs at once ('a&b&c'). Same idea as above, just fanned
        # out over a thread pool with a mutex-window aggregator.
        executor = ThreadPoolExecutor(max_workers=4)
        models = model.split('&')
        n_model = len(models)

        # The multi-model path requires a fully formed 3-slot observation window.
        window_len = len(observe_window)
        assert window_len==3
        # One [text, watchdog, spare] sub-window per model, plus a trailing
        # boolean flag that keeps the aggregator thread alive.
        window_mutex = [["", time.time(), ""] for _ in range(n_model)] + [True]

        futures = []
        for i in range(n_model):
            model = models[i]
            method = model_info[model]["fn_without_ui"]
            # Deep-copy kwargs so each worker sees its own llm_model value.
            llm_kwargs_feedin = copy.deepcopy(llm_kwargs)
            llm_kwargs_feedin['llm_model'] = model
            future = executor.submit(LLM_CATCH_EXCEPTION(method), inputs, llm_kwargs_feedin, history, sys_prompt, window_mutex[i], console_slience)
            futures.append(future)

        def mutex_manager(window_mutex, observe_window):
            # Aggregator loop: forwards the caller's watchdog timestamp to every
            # worker and merges the workers' partial outputs into one display string.
            while True:
                time.sleep(0.25)
                if not window_mutex[-1]: break
                # watchdog: propagate the caller's keep-alive timestamp
                for i in range(n_model):
                    window_mutex[i][1] = observe_window[1]
                # observation window: merge per-model partial outputs
                chat_string = []
                for i in range(n_model):
                    chat_string.append( f"【{str(models[i])} 说】: <font color=\"{colors[i]}\"> {window_mutex[i][0]} </font>" )
                res = '<br/><br/>\n\n---\n\n'.join(chat_string)
                # # # # # # # # # # #
                observe_window[0] = res

        t_model = threading.Thread(target=mutex_manager, args=(window_mutex, observe_window), daemon=True)
        t_model.start()

        # Poll until every worker future has completed.
        return_string_collect = []
        while True:
            worker_done = [h.done() for h in futures]
            if all(worker_done):
                executor.shutdown()
                break
            time.sleep(1)

        for i, future in enumerate(futures): # wait and get
            return_string_collect.append( f"【{str(models[i])} 说】: <font color=\"{colors[i]}\"> {future.result()} </font>" )

        window_mutex[-1] = False # stop mutex thread
        res = '<br/><br/>\n\n---\n\n'.join(return_string_collect)
        return res
714
+
715
+
716
def predict(inputs, llm_kwargs, *args, **kwargs):
    """
    Send a request to the LLM and stream the output back; used for the basic chat
    function of the web UI.

    inputs: the query of this request
    llm_kwargs: internal tuning parameters (top_p, temperature, llm_model, ...)
    Remaining positional/keyword arguments (history, chatbot, additional_fn, ...)
    are forwarded untouched to the selected model's UI predict function.
    """
    selected_model = llm_kwargs['llm_model']
    # If this lookup raises a KeyError, check the AVAIL_LLM_MODELS option in the config.
    ui_handler = model_info[selected_model]["fn_with_ui"]
    yield from ui_handler(inputs, llm_kwargs, *args, **kwargs)
729
+
request_llms/bridge_chatglm.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Display name and install hint surfaced in error messages when the model is unavailable.
model_name = "ChatGLM"
cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`"
3
+
4
+
5
+ from toolbox import get_conf, ProxyNetworkActivate
6
+ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
7
+
8
+
9
+
10
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
class GetGLM2Handle(LocalLLMHandle):
    """Subprocess handle that loads a local THUDM/chatglm2-6b checkpoint and streams replies."""

    def load_model_info(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        # (Fixed: removed duplicated `import os` and unused `glob`/`platform` imports.)
        from transformers import AutoModel, AutoTokenizer
        LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE')

        # Pick the checkpoint matching the configured quantization level.
        if LOCAL_MODEL_QUANT == "INT4": # INT4
            _model_name_ = "THUDM/chatglm2-6b-int4"
        elif LOCAL_MODEL_QUANT == "INT8": # INT8
            _model_name_ = "THUDM/chatglm2-6b-int8"
        else:
            _model_name_ = "THUDM/chatglm2-6b" # FP16

        with ProxyNetworkActivate('Download_LLM'):
            chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True)
            if device=='cpu':
                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).float()
            else:
                # GPU path: half precision on CUDA.
                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).half().cuda()
            chatglm_model = chatglm_model.eval()

        self._model = chatglm_model
        self._tokenizer = chatglm_tokenizer
        return self._model, self._tokenizer

    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        def adaptor(kwargs):
            # Unpack the generation request forwarded from the parent process.
            query = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            return query, max_length, top_p, temperature, history

        query, max_length, top_p, temperature, history = adaptor(kwargs)

        # Relay the model's incremental responses; each yield is the full text so far.
        for response, history in self._model.stream_chat(self._tokenizer,
                                                         query,
                                                         history,
                                                         max_length=max_length,
                                                         top_p=top_p,
                                                         temperature=temperature,
                                                         ):
            yield response

    def try_to_import_special_deps(self, **kwargs):
        # import something that will raise error if the user does not install requirement_*.txt
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the main process
        import importlib
        # importlib.import_module('modelscope')
73
+
74
+
75
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
# Build the two standard predict entry points (plugin/non-UI and streaming UI) from the handle class.
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetGLM2Handle, model_name)
request_llms/bridge_chatglm3.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Display name and install hint surfaced in error messages when the model is unavailable.
model_name = "ChatGLM3"
cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`"
3
+
4
+
5
+ from toolbox import get_conf, ProxyNetworkActivate
6
+ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
7
+
8
+
9
+
10
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 Local Model
# ------------------------------------------------------------------------------------------------------------------------
class GetGLM3Handle(LocalLLMHandle):
    """Subprocess handle that loads a local THUDM/chatglm3-6b checkpoint and streams replies."""

    def load_model_info(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        # (Fixed: removed duplicated `import os` and unused `glob`/`platform` imports.)
        from transformers import AutoModel, AutoTokenizer
        LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE')

        # Pick the checkpoint matching the configured quantization level.
        if LOCAL_MODEL_QUANT == "INT4": # INT4
            _model_name_ = "THUDM/chatglm3-6b-int4"
        elif LOCAL_MODEL_QUANT == "INT8": # INT8
            _model_name_ = "THUDM/chatglm3-6b-int8"
        else:
            _model_name_ = "THUDM/chatglm3-6b" # FP16
        with ProxyNetworkActivate('Download_LLM'):
            chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True)
            if device=='cpu':
                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True, device='cpu').float()
            else:
                chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True, device='cuda')
            chatglm_model = chatglm_model.eval()

        self._model = chatglm_model
        self._tokenizer = chatglm_tokenizer
        return self._model, self._tokenizer

    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        def adaptor(kwargs):
            # Unpack the generation request forwarded from the parent process.
            query = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            return query, max_length, top_p, temperature, history

        query, max_length, top_p, temperature, history = adaptor(kwargs)

        # Relay the model's incremental responses; each yield is the full text so far.
        for response, history in self._model.stream_chat(self._tokenizer,
                                                         query,
                                                         history,
                                                         max_length=max_length,
                                                         top_p=top_p,
                                                         temperature=temperature,
                                                         ):
            yield response

    def try_to_import_special_deps(self, **kwargs):
        # import something that will raise error if the user does not install requirement_*.txt
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the main process
        import importlib
        # importlib.import_module('modelscope')
72
+
73
+
74
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
# Build the two standard predict entry points; chatglm3 uses its own history message format.
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetGLM3Handle, model_name, history_format='chatglm3')
request_llms/bridge_chatglmft.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from transformers import AutoModel, AutoTokenizer
3
+ import time
4
+ import os
5
+ import json
6
+ import threading
7
+ import importlib
8
+ from toolbox import update_ui, get_conf
9
+ from multiprocessing import Process, Pipe
10
+
11
+ load_message = "ChatGLMFT尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,ChatGLMFT消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……"
12
+
13
def string_to_options(arguments):
    """Parse a shell-style option string into an argparse Namespace.

    Args:
        arguments: a single string of command-line style options,
            e.g. ``"--llm_to_learn gpt-4 --batch 10"``.

    Returns:
        argparse.Namespace with fields llm_to_learn, prompt_prefix,
        system_prompt and batch (all with defaults when omitted).
    """
    import argparse
    import shlex
    # Create an argparse.ArgumentParser instance
    parser = argparse.ArgumentParser()
    # Add command-line arguments
    parser.add_argument("--llm_to_learn", type=str, help="LLM model to learn", default="gpt-3.5-turbo")
    parser.add_argument("--prompt_prefix", type=str, help="Prompt prefix", default='')
    parser.add_argument("--system_prompt", type=str, help="System prompt", default='')
    # FIX: help text previously said "System prompt" (copy-paste error)
    parser.add_argument("--batch", type=int, help="Batch size", default=50)
    # Parse the arguments (shlex honors quoting like a POSIX shell)
    args = parser.parse_args(shlex.split(arguments))
    return args
26
+
27
+
28
+ #################################################################################
29
class GetGLMFTHandle(Process):
    """Daemon child process hosting a P-Tuning fine-tuned ChatGLM model.

    The parent process never touches the model directly: requests are sent
    through a ``multiprocessing.Pipe`` by ``stream_chat`` and the child
    (``run``) streams partial replies back, terminated by a '[Finish]'
    sentinel string.
    """
    def __init__(self):
        # daemon=True: the child process dies together with the main process
        super().__init__(daemon=True)
        self.parent, self.child = Pipe()
        self.chatglmft_model = None       # loaded lazily in run() (child side)
        self.chatglmft_tokenizer = None
        self.info = ""                    # human-readable status shown in the UI
        self.success = True
        self.check_dependency()
        self.start()
        # serializes concurrent stream_chat callers (one request on the pipe at a time)
        self.threadLock = threading.Lock()

    def check_dependency(self):
        # Probe the optional dependency; records status flags instead of raising.
        try:
            import sentencepiece
            self.info = "依赖检测通过"
            self.success = True
        except:
            self.info = "缺少ChatGLMFT的依赖,如果要使用ChatGLMFT,除了基础的pip依赖以外,您还需要运行`pip install -r request_llms/requirements_chatglm.txt`安装ChatGLM的依赖。"
            self.success = False

    def ready(self):
        # True once the child has finished loading the model.
        return self.chatglmft_model is not None

    def run(self):
        # Executed in the child process.
        # First pass: load the fine-tuned checkpoint (retried up to 3 times).
        retry = 0
        while True:
            try:
                if self.chatglmft_model is None:
                    from transformers import AutoConfig
                    import torch
                    # conf = 'request_llms/current_ptune_model.json'
                    # if not os.path.exists(conf): raise RuntimeError('找不到微调模型信息')
                    # with open(conf, 'r', encoding='utf8') as f:
                    #     model_args = json.loads(f.read())
                    CHATGLM_PTUNING_CHECKPOINT = get_conf('CHATGLM_PTUNING_CHECKPOINT')
                    assert os.path.exists(CHATGLM_PTUNING_CHECKPOINT), "找不到微调模型检查点"
                    conf = os.path.join(CHATGLM_PTUNING_CHECKPOINT, "config.json")
                    with open(conf, 'r', encoding='utf8') as f:
                        model_args = json.loads(f.read())
                    # older checkpoints store the base model path under '_name_or_path'
                    if 'model_name_or_path' not in model_args:
                        model_args['model_name_or_path'] = model_args['_name_or_path']
                    self.chatglmft_tokenizer = AutoTokenizer.from_pretrained(
                        model_args['model_name_or_path'], trust_remote_code=True)
                    config = AutoConfig.from_pretrained(
                        model_args['model_name_or_path'], trust_remote_code=True)

                    config.pre_seq_len = model_args['pre_seq_len']
                    config.prefix_projection = model_args['prefix_projection']

                    print(f"Loading prefix_encoder weight from {CHATGLM_PTUNING_CHECKPOINT}")
                    model = AutoModel.from_pretrained(model_args['model_name_or_path'], config=config, trust_remote_code=True)
                    prefix_state_dict = torch.load(os.path.join(CHATGLM_PTUNING_CHECKPOINT, "pytorch_model.bin"))
                    # keep only the prefix-encoder weights, stripping their module prefix
                    new_prefix_state_dict = {}
                    for k, v in prefix_state_dict.items():
                        if k.startswith("transformer.prefix_encoder."):
                            new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
                    model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)

                    if model_args['quantization_bit'] is not None and model_args['quantization_bit'] != 0:
                        print(f"Quantized to {model_args['quantization_bit']} bit")
                        model = model.quantize(model_args['quantization_bit'])
                    model = model.cuda()
                    if model_args['pre_seq_len'] is not None:
                        # P-tuning v2: the prefix encoder stays in fp32
                        model.transformer.prefix_encoder.float()
                    self.chatglmft_model = model.eval()

                    break
                else:
                    break
            except Exception as e:
                retry += 1
                if retry > 3:
                    self.child.send('[Local Message] Call ChatGLMFT fail 不能正常加载ChatGLMFT的参数。')
                    raise RuntimeError("不能正常加载ChatGLMFT的参数!")

        while True:
            # wait for the next request from the parent
            kwargs = self.child.recv()
            # got one — run streaming inference and forward each partial reply
            try:
                for response, history in self.chatglmft_model.stream_chat(self.chatglmft_tokenizer, **kwargs):
                    self.child.send(response)
                    # # receive a possible mid-stream terminate command (if any)
                    # if self.child.poll():
                    #     command = self.child.recv()
                    #     if command == '[Terminate]': break
            except:
                from toolbox import trimmed_format_exc
                self.child.send('[Local Message] Call ChatGLMFT fail.' + '\n```\n' + trimmed_format_exc() + '\n```\n')
            # request finished; signal completion and loop for the next one
            self.child.send('[Finish]')

    def stream_chat(self, **kwargs):
        # Executed in the parent process; yields partial replies until '[Finish]'.
        self.threadLock.acquire()
        self.parent.send(kwargs)
        while True:
            res = self.parent.recv()
            if res != '[Finish]':
                yield res
            else:
                break
        self.threadLock.release()
136
+
137
global glmft_handle  # NOTE(review): `global` at module scope is a no-op — kept for parity with sibling bridges
glmft_handle = None  # lazily-created singleton GetGLMFTHandle, shared by the predict_* entry points
#################################################################################
140
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
    """
    Multi-threaded entry point: run one ChatGLMFT query to completion and
    return the final reply as a string (see request_llms/bridge_all.py for
    the full interface contract).

    observe_window[0] (if present) receives the partial output for display;
    observe_window[1] (if present) is a watchdog timestamp — when it is not
    refreshed within 5 seconds the call aborts with RuntimeError.
    """
    global glmft_handle
    if glmft_handle is None:
        # first use: spawn the child process that hosts the model
        glmft_handle = GetGLMFTHandle()
        if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + glmft_handle.info
        if not glmft_handle.success:
            error = glmft_handle.info
            glmft_handle = None
            raise RuntimeError(error)

    # ChatGLMFT has no sys_prompt parameter, so fold the prompt into the history
    history_feedin = []
    history_feedin.append(["What can I do?", sys_prompt])
    for i in range(len(history)//2):
        history_feedin.append([history[2*i], history[2*i+1]] )

    watch_dog_patience = 5 # watchdog patience: 5 seconds is enough
    response = ""
    for response in glmft_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        if len(observe_window) >= 1: observe_window[0] = response
        if len(observe_window) >= 2:
            # watchdog: abort when the caller stopped feeding the timestamp
            if (time.time()-observe_window[1]) > watch_dog_patience:
                raise RuntimeError("程序终止。")
    return response
168
+
169
+
170
+
171
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    Single-threaded UI entry point: stream the ChatGLMFT reply into *chatbot*
    via update_ui (see request_llms/bridge_all.py for the interface contract).
    """
    chatbot.append((inputs, ""))

    # lazily create the singleton model handle on first use
    global glmft_handle
    if glmft_handle is None:
        glmft_handle = GetGLMFTHandle()
        chatbot[-1] = (inputs, load_message + "\n\n" + glmft_handle.info)
        yield from update_ui(chatbot=chatbot, history=[])
        if not glmft_handle.success:
            glmft_handle = None
            return

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    # fold the system prompt and prior turns into the model's history format
    history_feedin = []
    history_feedin.append(["What can I do?", system_prompt] )
    for i in range(len(history)//2):
        history_feedin.append([history[2*i], history[2*i+1]] )

    # stream the ChatGLMFT reply into the UI
    response = "[Local Message] 等待ChatGLMFT响应中 ..."
    for response in glmft_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        chatbot[-1] = (inputs, response)
        yield from update_ui(chatbot=chatbot, history=history)

    # finalize: an unchanged placeholder means the model never answered
    if response == "[Local Message] 等待ChatGLMFT响应中 ...":
        response = "[Local Message] ChatGLMFT响应异常 ..."
    history.extend([inputs, response])
    yield from update_ui(chatbot=chatbot, history=history)
request_llms/bridge_chatgpt.py ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 借鉴了 https://github.com/GaiZhenbiao/ChuanhuChatGPT 项目
2
+
3
+ """
4
+ 该文件中主要包含三个函数
5
+
6
+ 不具备多线程能力的函数:
7
+ 1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
8
+
9
+ 具备多线程调用能力的函数
10
+ 2. predict_no_ui_long_connection:支持多线程
11
+ """
12
+
13
+ import json
14
+ import time
15
+ import gradio as gr
16
+ import logging
17
+ import traceback
18
+ import requests
19
+ import importlib
20
+ import random
21
+
22
+ # config_private.py放自己的秘密如API和代理网址
23
+ # 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件
24
+ from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder
25
+ proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG, AZURE_CFG_ARRAY = \
26
+ get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG', 'AZURE_CFG_ARRAY')
27
+
28
+ timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
29
+ '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
30
+
31
def get_full_error(chunk, stream_response):
    """Recover the complete error payload returned by OpenAI by draining
    whatever remains of *stream_response* and appending it to *chunk*."""
    try:
        for piece in stream_response:
            chunk += piece
    except:
        pass  # stream died mid-drain — return what we have
    return chunk
41
+
42
def decode_chunk(chunk):
    """Pre-parse one SSE chunk from the chat-completions stream.

    Returns (decoded_text, parsed_json_or_None, has_choices, choice_valid,
    has_content, has_role) — flags used downstream to spot malformed frames
    from third-party endpoints. Never raises.
    """
    text = chunk.decode()
    parsed = None
    has_choices = choice_valid = has_content = has_role = False
    try:
        # strip the leading "data: " prefix before parsing
        parsed = json.loads(text[6:])
        has_choices = 'choices' in parsed
        if has_choices:
            choice_valid = len(parsed['choices']) > 0
        if choice_valid:
            delta = parsed['choices'][0]["delta"]
            # an explicit null content counts as absent
            has_content = ("content" in delta) and (delta["content"] is not None)
            has_role = "role" in delta
    except:
        pass
    return text, parsed, has_choices, choice_valid, has_content, has_role
60
+
61
from functools import lru_cache

@lru_cache(maxsize=32)
def verify_endpoint(endpoint):
    """Validate *endpoint* and return it unchanged (result cached per endpoint).

    Raises:
        ValueError: the placeholder Azure endpoint from the sample config
            was left unedited.
    """
    placeholder_left_in_config = "你亲手写的api名称" in endpoint
    if placeholder_left_in_config:
        raise ValueError("Endpoint不正确, 请检查AZURE_ENDPOINT的配置! 当前的Endpoint为:" + endpoint)
    return endpoint
70
+
71
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
    """
    Send a request to chatGPT and wait for the complete reply in one shot
    (no incremental UI updates). Streaming is still used internally so a
    dropped connection is detected early.
    inputs:
        the query of this request
    sys_prompt:
        the silent system prompt
    llm_kwargs:
        chatGPT tuning parameters (model, temperature, top_p, ...)
    history:
        list of previous dialogue turns
    observe_window = None:
        cross-thread channel for partial output, mostly for visual effect;
        observe_window[0]: partial-output buffer. observe_window[1]: watchdog timestamp.
    """
    watch_dog_patience = 5 # watchdog patience: 5 seconds is enough
    headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
    retry = 0
    while True:
        try:
            # make a POST request to the API endpoint, stream=False
            from .bridge_all import model_info
            endpoint = verify_endpoint(model_info[llm_kwargs['llm_model']]['endpoint'])
            response = requests.post(endpoint, headers=headers, proxies=proxies,
                                    json=payload, stream=True, timeout=TIMEOUT_SECONDS); break
        except requests.exceptions.ReadTimeout as e:
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
            if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')

    stream_response = response.iter_lines()
    result = ''
    json_data = None
    while True:
        try: chunk = next(stream_response)
        except StopIteration:
            break
        except requests.exceptions.ConnectionError:
            chunk = next(stream_response) # connection hiccup — retry once; a second failure propagates
        chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role = decode_chunk(chunk)
        if len(chunk_decoded)==0: continue
        if not chunk_decoded.startswith('data:'):
            # non-SSE line: the server returned an error payload
            error_msg = get_full_error(chunk, stream_response).decode()
            if "reduce the length" in error_msg:
                raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
            else:
                raise RuntimeError("OpenAI拒绝了请求:" + error_msg)
        if ('data: [DONE]' in chunk_decoded): break # api2d finished normally
        # peeked flags (used for anomaly detection)
        if has_choices and not choice_valid:
            # some flaky third-party endpoints emit empty choice lists
            continue
        json_data = chunkjson['choices'][0]
        delta = json_data["delta"]
        if len(delta) == 0: break
        if "role" in delta: continue
        if "content" in delta:
            result += delta["content"]
            if not console_slience: print(delta["content"], end='')
            if observe_window is not None:
                # observation window: publish the data received so far
                if len(observe_window) >= 1:
                    observe_window[0] += delta["content"]
                # watchdog: abort when not fed within the deadline
                if len(observe_window) >= 2:
                    if (time.time()-observe_window[1]) > watch_dog_patience:
                        raise RuntimeError("用户取消了程序。")
        else: raise RuntimeError("意外Json结构:"+delta)
    if json_data and json_data['finish_reason'] == 'content_filter':
        raise RuntimeError("由于提问含不合规内容被Azure过滤。")
    if json_data and json_data['finish_reason'] == 'length':
        raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。")
    return result
144
+
145
+
146
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    Send a request to chatGPT and stream the output into the UI.
    Used for the basic chat function.
    inputs: the query of this request
    top_p, temperature: chatGPT tuning parameters (inside llm_kwargs)
    history: previous dialogue turns (overly long inputs/history trigger a token-overflow error)
    chatbot: the dialogue list shown in the WebUI; mutate it then yield to refresh the page
    additional_fn: which UI button was clicked (see functional.py)
    """
    if is_any_api_key(inputs):
        # the user typed an api-key into the chat box: store it instead of chatting
        chatbot._cookies['api_key'] = inputs
        chatbot.append(("输入已识别为openai的api_key", what_keys(inputs)))
        yield from update_ui(chatbot=chatbot, history=history, msg="api_key已导入") # refresh UI
        return
    elif not is_any_api_key(chatbot._cookies['api_key']):
        chatbot.append((inputs, "缺少api_key。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。"))
        yield from update_ui(chatbot=chatbot, history=history, msg="缺少api_key") # refresh UI
        return

    user_input = inputs
    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    raw_input = inputs
    logging.info(f'[raw_input] {raw_input}')
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # refresh UI

    # check mis-behavior: the user submitted an upload-folder path as a question
    if is_the_upload_folder(user_input):
        chatbot[-1] = (inputs, f"[Local Message] 检测到操作错误!当您上传文档之后,需点击“**函数插件区**”按钮进行处理,请勿点击“提交”按钮或者“基础功能区”按钮。")
        yield from update_ui(chatbot=chatbot, history=history, msg="正常") # refresh UI
        time.sleep(2)

    try:
        headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream)
    except RuntimeError as e:
        chatbot[-1] = (inputs, f"您提供的api-key不满足要求,不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
        yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # refresh UI
        return

    # validate the endpoint
    try:
        from .bridge_all import model_info
        endpoint = verify_endpoint(model_info[llm_kwargs['llm_model']]['endpoint'])
    except:
        tb_str = '```\n' + trimmed_format_exc() + '```'
        chatbot[-1] = (inputs, tb_str)
        yield from update_ui(chatbot=chatbot, history=history, msg="Endpoint不满足要求") # refresh UI
        return

    history.append(inputs); history.append("")

    retry = 0
    while True:
        try:
            # make a POST request to the API endpoint, stream=True
            response = requests.post(endpoint, headers=headers, proxies=proxies,
                                    json=payload, stream=True, timeout=TIMEOUT_SECONDS);break
        except:
            retry += 1
            chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
            retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
            yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # refresh UI
            if retry > MAX_RETRY: raise TimeoutError

    gpt_replying_buffer = ""

    is_head_of_the_stream = True
    if stream:
        stream_response = response.iter_lines()
        while True:
            try:
                chunk = next(stream_response)
            except StopIteration:
                # non-official endpoints can end the stream this way; OpenAI and API2D never reach here
                chunk_decoded = chunk.decode()
                error_msg = chunk_decoded
                # first rule out a one-api third-party bug: stream ended without a DONE packet
                if len(gpt_replying_buffer.strip()) > 0 and len(error_msg) == 0:
                    yield from update_ui(chatbot=chatbot, history=history, msg="检测到有缺陷的非OpenAI官方接口,建议选择更稳定的接口。")
                    break
                # otherwise surface the error directly
                chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                yield from update_ui(chatbot=chatbot, history=history, msg="非OpenAI官方接口返回了错误:" + chunk.decode()) # refresh UI
                return

            # peeked flags (used for anomaly detection)
            chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role = decode_chunk(chunk)

            if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r"content" not in chunk_decoded):
                # the first frame of the stream carries no content
                is_head_of_the_stream = False; continue

            if chunk:
                try:
                    if has_choices and not choice_valid:
                        # some flaky third-party endpoints emit empty choice lists
                        continue
                    # former: API2D termination condition; latter: OpenAI termination condition
                    if ('data: [DONE]' in chunk_decoded) or (len(chunkjson['choices'][0]["delta"]) == 0):
                        # end of stream — gpt_replying_buffer is complete
                        logging.info(f'[response] {gpt_replying_buffer}')
                        break
                    # main body of the stream
                    status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
                    # an exception here usually means the text was too long (see get_full_error output for details)
                    if has_content:
                        # normal case
                        gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]
                    elif has_role:
                        # tolerate third-party endpoints that resend the role frame
                        continue
                    else:
                        # some flaky third-party endpoints — try reading content anyway
                        gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]

                    history[-1] = gpt_replying_buffer
                    chatbot[-1] = (history[-2], history[-1])
                    yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # refresh UI
                except Exception as e:
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # refresh UI
                    chunk = get_full_error(chunk, stream_response)
                    chunk_decoded = chunk.decode()
                    error_msg = chunk_decoded
                    chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # refresh UI
                    print(error_msg)
                    return
277
+
278
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
    """Map a raw OpenAI/API2D error payload onto a user-facing chat message.

    Mutates chatbot[-1]; for token-overflow errors it additionally clips the
    history. Returns the (possibly updated) chatbot and history.
    """
    from .bridge_all import model_info
    openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
    if "reduce the length" in error_msg:
        # token overflow: drop the overflowing turn (history[-2] is this input, history[-1] this output)
        if len(history) >= 2: history[-1] = ""; history[-2] = ""
        history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # release at least half of the history
        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
        return chatbot, history
    # ordered (needle, message) table — first match wins, same order as the old elif chain
    known_errors = (
        ("does not exist", f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格."),
        ("Incorrect API key", "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website),
        ("exceeded your current quota", "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." + openai_website),
        ("account is not active", "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website),
        ("associated with a deactivated account", "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website),
        ("API key has been deactivated", "[Local Message] API key has been deactivated. OpenAI以账户失效为由, 拒绝服务." + openai_website),
        ("bad forward key", "[Local Message] Bad forward key. API2D账户额度不足."),
        ("Not enough point", "[Local Message] Not enough point. API2D账户点数不足."),
    )
    for needle, message in known_errors:
        if needle in error_msg:
            chatbot[-1] = (chatbot[-1][0], message)
            return chatbot, history
    # unrecognized error: show the traceback plus the raw payload
    from toolbox import regular_txt_to_markdown
    tb_str = '```\n' + trimmed_format_exc() + '```'
    chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
    return chatbot, history
307
+
308
def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
    """
    Assemble everything, pick the LLM model, and build the HTTP headers and
    JSON payload for the chat-completions request.
    Raises AssertionError when no usable api-key is configured.
    """
    if not is_any_api_key(llm_kwargs['api_key']):
        raise AssertionError("你提供了错误的API_KEY。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。")

    api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    if API_ORG.startswith('org-'): headers.update({"OpenAI-Organization": API_ORG})
    if llm_kwargs['llm_model'].startswith('azure-'):
        # Azure authenticates with an "api-key" header instead of a Bearer token
        headers.update({"api-key": api_key})
        if llm_kwargs['llm_model'] in AZURE_CFG_ARRAY.keys():
            # per-deployment key overrides the shared one
            azure_api_key_unshared = AZURE_CFG_ARRAY[llm_kwargs['llm_model']]["AZURE_API_KEY"]
            headers.update({"api-key": azure_api_key_unshared})

    conversation_cnt = len(history) // 2

    messages = [{"role": "system", "content": system_prompt}]
    if conversation_cnt:
        # history is a flat [user, assistant, user, assistant, ...] list
        for index in range(0, 2*conversation_cnt, 2):
            what_i_have_asked = {}
            what_i_have_asked["role"] = "user"
            what_i_have_asked["content"] = history[index]
            what_gpt_answer = {}
            what_gpt_answer["role"] = "assistant"
            what_gpt_answer["content"] = history[index+1]
            if what_i_have_asked["content"] != "":
                # skip turns that failed (empty or timed-out answers)
                if what_gpt_answer["content"] == "": continue
                if what_gpt_answer["content"] == timeout_bot_msg: continue
                messages.append(what_i_have_asked)
                messages.append(what_gpt_answer)
            else:
                # empty question: fold the answer into the previous message
                messages[-1]['content'] = what_gpt_answer['content']

    what_i_ask_now = {}
    what_i_ask_now["role"] = "user"
    what_i_ask_now["content"] = inputs
    messages.append(what_i_ask_now)
    model = llm_kwargs['llm_model']
    if llm_kwargs['llm_model'].startswith('api2d-'):
        # strip the routing prefix before sending upstream
        model = llm_kwargs['llm_model'][len('api2d-'):]

    if model == "gpt-3.5-random": # random selection to dodge openai per-model rate limits
        model = random.choice([
            "gpt-3.5-turbo",
            "gpt-3.5-turbo-16k",
            "gpt-3.5-turbo-1106",
            "gpt-3.5-turbo-0613",
            "gpt-3.5-turbo-16k-0613",
            "gpt-3.5-turbo-0301",
        ])
        logging.info("Random select model:" + model)

    payload = {
        "model": model,
        "messages": messages,
        "temperature": llm_kwargs['temperature'],  # 1.0,
        "top_p": llm_kwargs['top_p'],  # 1.0,
        "n": 1,
        "stream": stream,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }
    try:
        print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
    except:
        print('输入中可能存在乱码。')
    return headers,payload
381
+
382
+
request_llms/bridge_chatgpt_vision.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 该文件中主要包含三个函数
3
+
4
+ 不具备多线程能力的函数:
5
+ 1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
6
+
7
+ 具备多线程调用能力的函数
8
+ 2. predict_no_ui_long_connection:支持多线程
9
+ """
10
+
11
+ import json
12
+ import time
13
+ import logging
14
+ import requests
15
+ import base64
16
+ import os
17
+ import glob
18
+ from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, is_the_upload_folder, \
19
+ update_ui_lastest_msg, get_max_token, encode_image, have_any_recent_upload_image_files
20
+
21
+
22
+ proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG, AZURE_CFG_ARRAY = \
23
+ get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG', 'AZURE_CFG_ARRAY')
24
+
25
+ timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
26
+ '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
27
+
28
+
29
def report_invalid_key(key):
    """Experimental: add *key* to the api-key blacklist when the
    BLOCK_INVALID_APIKEY config flag is enabled; otherwise a no-op."""
    if get_conf("BLOCK_INVALID_APIKEY"):
        # experimental feature: auto-detect and block invalid keys — do not rely on it
        from request_llms.key_manager import ApiKeyManager
        # FIX: the return value was previously bound to an unused local `api_key`
        ApiKeyManager().add_key_to_blacklist(key)
34
+
35
def get_full_error(chunk, stream_response):
    """Recover the complete error payload returned by OpenAI by draining
    whatever remains of *stream_response* and appending it to *chunk*."""
    try:
        for piece in stream_response:
            chunk += piece
    except:
        pass  # stream died mid-drain — return what we have
    return chunk
45
+
46
def decode_chunk(chunk):
    """Pre-parse one SSE chunk from the chat-completions stream.

    Returns (decoded_text, parsed_json_or_None, has_choices, choice_valid,
    has_content, has_role). Never raises.
    """
    # peek some flags early (used downstream for anomaly detection)
    chunk_decoded = chunk.decode()
    chunkjson = None
    has_choices = False
    choice_valid = False
    has_content = False
    has_role = False
    try:
        chunkjson = json.loads(chunk_decoded[6:])  # strip the leading "data: " prefix
        has_choices = 'choices' in chunkjson
        if has_choices: choice_valid = (len(chunkjson['choices']) > 0)
        if has_choices and choice_valid: has_content = "content" in chunkjson['choices'][0]["delta"]
        # FIX (consistency with bridge_chatgpt.decode_chunk): treat an explicit
        # null content as absent, otherwise the caller would concatenate None
        # into the reply buffer and crash
        if has_content: has_content = (chunkjson['choices'][0]["delta"]["content"] is not None)
        if has_choices and choice_valid: has_role = "role" in chunkjson['choices'][0]["delta"]
    except:
        pass
    return chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role
63
+
64
from functools import lru_cache

@lru_cache(maxsize=32)
def verify_endpoint(endpoint):
    """Pass-through endpoint check for the vision bridge (no validation
    currently required); result cached per endpoint."""
    return endpoint
71
+
72
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
    # The vision bridge only supports the interactive predict() path;
    # no multi-threaded batch interface has been implemented yet.
    raise NotImplementedError
74
+
75
+
76
+ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
77
+
78
+ have_recent_file, image_paths = have_any_recent_upload_image_files(chatbot)
79
+
80
+ if is_any_api_key(inputs):
81
+ chatbot._cookies['api_key'] = inputs
82
+ chatbot.append(("输入已识别为openai的api_key", what_keys(inputs)))
83
+ yield from update_ui(chatbot=chatbot, history=history, msg="api_key已导入") # 刷新界面
84
+ return
85
+ elif not is_any_api_key(chatbot._cookies['api_key']):
86
+ chatbot.append((inputs, "缺少api_key。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。"))
87
+ yield from update_ui(chatbot=chatbot, history=history, msg="缺少api_key") # 刷新界面
88
+ return
89
+ if not have_recent_file:
90
+ chatbot.append((inputs, "没有检测到任何近期上传的图像文件,请上传jpg格式的图片,此外,请注意拓展名需要小写"))
91
+ yield from update_ui(chatbot=chatbot, history=history, msg="等待图片") # 刷新界面
92
+ return
93
+ if os.path.exists(inputs):
94
+ chatbot.append((inputs, "已经接收到您上传的文件,您不需要再重复强调该文件的路径了,请直接输入您的问题。"))
95
+ yield from update_ui(chatbot=chatbot, history=history, msg="等待指令") # 刷新界面
96
+ return
97
+
98
+
99
+ user_input = inputs
100
+ if additional_fn is not None:
101
+ from core_functional import handle_core_functionality
102
+ inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)
103
+
104
+ raw_input = inputs
105
+ logging.info(f'[raw_input] {raw_input}')
106
+ def make_media_input(inputs, image_paths):
107
+ for image_path in image_paths:
108
+ inputs = inputs + f'<br/><br/><div align="center"><img src="file={os.path.abspath(image_path)}"></div>'
109
+ return inputs
110
+ chatbot.append((make_media_input(inputs, image_paths), ""))
111
+ yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
112
+
113
+ # check mis-behavior
114
+ if is_the_upload_folder(user_input):
115
+ chatbot[-1] = (inputs, f"[Local Message] 检测到操作错误!当您上传文档之后,需点击“**函数插件区**”按钮进行处理,请勿点击“提交”按钮或者“基础功能区”按钮。")
116
+ yield from update_ui(chatbot=chatbot, history=history, msg="正常") # 刷新界面
117
+ time.sleep(2)
118
+
119
+ try:
120
+ headers, payload, api_key = generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths)
121
+ except RuntimeError as e:
122
+ chatbot[-1] = (inputs, f"您提供的api-key不满足要求,不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
123
+ yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # 刷新界面
124
+ return
125
+
126
+ # 检查endpoint是否合法
127
+ try:
128
+ from .bridge_all import model_info
129
+ endpoint = verify_endpoint(model_info[llm_kwargs['llm_model']]['endpoint'])
130
+ except:
131
+ tb_str = '```\n' + trimmed_format_exc() + '```'
132
+ chatbot[-1] = (inputs, tb_str)
133
+ yield from update_ui(chatbot=chatbot, history=history, msg="Endpoint不满足要求") # 刷新界面
134
+ return
135
+
136
+ history.append(make_media_input(inputs, image_paths))
137
+ history.append("")
138
+
139
+ retry = 0
140
+ while True:
141
+ try:
142
+ # make a POST request to the API endpoint, stream=True
143
+ response = requests.post(endpoint, headers=headers, proxies=proxies,
144
+ json=payload, stream=True, timeout=TIMEOUT_SECONDS);break
145
+ except:
146
+ retry += 1
147
+ chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
148
+ retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
149
+ yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面
150
+ if retry > MAX_RETRY: raise TimeoutError
151
+
152
+ gpt_replying_buffer = ""
153
+
154
+ is_head_of_the_stream = True
155
+ if stream:
156
+ stream_response = response.iter_lines()
157
+ while True:
158
+ try:
159
+ chunk = next(stream_response)
160
+ except StopIteration:
161
+ # 非OpenAI官方接口的出现这样的报错,OpenAI和API2D不会走这里
162
+ chunk_decoded = chunk.decode()
163
+ error_msg = chunk_decoded
164
+ # 首先排除一个one-api没有done数据包的第三方Bug情形
165
+ if len(gpt_replying_buffer.strip()) > 0 and len(error_msg) == 0:
166
+ yield from update_ui(chatbot=chatbot, history=history, msg="检测到有缺陷的非OpenAI官方接口,建议选择更稳定的接口。")
167
+ break
168
+ # 其他情况,直接返回报错
169
+ chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg, api_key)
170
+ yield from update_ui(chatbot=chatbot, history=history, msg="非OpenAI官方接口返回了错误:" + chunk.decode()) # 刷新界面
171
+ return
172
+
173
+ # 提前读取一些信息 (用于判断异常)
174
+ chunk_decoded, chunkjson, has_choices, choice_valid, has_content, has_role = decode_chunk(chunk)
175
+
176
+ if is_head_of_the_stream and (r'"object":"error"' not in chunk_decoded) and (r"content" not in chunk_decoded):
177
+ # 数据流的第一帧不携带content
178
+ is_head_of_the_stream = False; continue
179
+
180
+ if chunk:
181
+ try:
182
+ if has_choices and not choice_valid:
183
+ # 一些垃圾第三方接口的出现这样的错误
184
+ continue
185
+ # 前者是API2D的结束条件,后者是OPENAI的结束条件
186
+ if ('data: [DONE]' in chunk_decoded) or (len(chunkjson['choices'][0]["delta"]) == 0):
187
+ # 判定为数据流的结束,gpt_replying_buffer也写完了
188
+ lastmsg = chatbot[-1][-1] + f"\n\n\n\n「{llm_kwargs['llm_model']}调用结束,该模型不具备上下文对话能力,如需追问,请及时切换模型。」"
189
+ yield from update_ui_lastest_msg(lastmsg, chatbot, history, delay=1)
190
+ logging.info(f'[response] {gpt_replying_buffer}')
191
+ break
192
+ # 处理数据流的主体
193
+ status_text = f"finish_reason: {chunkjson['choices'][0].get('finish_reason', 'null')}"
194
+ # 如果这里抛出异常,一般是文本过长,详情见get_full_error的输出
195
+ if has_content:
196
+ # 正常情况
197
+ gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]
198
+ elif has_role:
199
+ # 一些第三方接口的出现这样的错误,兼容一下吧
200
+ continue
201
+ else:
202
+ # 一些垃圾第三方接口的出现这样的错误
203
+ gpt_replying_buffer = gpt_replying_buffer + chunkjson['choices'][0]["delta"]["content"]
204
+
205
+ history[-1] = gpt_replying_buffer
206
+ chatbot[-1] = (history[-2], history[-1])
207
+ yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # 刷新界面
208
+ except Exception as e:
209
+ yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面
210
+ chunk = get_full_error(chunk, stream_response)
211
+ chunk_decoded = chunk.decode()
212
+ error_msg = chunk_decoded
213
+ chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg, api_key)
214
+ yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
215
+ print(error_msg)
216
+ return
217
+
218
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg, api_key=""):
    """Map a raw error payload from the API onto a user-facing chatbot message.

    Returns the (possibly modified) chatbot and history. When the error
    indicates an invalid/expired key, the key is reported via report_invalid_key.
    """
    from .bridge_all import model_info
    openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
    # Token overflow: free part of the cached history so the user can retry.
    if "reduce the length" in error_msg:
        if len(history) >= 2:
            history[-1] = ""   # clear this round's output
            history[-2] = ""   # clear this round's input
        history = clip_history(inputs=inputs, history=history,
                               tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token']))  # frees at least half the history
        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
        return chatbot, history
    # Ordered (needle, message, is_key_problem) table; first match wins,
    # mirroring the original if/elif chain.
    known_errors = (
        ("does not exist", f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.", False),
        ("Incorrect API key", "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website, True),
        ("exceeded your current quota", "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." + openai_website, True),
        ("account is not active", "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website, True),
        ("associated with a deactivated account", "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website, True),
        ("API key has been deactivated", "[Local Message] API key has been deactivated. OpenAI以账户失效为由, 拒绝服务." + openai_website, True),
        ("bad forward key", "[Local Message] Bad forward key. API2D账户额度不足.", False),
        ("Not enough point", "[Local Message] Not enough point. API2D账户点数不足.", False),
    )
    for needle, message, is_key_problem in known_errors:
        if needle in error_msg:
            chatbot[-1] = (chatbot[-1][0], message)
            if is_key_problem:
                report_invalid_key(api_key)
            return chatbot, history
    # Unrecognized error: surface the traceback plus the raw chunk.
    from toolbox import regular_txt_to_markdown
    tb_str = '```\n' + trimmed_format_exc() + '```'
    chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
    return chatbot, history
247
+
248
+
249
def generate_payload(inputs, llm_kwargs, history, system_prompt, image_paths):
    """
    Assemble the HTTP headers and JSON payload for a vision-capable chat
    request (text prompt plus base64-encoded images).

    Raises AssertionError when no usable api-key is configured.
    Returns (headers, payload, api_key).
    """
    if not is_any_api_key(llm_kwargs['api_key']):
        raise AssertionError("你提供了错误的API_KEY。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。")

    api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    if API_ORG.startswith('org-'):
        headers["OpenAI-Organization"] = API_ORG
    if llm_kwargs['llm_model'].startswith('azure-'):
        headers["api-key"] = api_key
        if llm_kwargs['llm_model'] in AZURE_CFG_ARRAY.keys():
            # a dedicated (unshared) Azure key overrides the generic one
            headers["api-key"] = AZURE_CFG_ARRAY[llm_kwargs['llm_model']]["AZURE_API_KEY"]

    # One text part followed by one image_url part per attachment.
    content_parts = [{"type": "text", "text": inputs}]
    for image_path in image_paths:
        content_parts.append({
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
            }
        })
    messages = [{"role": "user", "content": content_parts}]

    model = llm_kwargs['llm_model']
    if model.startswith('api2d-'):
        model = model[len('api2d-'):]

    payload = {
        "model": model,
        "messages": messages,
        "temperature": llm_kwargs['temperature'], # 1.0,
        "top_p": llm_kwargs['top_p'], # 1.0,
        "n": 1,
        "stream": True,
        "max_tokens": get_max_token(llm_kwargs),
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }
    try:
        print(f" {llm_kwargs['llm_model']} : {inputs[:100]} ..........")
    except:
        print('输入中可能存在乱码。')
    return headers, payload, api_key
311
+
312
+
request_llms/bridge_chatgpt_website.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 借鉴了 https://github.com/GaiZhenbiao/ChuanhuChatGPT 项目
2
+
3
+ """
4
+ 该文件中主要包含三个函数
5
+
6
+ 不具备多线程能力的函数:
7
+ 1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
8
+
9
+ 具备多线程调用能力的函数
10
+ 2. predict_no_ui_long_connection:支持多线程
11
+ """
12
+
13
+ import json
14
+ import time
15
+ import gradio as gr
16
+ import logging
17
+ import traceback
18
+ import requests
19
+ import importlib
20
+
21
+ # config_private.py放自己的秘密如API和代理网址
22
+ # 读取时首先看是否存在私密的config_private配置文件(不受git管控),如果有,则覆盖原config文件
23
+ from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc
24
+ proxies, TIMEOUT_SECONDS, MAX_RETRY, API_ORG = \
25
+ get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'API_ORG')
26
+
27
+ timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
28
+ '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
29
+
30
def get_full_error(chunk, stream_response):
    """Drain the remainder of `stream_response` and append it to `chunk`,
    so the complete error body returned by OpenAI can be inspected.
    Any exception while reading (including exhaustion) stops the drain."""
    draining = True
    while draining:
        try:
            chunk = chunk + next(stream_response)
        except:
            draining = False
    return chunk
40
+
41
+
42
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
    """
    Send a query to chatGPT and wait for the complete reply (no intermediate
    UI updates); streaming is still used internally so a dropped connection
    is detected early.

    inputs: the text of this query
    sys_prompt: the silent system prompt
    llm_kwargs: chatGPT tuning parameters
    history: previous conversation turns
    observe_window: cross-thread channel; observe_window[0] is the output
        mirror, observe_window[1] is the watchdog timestamp. Leave None to
        disable.
    Returns the accumulated reply text.
    """
    watch_dog_patience = 5  # watchdog patience, in seconds
    headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
    retry = 0
    while True:
        try:
            # make a POST request to the API endpoint, stream=True
            from .bridge_all import model_info
            endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
            response = requests.post(endpoint, headers=headers, proxies=proxies,
                                     json=payload, stream=True, timeout=TIMEOUT_SECONDS); break
        except requests.exceptions.ReadTimeout as e:
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
            if MAX_RETRY != 0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')

    stream_response = response.iter_lines()
    result = ''
    while True:
        try: chunk = next(stream_response).decode()
        except StopIteration:
            break
        except requests.exceptions.ConnectionError:
            chunk = next(stream_response).decode()  # retry once; a second failure is fatal
        if len(chunk) == 0: continue
        if not chunk.startswith('data:'):
            # not an SSE data frame: drain the stream to get the full error body
            error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
            if "reduce the length" in error_msg:
                raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
            else:
                raise RuntimeError("OpenAI拒绝了请求:" + error_msg)
        if ('data: [DONE]' in chunk): break  # api2d finished normally
        json_data = json.loads(chunk.lstrip('data:'))['choices'][0]
        delta = json_data["delta"]
        if len(delta) == 0: break  # empty delta marks the end of the stream
        if "role" in delta: continue  # role-only frame, no content yet
        if "content" in delta:
            result += delta["content"]
            if not console_slience: print(delta["content"], end='')
            if observe_window is not None:
                # mirror the partial output to the observer thread
                if len(observe_window) >= 1: observe_window[0] += delta["content"]
                # watchdog: abort if the caller stopped feeding the timestamp
                if len(observe_window) >= 2:
                    if (time.time() - observe_window[1]) > watch_dog_patience:
                        raise RuntimeError("用户取消了程序。")
        else:
            # BUGFIX: `delta` is a dict — concatenating it directly to a str
            # raised TypeError and masked the real problem; stringify it first.
            raise RuntimeError("意外Json结构:" + str(delta))
        # BUGFIX: use .get() — some third-party endpoints omit finish_reason,
        # which previously raised KeyError here.
        if json_data.get('finish_reason') == 'content_filter':
            raise RuntimeError("由于提问含不合规内容被Azure过滤。")
        if json_data.get('finish_reason') == 'length':
            raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。")
    return result
108
+
109
+
110
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    Send a query to chatGPT and stream the reply into the UI.
    Used for the basic conversation feature.

    inputs: the text of this query
    top_p, temperature: chatGPT tuning parameters (inside llm_kwargs)
    history: previous conversation turns (if inputs or history is too long,
        a token-overflow error will be triggered)
    chatbot: the conversation list shown in the WebUI; modifying it and
        yielding refreshes the dialog display
    additional_fn: which "basic function" button was clicked, see functional.py
    """
    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    raw_input = inputs
    logging.info(f'[raw_input] {raw_input}')
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # refresh the UI

    try:
        headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream)
    except RuntimeError as e:
        chatbot[-1] = (inputs, f"您提供的api-key不满足要求,不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
        yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # refresh the UI
        return

    history.append(inputs); history.append("")

    retry = 0
    while True:
        try:
            # make a POST request to the API endpoint, stream=True
            from .bridge_all import model_info
            endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
            response = requests.post(endpoint, headers=headers, proxies=proxies,
                                    json=payload, stream=True, timeout=TIMEOUT_SECONDS);break
        except:
            retry += 1
            chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
            retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
            yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # refresh the UI
            if retry > MAX_RETRY: raise TimeoutError

    gpt_replying_buffer = ""

    is_head_of_the_stream = True
    if stream:
        stream_response = response.iter_lines()
        while True:
            try:
                chunk = next(stream_response)
            except StopIteration:
                # Non-official OpenAI endpoints can end the stream this way;
                # official OpenAI and API2D never reach this branch.
                chunk_decoded = chunk.decode()
                error_msg = chunk_decoded
                chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                yield from update_ui(chatbot=chatbot, history=history, msg="非Openai官方接口返回了错误:" + chunk.decode()) # refresh the UI
                return

            # print(chunk.decode()[6:])
            if is_head_of_the_stream and (r'"object":"error"' not in chunk.decode()):
                # the first frame of the stream carries no content
                is_head_of_the_stream = False; continue

            if chunk:
                try:
                    chunk_decoded = chunk.decode()
                    # the former is API2D's end condition, the latter OPENAI's
                    if 'data: [DONE]' in chunk_decoded:
                        # the stream has ended and gpt_replying_buffer is complete
                        logging.info(f'[response] {gpt_replying_buffer}')
                        break
                    # handle the body of the stream
                    chunkjson = json.loads(chunk_decoded[6:])
                    status_text = f"finish_reason: {chunkjson['choices'][0]['finish_reason']}"
                    delta = chunkjson['choices'][0]["delta"]
                    if "content" in delta:
                        gpt_replying_buffer = gpt_replying_buffer + delta["content"]
                    history[-1] = gpt_replying_buffer
                    chatbot[-1] = (history[-2], history[-1])
                    yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # refresh the UI
                except Exception as e:
                    # Usually means the payload was not valid JSON (e.g. the
                    # text was too long); drain the stream for the full error.
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # refresh the UI
                    chunk = get_full_error(chunk, stream_response)
                    chunk_decoded = chunk.decode()
                    error_msg = chunk_decoded
                    chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # refresh the UI
                    print(error_msg)
                    return
200
+
201
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
    """Translate a raw API error payload into a user-facing chatbot message.

    Returns the (possibly modified) chatbot and history.
    """
    from .bridge_all import model_info
    openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
    # Token overflow: release part of the cached history so a retry can succeed.
    if "reduce the length" in error_msg:
        if len(history) >= 2:
            history[-1] = ""   # clear this round's output
            history[-2] = ""   # clear this round's input
        history = clip_history(inputs=inputs, history=history,
                               tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token']))  # frees at least half the history
        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
        return chatbot, history
    # Ordered (needle, reply) pairs — first match wins, mirroring the
    # original if/elif chain.
    known_errors = (
        ("does not exist", f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格."),
        ("Incorrect API key", "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website),
        ("exceeded your current quota", "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." + openai_website),
        ("account is not active", "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website),
        ("associated with a deactivated account", "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website),
        ("bad forward key", "[Local Message] Bad forward key. API2D账户额度不足."),
        ("Not enough point", "[Local Message] Not enough point. API2D账户点数不足."),
    )
    for needle, reply in known_errors:
        if needle in error_msg:
            chatbot[-1] = (chatbot[-1][0], reply)
            return chatbot, history
    # Unrecognized error: surface the traceback plus the raw chunk.
    from toolbox import regular_txt_to_markdown
    tb_str = '```\n' + trimmed_format_exc() + '```'
    chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
    return chatbot, history
229
+
230
def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
    """
    Assemble everything (model choice, conversation history, new query) into
    the HTTP headers and JSON payload of a chat-completion request.

    history: alternating [user, assistant, user, assistant, ...] turns.
    Raises AssertionError when no valid api-key is configured.
    Returns (headers, payload).
    """
    if not is_any_api_key(llm_kwargs['api_key']):
        raise AssertionError("你提供了错误的API_KEY。\n\n1. 临时解决方案:直接在输入区键入api_key,然后回车提交。\n\n2. 长效解决方案:在config.py中配置。")

    headers = {
        "Content-Type": "application/json",
    }

    conversation_cnt = len(history) // 2

    messages = [{"role": "system", "content": system_prompt}]
    if conversation_cnt:
        for index in range(0, 2*conversation_cnt, 2):
            what_i_have_asked = {}
            what_i_have_asked["role"] = "user"
            what_i_have_asked["content"] = history[index]
            what_gpt_answer = {}
            what_gpt_answer["role"] = "assistant"
            what_gpt_answer["content"] = history[index+1]
            if what_i_have_asked["content"] != "":
                # skip rounds whose answer is empty or was a timeout message
                if what_gpt_answer["content"] == "": continue
                if what_gpt_answer["content"] == timeout_bot_msg: continue
                messages.append(what_i_have_asked)
                messages.append(what_gpt_answer)
            else:
                # empty user turn: fold the answer into the previous message
                messages[-1]['content'] = what_gpt_answer['content']

    what_i_ask_now = {}
    what_i_ask_now["role"] = "user"
    what_i_ask_now["content"] = inputs
    messages.append(what_i_ask_now)

    model = llm_kwargs['llm_model']
    if model.startswith('api2d-'):
        # BUGFIX: the original used llm_kwargs['llm_model'].strip('api2d-').
        # str.strip removes a *character set* from both ends (any of
        # 'a p i 2 d -'), not a prefix, so model names beginning or ending
        # with those characters could be corrupted. Remove the prefix exactly.
        model = model[len('api2d-'):]

    payload = {
        "model": model,
        "messages": messages,
        "temperature": llm_kwargs['temperature'], # 1.0,
        "top_p": llm_kwargs['top_p'], # 1.0,
        "n": 1,
        "stream": stream,
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }
    try:
        print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
    except:
        print('输入中可能存在乱码。')
    return headers, payload
280
+
281
+
request_llms/bridge_deepseekcoder.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name = "deepseek-coder-6.7b-instruct"
2
+ cmd_to_install = "未知" # "`pip install -r request_llms/requirements_qwen.txt`"
3
+
4
+ import os
5
+ from toolbox import ProxyNetworkActivate
6
+ from toolbox import get_conf
7
+ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
8
+ from threading import Thread
9
+ import torch
10
+
11
def download_huggingface_model(model_name, max_retry, local_dir):
    """Download a HuggingFace repo snapshot into `local_dir`, retrying on failure.

    Args:
        model_name: repo id, e.g. "deepseek-ai/deepseek-coder-6.7b-instruct".
        max_retry: maximum number of download attempts.
        local_dir: target directory for the snapshot.

    Returns:
        local_dir on success.

    Raises:
        RuntimeError: when every attempt fails. (The original silently
        returned the path of a possibly-incomplete directory in this case,
        and its `range(1, max_retry)` performed only max_retry-1 attempts.)
    """
    from huggingface_hub import snapshot_download
    last_exc = None
    for i in range(1, max_retry + 1):
        try:
            snapshot_download(repo_id=model_name, local_dir=local_dir, resume_download=True)
            return local_dir
        except Exception as e:
            last_exc = e
            print(f'\n\n下载失败,重试第{i}次中...\n\n')
    raise RuntimeError(f"Failed to download {model_name} after {max_retry} attempts") from last_exc
20
+ # ------------------------------------------------------------------------------------------------------------------------
21
+ # 🔌💻 Local Model
22
+ # ------------------------------------------------------------------------------------------------------------------------
23
class GetCoderLMHandle(LocalLLMHandle):
    """Subprocess-backed handle for the local deepseek-coder-6.7b-instruct model.

    LocalLLMHandle runs the load_* methods and llm_stream_generator in a
    child process; only try_to_import_special_deps runs in the main process.
    """

    def load_model_info(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        # Downloads (through the configured proxy) and instantiates the model
        # and tokenizer, honoring the LOCAL_MODEL_QUANT / LOCAL_MODEL_DEVICE
        # configuration. Returns (model, tokenizer).
        with ProxyNetworkActivate('Download_LLM'):
            from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
            model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"
            # local_dir = f"~/.cache/{model_name}"
            # if not os.path.exists(local_dir):
            #     tokenizer = download_huggingface_model(model_name, max_retry=128, local_dir=local_dir)
            tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
            self._streamer = TextIteratorStreamer(tokenizer)
            # Map every listed submodule onto device 0 (covers both the
            # "transformer.*" and the "model.*" parameter naming layouts).
            device_map = {
                "transformer.word_embeddings": 0,
                "transformer.word_embeddings_layernorm": 0,
                "lm_head": 0,
                "transformer.h": 0,
                "transformer.ln_f": 0,
                "model.embed_tokens": 0,
                "model.layers": 0,
                "model.norm": 0,
            }

            # check the quantization configuration
            quantization_type = get_conf('LOCAL_MODEL_QUANT')

            if get_conf('LOCAL_MODEL_DEVICE') != 'cpu':
                if quantization_type == "INT8":
                    from transformers import BitsAndBytesConfig
                    # load with INT8 quantization
                    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, load_in_8bit=True,
                                                                 device_map=device_map)
                elif quantization_type == "INT4":
                    from transformers import BitsAndBytesConfig
                    # load with INT4 (nf4) quantization
                    bnb_config = BitsAndBytesConfig(
                        load_in_4bit=True,
                        bnb_4bit_use_double_quant=True,
                        bnb_4bit_quant_type="nf4",
                        bnb_4bit_compute_dtype=torch.bfloat16
                    )
                    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True,
                                                                 quantization_config=bnb_config, device_map=device_map)
                else:
                    # default: bfloat16 weights on GPU
                    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True,
                                                                 torch_dtype=torch.bfloat16, device_map=device_map)
            else:
                # CPU mode
                model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True,
                                                             torch_dtype=torch.bfloat16)

        return model, tokenizer

    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        # Yields the cumulative generated text after each new token.
        def adaptor(kwargs):
            # Unpack the generation request forwarded by LocalLLMHandle.
            query = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            return query, max_length, top_p, temperature, history

        query, max_length, top_p, temperature, history = adaptor(kwargs)
        history.append({ 'role': 'user', 'content': query})
        messages = history
        inputs = self._tokenizer.apply_chat_template(messages, return_tensors="pt")
        # Keep only the most recent max_length tokens if the prompt overflows.
        if inputs.shape[1] > max_length:
            inputs = inputs[:, -max_length:]
        inputs = inputs.to(self._model.device)
        # NOTE(review): do_sample=False makes top_p/temperature effectively
        # no-ops here (generation is greedy) — confirm whether sampling was
        # intended.
        generation_kwargs = dict(
            inputs=inputs,
            max_new_tokens=max_length,
            do_sample=False,
            top_p=top_p,
            streamer = self._streamer,
            top_k=50,
            temperature=temperature,
            num_return_sequences=1,
            eos_token_id=32021,  # presumably deepseek-coder's <|EOT|> id — TODO confirm
        )
        # Run generate() on a daemon thread; tokens arrive via the streamer.
        thread = Thread(target=self._model.generate, kwargs=generation_kwargs, daemon=True)
        thread.start()
        generated_text = ""
        for new_text in self._streamer:
            generated_text += new_text
            # print(generated_text)
            yield generated_text


    def try_to_import_special_deps(self, **kwargs): pass
        # import something that will raise error if the user does not install requirement_*.txt
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the main process
        # import importlib
        # importlib.import_module('modelscope')
124
+
125
+
126
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
# Expose the standard (predict_no_ui_long_connection, predict) pair expected
# by bridge_all, backed by the local DeepSeek-Coder handle defined above.
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetCoderLMHandle, model_name, history_format='chatglm3')
request_llms/bridge_google_gemini.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # encoding: utf-8
2
+ # @Time : 2023/12/21
3
+ # @Author : Spike
4
+ # @Descr :
5
+ import json
6
+ import re
7
+ import os
8
+ import time
9
+ from request_llms.com_google import GoogleChatInit
10
+ from toolbox import get_conf, update_ui, update_ui_lastest_msg, have_any_recent_upload_image_files, trimmed_format_exc
11
+
12
+ proxies, TIMEOUT_SECONDS, MAX_RETRY = get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY')
13
+ timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
14
+ '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
15
+
16
+
17
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None,
                                  console_slience=False):
    """
    Send a query to Gemini and block until the full reply has streamed in.

    observe_window: optional cross-thread channel — observe_window[0] mirrors
        the partial output, observe_window[1] is the watchdog timestamp.
    Returns the accumulated reply text.
    Raises ValueError when GEMINI_API_KEY is missing or a chunk cannot be
    parsed, RuntimeError on watchdog timeout or an API-reported error.
    """
    # check the API key
    if get_conf("GEMINI_API_KEY") == "":
        raise ValueError(f"请配置 GEMINI_API_KEY。")

    genai = GoogleChatInit()
    watch_dog_patience = 5  # watchdog patience, in seconds
    gpt_replying_buffer = ''
    stream_response = genai.generate_chat(inputs, llm_kwargs, history, sys_prompt)
    for response in stream_response:
        results = response.decode()
        match = re.search(r'"text":\s*"((?:[^"\\]|\\.)*)"', results, flags=re.DOTALL)
        error_match = re.search(r'\"message\":\s*\"(.*?)\"', results, flags=re.DOTALL)
        if match:
            try:
                # round-trip through json so escape sequences are decoded
                paraphrase = json.loads('{"text": "%s"}' % match.group(1))
            except:
                raise ValueError(f"解析GEMINI消息出错。")
            buffer = paraphrase['text']
            gpt_replying_buffer += buffer
            # BUGFIX: observe_window defaults to None; the original called
            # len(observe_window) unconditionally and raised TypeError when
            # the caller relied on the default.
            if observe_window is not None:
                if len(observe_window) >= 1:
                    observe_window[0] = gpt_replying_buffer
                if len(observe_window) >= 2:
                    if (time.time() - observe_window[1]) > watch_dog_patience: raise RuntimeError("程序终止。")
        if error_match:
            raise RuntimeError(f'{gpt_replying_buffer} 对话错误')
    return gpt_replying_buffer
45
+
46
+
47
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream=True, additional_fn=None):
    """
    Stream a Gemini chat completion into the WebUI, yielding UI refreshes as
    tokens arrive. On an API error, the failed round is dropped from
    `history` and RuntimeError is raised.
    """
    # check the API key
    if get_conf("GEMINI_API_KEY") == "":
        yield from update_ui_lastest_msg(f"请配置 GEMINI_API_KEY。", chatbot=chatbot, history=history, delay=0)
        return

    if "vision" in llm_kwargs["llm_model"]:
        have_recent_file, image_paths = have_any_recent_upload_image_files(chatbot)
        def make_media_input(inputs, image_paths):
            # inline recently uploaded images into the prompt shown in the UI
            for image_path in image_paths:
                inputs = inputs + f'<br/><br/><div align="center"><img src="file={os.path.abspath(image_path)}"></div>'
            return inputs
        if have_recent_file:
            inputs = make_media_input(inputs, image_paths)

    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history)
    genai = GoogleChatInit()
    retry = 0
    while True:
        try:
            stream_response = genai.generate_chat(inputs, llm_kwargs, history, system_prompt)
            break
        except Exception as e:
            retry += 1
            chatbot[-1] = ((chatbot[-1][0], trimmed_format_exc()))
            yield from update_ui(chatbot=chatbot, history=history, msg="请求失败") # refresh the UI
            return
    gpt_replying_buffer = ""
    gpt_security_policy = ""
    history.extend([inputs, ''])
    for response in stream_response:
        results = response.decode("utf-8")  # decode the raw SSE bytes
        gpt_security_policy += results
        match = re.search(r'"text":\s*"((?:[^"\\]|\\.)*)"', results, flags=re.DOTALL)
        error_match = re.search(r'\"message\":\s*\"(.*)\"', results, flags=re.DOTALL)
        if match:
            try:
                paraphrase = json.loads('{"text": "%s"}' % match.group(1))
            except:
                raise ValueError(f"解析GEMINI消息出错。")
            gpt_replying_buffer += paraphrase['text']  # decoded via the json library
            chatbot[-1] = (inputs, gpt_replying_buffer)
            history[-1] = gpt_replying_buffer
            yield from update_ui(chatbot=chatbot, history=history)
        if error_match:
            # BUGFIX: the original `history = history[-2]` replaced the whole
            # history *list* with a single string; slice off the failed round.
            history = history[:-2]  # drop the failed round from the history
            chatbot[-1] = (inputs, gpt_replying_buffer + f"对话错误,请查看message\n\n```\n{error_match.group(1)}\n```")
            yield from update_ui(chatbot=chatbot, history=history)
            raise RuntimeError('对话错误')
    if not gpt_replying_buffer:
        # BUGFIX: same list-vs-element slicing error as above.
        history = history[:-2]  # drop the failed round from the history
        chatbot[-1] = (inputs, gpt_replying_buffer + f"触发了Google的安全访问策略,没有回答\n\n```\n{gpt_security_policy}\n```")
        yield from update_ui(chatbot=chatbot, history=history)
101
+
102
+
103
+
104
if __name__ == '__main__':
    # Manual smoke test: stream a gemini-pro completion and print each chunk.
    import sys  # NOTE(review): imported but unused — kept unchanged in this doc-only edit
    llm_kwargs = {'llm_model': 'gemini-pro'}
    result = predict('Write long a story about a magic backpack.', llm_kwargs, llm_kwargs, [])
    for i in result:
        print(i)
request_llms/bridge_internlm.py ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name = "InternLM"
2
+ cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`"
3
+
4
+ from transformers import AutoModel, AutoTokenizer
5
+ import time
6
+ import threading
7
+ import importlib
8
+ from toolbox import update_ui, get_conf, ProxyNetworkActivate
9
+ from multiprocessing import Process, Pipe
10
+ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
11
+
12
+
13
+ # ------------------------------------------------------------------------------------------------------------------------
14
+ # 🔌💻 Local Model Utils
15
+ # ------------------------------------------------------------------------------------------------------------------------
16
def try_to_import_special_deps():
    # Probe import: raises ImportError if the deps from requirements_chatglm.txt
    # are missing, so the caller can show an install hint instead of crashing later.
    import sentencepiece
18
+
19
def combine_history(prompt, hist):
    """Flatten the chat history plus the new prompt into InternLM's text template.

    Each past turn becomes ``<|User|>:q<eoh>`` / ``<|Bot|>:a<eoa>`` lines; the new
    prompt is appended with a trailing ``<|Bot|>:`` marker for the model to continue.
    """
    segments = []
    for turn in hist:
        segments.append(f"<|User|>:{turn[0]}<eoh>\n")
        segments.append(f"<|Bot|>:{turn[1]}<eoa>\n")
    # The current question, left open for the bot's reply.
    segments.append(f"<|User|>:{prompt}<eoh>\n<|Bot|>:")
    return "".join(segments)
33
+
34
+ # ------------------------------------------------------------------------------------------------------------------------
35
+ # 🔌💻 Local Model
36
+ # ------------------------------------------------------------------------------------------------------------------------
37
class GetInternlmHandle(LocalLLMHandle):
    """Subprocess handle for internlm/internlm-chat-7b: loads the model once and
    streams token-by-token generations (sampling loop adapted from InternLM's web demo)."""

    def load_model_info(self):
        # 🏃‍♂️ runs in the child process
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def try_to_import_special_deps(self, **kwargs):
        """
        import something that will raise error if the user does not install requirement_*.txt
        """
        import sentencepiece

    def load_model_and_tokenizer(self):
        # 🏃‍♂️ runs in the child process
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer
        device = get_conf('LOCAL_MODEL_DEVICE')
        with ProxyNetworkActivate('Download_LLM'):
            if self._model is None:
                tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
                # bfloat16 halves memory; only move to CUDA when a GPU device is configured.
                if device=='cpu':
                    model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16)
                else:
                    model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16).cuda()

                model = model.eval()
        # NOTE(review): if self._model is already set, `model`/`tokenizer` are unbound
        # here — confirm the handle never invokes this method twice.
        return model, tokenizer

    def llm_stream_generator(self, **kwargs):
        import torch
        import logging
        import copy
        import warnings
        import torch.nn as nn
        from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList, GenerationConfig

        # 🏃‍♂️ runs in the child process
        def adaptor():
            # Unpack the request fields forwarded by LocalLLMHandle and fold the
            # chat history into InternLM's prompt template.
            model = self._model
            tokenizer = self._tokenizer
            prompt = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            real_prompt = combine_history(prompt, history)
            return model, tokenizer, real_prompt, max_length, top_p, temperature

        model, tokenizer, prompt, max_length, top_p, temperature = adaptor()
        prefix_allowed_tokens_fn = None
        logits_processor = None
        stopping_criteria = None
        # 103028 is InternLM's extra end-of-answer token (<eoa>) — treated as EOS below.
        additional_eos_token_id = 103028
        generation_config = None
        # 🏃‍♂️ generation loop adapted from:
        # 🏃‍♂️ https://github.com/InternLM/InternLM/blob/efbf5335709a8c8faeac6eaf07193973ff1d56a1/web_demo.py#L25

        inputs = tokenizer([prompt], padding=True, return_tensors="pt")
        input_length = len(inputs["input_ids"][0])
        device = get_conf('LOCAL_MODEL_DEVICE')
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        input_ids = inputs["input_ids"]
        batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]
        if generation_config is None:
            generation_config = model.generation_config
        # Deep-copy so per-request overrides don't mutate the model's shared config.
        generation_config = copy.deepcopy(generation_config)
        model_kwargs = generation_config.update(**kwargs)
        bos_token_id, eos_token_id = generation_config.bos_token_id, generation_config.eos_token_id
        if isinstance(eos_token_id, int):
            eos_token_id = [eos_token_id]
        if additional_eos_token_id is not None:
            eos_token_id.append(additional_eos_token_id)
        has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
        if has_default_max_length and generation_config.max_new_tokens is None:
            warnings.warn(
                f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
                "This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
                " recommend using `max_new_tokens` to control the maximum length of the generation.",
                UserWarning,
            )
        elif generation_config.max_new_tokens is not None:
            # max_new_tokens wins over max_length, mirroring transformers' own logic.
            generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
            if not has_default_max_length:
                logging.warn(
                    f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
                    f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
                    "Please refer to the documentation for more information. "
                    "(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)",
                    UserWarning,
                )

        if input_ids_seq_length >= generation_config.max_length:
            input_ids_string = "input_ids"
            logging.warning(
                f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
                f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
                " increasing `max_new_tokens`."
            )

        # 2. Set generation parameters if not already defined
        logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
        stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()

        logits_processor = model._get_logits_processor(
            generation_config=generation_config,
            input_ids_seq_length=input_ids_seq_length,
            encoder_input_ids=input_ids,
            prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
            logits_processor=logits_processor,
        )

        stopping_criteria = model._get_stopping_criteria(
            generation_config=generation_config, stopping_criteria=stopping_criteria
        )
        logits_warper = model._get_logits_warper(generation_config)

        # 1 = still generating; multiplied to 0 once an EOS token is produced.
        unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
        scores = None
        while True:
            model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
            # forward pass to get next token
            outputs = model(
                **model_inputs,
                return_dict=True,
                output_attentions=False,
                output_hidden_states=False,
            )

            next_token_logits = outputs.logits[:, -1, :]

            # pre-process distribution
            next_token_scores = logits_processor(input_ids, next_token_logits)
            next_token_scores = logits_warper(input_ids, next_token_scores)

            # sample
            probs = nn.functional.softmax(next_token_scores, dim=-1)
            if generation_config.do_sample:
                next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
            else:
                next_tokens = torch.argmax(probs, dim=-1)

            # update generated ids, model inputs, and length for next step
            input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
            model_kwargs = model._update_model_kwargs_for_generation(
                outputs, model_kwargs, is_encoder_decoder=False
            )
            # Marks a sequence finished as soon as any configured EOS id is emitted.
            unfinished_sequences = unfinished_sequences.mul((min(next_tokens != i for i in eos_token_id)).long())

            # Decode only the freshly generated suffix (drop the prompt tokens).
            output_token_ids = input_ids[0].cpu().tolist()
            output_token_ids = output_token_ids[input_length:]
            for each_eos_token_id in eos_token_id:
                if output_token_ids[-1] == each_eos_token_id:
                    output_token_ids = output_token_ids[:-1]
            response = tokenizer.decode(output_token_ids)

            yield response
            # stop when each sentence is finished, or if we exceed the maximum length
            if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
                return
198
+
199
+
200
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
# Wire the handle class into the framework's two standard entry points.
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetInternlmHandle, model_name)
request_llms/bridge_llama2.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name = "LLaMA"
2
+ cmd_to_install = "`pip install -r request_llms/requirements_chatglm.txt`"
3
+
4
+
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
6
+ from toolbox import update_ui, get_conf, ProxyNetworkActivate
7
+ from multiprocessing import Process, Pipe
8
+ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
9
+ from threading import Thread
10
+
11
+
12
+ # ------------------------------------------------------------------------------------------------------------------------
13
+ # 🔌💻 Local Model
14
+ # ------------------------------------------------------------------------------------------------------------------------
15
class GetLlamaHandle(LocalLLMHandle):
    """Subprocess handle for meta-llama/Llama-2-7b-chat-hf: loads the gated model
    once (child process) and streams generations via TextIteratorStreamer."""

    def load_model_info(self):
        # 🏃‍♂️ runs in the child process
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # 🏃‍♂️ runs in the child process
        import os
        huggingface_token, device = get_conf('HUGGINGFACE_ACCESS_TOKEN', 'LOCAL_MODEL_DEVICE')
        assert len(huggingface_token) != 0, "没有填写 HUGGINGFACE_ACCESS_TOKEN"
        # Persist the token so huggingface_hub can access the gated Llama-2 repo.
        with open(os.path.expanduser('~/.cache/huggingface/token'), 'w') as f:
            f.write(huggingface_token)
        model_id = 'meta-llama/Llama-2-7b-chat-hf'
        with ProxyNetworkActivate('Download_LLM'):
            self._tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=huggingface_token)
            model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=huggingface_token).eval()
            # use fp16 when running on GPU
            if device.startswith('cuda'): model = model.half().to(device)
            self._model = model

        return self._model, self._tokenizer

    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️ runs in the child process
        def adaptor(kwargs):
            # Unpack the request fields forwarded by LocalLLMHandle.
            query = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            console_slience = kwargs.get('console_slience', True)
            return query, max_length, top_p, temperature, history, console_slience

        def convert_messages_to_prompt(query, history):
            # Llama-2 chat template: "[INST]question[/INST]answer" per past turn.
            prompt = ""
            for a, b in history:
                prompt += f"\n[INST]{a}[/INST]"
                # BUGFIX: the original appended '"\n{b}" + b', injecting the literal
                # string "{b}" into the prompt before every answer.
                prompt += "\n" + b
            prompt += f"\n[INST]{query}[/INST]"
            return prompt

        query, max_length, top_p, temperature, history, console_slience = adaptor(kwargs)
        prompt = convert_messages_to_prompt(query, history)
        # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-
        # code adapted from the transformers Llama streaming example
        streamer = TextIteratorStreamer(self._tokenizer)
        # Run the generation in a separate thread, so that we can fetch the generated text in a non-blocking way.
        inputs = self._tokenizer([prompt], return_tensors="pt")
        # Round-tripped prompt text as the tokenizer will reproduce it in the stream.
        prompt_tk_back = self._tokenizer.batch_decode(inputs['input_ids'])[0]

        generation_kwargs = dict(inputs.to(self._model.device), streamer=streamer, max_new_tokens=max_length)
        thread = Thread(target=self._model.generate, kwargs=generation_kwargs)
        thread.start()
        generated_text = ""
        for new_text in streamer:
            generated_text += new_text
            if not console_slience: print(new_text, end='')
            # BUGFIX: str.lstrip/rstrip remove *character sets*, not exact
            # prefixes/suffixes, so they could eat the start/end of the reply.
            # Strip the exact prompt prefix and the exact "</s>" suffix instead.
            visible = generated_text
            if visible.startswith(prompt_tk_back):
                visible = visible[len(prompt_tk_back):]
            if visible.endswith("</s>"):
                visible = visible[:-len("</s>")]
            yield visible
        if not console_slience: print()
        # =-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-=-

    def try_to_import_special_deps(self, **kwargs):
        # import something that will raise error if the user does not install requirement_*.txt
        # 🏃‍♂️ runs in the main process
        import importlib
        importlib.import_module('transformers')
85
+
86
+
87
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
# Wire the handle class into the framework's two standard entry points.
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetLlamaHandle, model_name)
request_llms/bridge_newbingfree.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ========================================================================
3
+ 第一部分:来自EdgeGPT.py
4
+ https://github.com/acheong08/EdgeGPT
5
+ ========================================================================
6
+ """
7
+ from .edge_gpt_free import Chatbot as NewbingChatbot
8
+ load_message = "等待NewBing响应。"
9
+
10
+ """
11
+ ========================================================================
12
+ 第二部分:子进程Worker(调用主体)
13
+ ========================================================================
14
+ """
15
+ import time
16
+ import json
17
+ import re
18
+ import logging
19
+ import asyncio
20
+ import importlib
21
+ import threading
22
+ from toolbox import update_ui, get_conf, trimmed_format_exc
23
+ from multiprocessing import Process, Pipe
24
+
25
def preprocess_newbing_out(s):
    """Convert NewBing's ^n^ citation markers to (n) and, when citations exist,
    append a fenced ```reference``` block listing all lines starting with '['."""
    result = re.sub(r'\^(\d+)\^', lambda hit: f'({hit.group(1)})', s)
    if '[1]' in result:
        reference_lines = [ln for ln in result.split('\n') if ln.startswith('[')]
        result += '\n\n```reference\n' + '\n'.join(reference_lines) + '\n```\n'
    return result
32
+
33
def preprocess_newbing_out_simple(result):
    """Append a fenced ```reference``` block with citation lines (those starting
    with '[') whenever the reply contains the first citation marker '[1]'."""
    if '[1]' in result:
        reference_lines = [ln for ln in result.split('\n') if ln.startswith('[')]
        result = result + '\n\n```reference\n' + '\n'.join(reference_lines) + '\n```\n'
    return result
37
+
38
class NewBingHandle(Process):
    """Child process wrapping the (unmaintained) NewBing chatbot; the main process
    talks to it over a Pipe, serialized by a thread lock."""

    def __init__(self):
        super().__init__(daemon=True)
        self.parent, self.child = Pipe()
        self.newbing_model = None
        self.info = ""
        self.success = True
        self.local_history = []     # prompts already sent, to avoid re-sending context
        self.check_dependency()
        self.start()                # forks the child process running self.run()
        self.threadLock = threading.Lock()

    def check_dependency(self):
        # Probe optional deps; sets self.success/self.info instead of raising.
        try:
            self.success = False
            import certifi, httpx, rich
            self.info = "依赖检测通过,等待NewBing响应。注意目前不能多人同时调用NewBing接口(有线程锁),否则将导致每个人的NewBing问询历史互相渗透。调用NewBing时,会自动使用已配置的代理。"
            self.success = True
        except:
            self.info = "缺少的依赖,如果要使用Newbing,除了基础的pip依赖以外,您还需要运行`pip install -r request_llms/requirements_newbing.txt`安装Newbing的依赖。"
            self.success = False

    def ready(self):
        # True once the child has constructed the chatbot.
        return self.newbing_model is not None

    async def async_run(self):
        # Read configuration
        NEWBING_STYLE = get_conf('NEWBING_STYLE')
        from request_llms.bridge_all import model_info
        endpoint = model_info['newbing']['endpoint']
        while True:
            # Wait for the next request from the main process
            kwargs = self.child.recv()
            question=kwargs['query']
            history=kwargs['history']
            system_prompt=kwargs['system_prompt']

            # Reset the conversation when the caller cleared its history
            if len(self.local_history) > 0 and len(history)==0:
                await self.newbing_model.reset()
                self.local_history = []

            # Start building the question
            prompt = ""
            if system_prompt not in self.local_history:
                self.local_history.append(system_prompt)
                prompt += system_prompt + '\n'

            # Append history not yet sent to NewBing
            for ab in history:
                a, b = ab
                if a not in self.local_history:
                    self.local_history.append(a)
                    prompt += a + '\n'

            # The question itself
            prompt += question
            self.local_history.append(question)
            print('question:', prompt)
            # Submit; NOTE(review): only `question` is sent, the assembled `prompt`
            # (system prompt + history) is unused here — confirm intent.
            async for final, response in self.newbing_model.ask_stream(
                prompt=question,
                conversation_style=NEWBING_STYLE, # ["creative", "balanced", "precise"]
                wss_link=endpoint, # "wss://sydney.bing.com/sydney/ChatHub"
            ):
                if not final:
                    print(response)
                    self.child.send(str(response))
                else:
                    print('-------- receive final ---------')
                    self.child.send('[Finish]')
                    # self.local_history.append(response)


    def run(self):
        """
        Runs in the child process.
        """
        # First run: load parameters and build the chatbot.
        self.success = False
        self.local_history = []
        if (self.newbing_model is None) or (not self.success):
            # Proxy settings
            proxies, NEWBING_COOKIES = get_conf('proxies', 'NEWBING_COOKIES')
            if proxies is None:
                self.proxies_https = None
            else:
                self.proxies_https = proxies['https']

            # Cookies shorter than ~100 chars cannot be a valid cookie JSON blob.
            if (NEWBING_COOKIES is not None) and len(NEWBING_COOKIES) > 100:
                try:
                    cookies = json.loads(NEWBING_COOKIES)
                except:
                    self.success = False
                    tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n'
                    self.child.send(f'[Local Message] NEWBING_COOKIES未填写或有格式错误。')
                    self.child.send('[Fail]'); self.child.send('[Finish]')
                    raise RuntimeError(f"NEWBING_COOKIES未填写或有格式错误。")
            else:
                cookies = None

            try:
                self.newbing_model = NewbingChatbot(proxy=self.proxies_https, cookies=cookies)
            except:
                self.success = False
                tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n'
                self.child.send(f'[Local Message] 不能加载Newbing组件,请注意Newbing组件已不再维护。{tb_str}')
                self.child.send('[Fail]')
                self.child.send('[Finish]')
                raise RuntimeError(f"不能加载Newbing组件,请注意Newbing组件已不再维护。")

        self.success = True
        try:
            # Enter the request-serving loop.
            asyncio.run(self.async_run())
        except Exception:
            tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n'
            self.child.send(f'[Local Message] Newbing 请求失败,报错信息如下. 如果是与网络相关的问题,建议更换代理协议(推荐http)或代理节点 {tb_str}.')
            self.child.send('[Fail]')
            self.child.send('[Finish]')

    def stream_chat(self, **kwargs):
        """
        Runs in the main process; yields reply fragments from the child.
        """
        self.threadLock.acquire() # acquire the lock (one conversation at a time)
        self.parent.send(kwargs) # forward the request to the child process
        while True:
            res = self.parent.recv() # wait for the next fragment from NewBing
            if res == '[Finish]': break # done
            elif res == '[Fail]': self.success = False; break # child reported failure
            else: yield res # a reply fragment
        self.threadLock.release() # release the lock
171
+
172
+
173
+ """
174
+ ========================================================================
175
+ 第三部分:主进程统一调用函数接口
176
+ ========================================================================
177
+ """
178
# Module-level singleton: one NewBingHandle child process shared by all calls.
# (A `global` statement at module scope is a no-op; kept from the original.)
global newbingfree_handle
newbingfree_handle = None
180
+
181
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
    """
    Multi-threaded entry point; see request_llms/bridge_all.py for the contract.
    Lazily (re)creates the global NewBingHandle, then streams the reply while
    mirroring progress into observe_window[0]; observe_window[1] is a watchdog
    timestamp refreshed by the caller.
    """
    global newbingfree_handle
    if (newbingfree_handle is None) or (not newbingfree_handle.success):
        newbingfree_handle = NewBingHandle()
        if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + newbingfree_handle.info
        if not newbingfree_handle.success:
            error = newbingfree_handle.info
            newbingfree_handle = None  # drop the broken handle so the next call retries
            raise RuntimeError(error)

    # There is no sys_prompt channel, so the prompt is folded into the history.
    history_feedin = []
    for i in range(len(history)//2):
        history_feedin.append([history[2*i], history[2*i+1]] )

    watch_dog_patience = 5 # watchdog patience: 5 seconds without a caller heartbeat aborts
    response = ""
    if len(observe_window) >= 1: observe_window[0] = "[Local Message] 等待NewBing响应中 ..."
    for response in newbingfree_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=sys_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        if len(observe_window) >= 1: observe_window[0] = preprocess_newbing_out_simple(response)
        if len(observe_window) >= 2:
            if (time.time()-observe_window[1]) > watch_dog_patience:
                raise RuntimeError("程序终止。")
    return preprocess_newbing_out_simple(response)
209
+
210
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    Single-threaded UI entry point; see request_llms/bridge_all.py for the contract.
    Streams the NewBing reply into the chatbot widget and appends the final
    exchange to `history`.
    """
    chatbot.append((inputs, "[Local Message] 等待NewBing响应中 ..."))

    global newbingfree_handle
    if (newbingfree_handle is None) or (not newbingfree_handle.success):
        newbingfree_handle = NewBingHandle()
        chatbot[-1] = (inputs, load_message + "\n\n" + newbingfree_handle.info)
        yield from update_ui(chatbot=chatbot, history=[])
        if not newbingfree_handle.success:
            newbingfree_handle = None
            return

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    # No sys_prompt channel: feed the history as question/answer pairs instead.
    history_feedin = []
    for i in range(len(history)//2):
        history_feedin.append([history[2*i], history[2*i+1]] )

    chatbot[-1] = (inputs, "[Local Message] 等待NewBing响应中 ...")
    response = "[Local Message] 等待NewBing响应中 ..."
    yield from update_ui(chatbot=chatbot, history=history, msg="NewBing响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。")
    for response in newbingfree_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        chatbot[-1] = (inputs, preprocess_newbing_out(response))
        yield from update_ui(chatbot=chatbot, history=history, msg="NewBing响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。")
    # If the sentinel survived the loop, the stream produced nothing.
    if response == "[Local Message] 等待NewBing响应中 ...": response = "[Local Message] NewBing响应异常,请刷新界面重试 ..."
    history.extend([inputs, response])
    logging.info(f'[raw_input] {inputs}')
    logging.info(f'[response] {response}')
    yield from update_ui(chatbot=chatbot, history=history, msg="完成全部响应,请提交新问题。")
245
+
request_llms/bridge_qianfan.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import time, requests, json
3
+ from multiprocessing import Process, Pipe
4
+ from functools import wraps
5
+ from datetime import datetime, timedelta
6
+ from toolbox import get_conf, update_ui, is_any_api_key, select_api_key, what_keys, clip_history, trimmed_format_exc, get_conf
7
+
8
+ model_name = '千帆大模型平台'
9
+ timeout_bot_msg = '[Local Message] Request timeout. Network error.'
10
+
11
def cache_decorator(timeout):
    """Memoize a function's results for `timeout` seconds.

    The cache key combines the function name, positional args, and keyword args
    (which must all be hashable). Expired entries are recomputed on access.
    """
    store = {}
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            cache_key = (func.__name__, args, frozenset(kwargs.items()))
            entry = store.get(cache_key)
            # Serve from cache only while the entry is still fresh.
            if entry is not None:
                value, stamp = entry
                if datetime.now() - stamp < timedelta(seconds=timeout):
                    return value
            # Miss or expired: recompute and remember when we did.
            value = func(*args, **kwargs)
            store[cache_key] = (value, datetime.now())
            return value
        return wrapper
    return decorator
29
+
30
@cache_decorator(timeout=3600)
def get_access_token():
    """
    Exchange the Baidu Cloud AK/SK pair for an OAuth access token.
    Memoized for one hour by `cache_decorator` (token validity window).
    :return: the access token string
    :raises RuntimeError: if either key is unconfigured
    """
    # if (access_token_cache is None) or (time.time() - last_access_token_obtain_time > 3600):
    BAIDU_CLOUD_API_KEY, BAIDU_CLOUD_SECRET_KEY = get_conf('BAIDU_CLOUD_API_KEY', 'BAIDU_CLOUD_SECRET_KEY')

    if len(BAIDU_CLOUD_SECRET_KEY) == 0: raise RuntimeError("没有配置BAIDU_CLOUD_SECRET_KEY")
    if len(BAIDU_CLOUD_API_KEY) == 0: raise RuntimeError("没有配置BAIDU_CLOUD_API_KEY")

    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": BAIDU_CLOUD_API_KEY, "client_secret": BAIDU_CLOUD_SECRET_KEY}
    # NOTE(review): str() turns a missing token (None) into the literal "None" —
    # confirm downstream callers surface auth failures sensibly.
    access_token_cache = str(requests.post(url, params=params).json().get("access_token"))
    return access_token_cache
    # Dead code kept from the earlier manual-cache implementation:
    # else:
    #     return access_token_cache
48
+
49
+
50
def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
    """
    Build the strictly alternating user/assistant message list required by the
    Baidu Qianfan (ERNIE) chat API.

    The API requires the first message to come from the user and forbids empty
    messages, so empty entries are padded with the "Hello" placeholder and the
    system prompt is injected as a leading user/assistant pair.

    :param inputs: the current user question
    :param llm_kwargs: unused here (kept for interface parity with other bridges)
    :param history: flat list [q1, a1, q2, a2, ...]
    :param system_prompt: optional system instruction, sent as the first user turn
    :return: list of {"role": ..., "content": ...} dicts
    """
    conversation_cnt = len(history) // 2
    if system_prompt == "": system_prompt = "Hello"
    messages = [{"role": "user", "content": system_prompt}]
    messages.append({"role": "assistant", "content": 'Certainly!'})
    if conversation_cnt:
        for index in range(0, 2*conversation_cnt, 2):
            what_i_have_asked = {}
            what_i_have_asked["role"] = "user"
            what_i_have_asked["content"] = history[index] if history[index]!="" else "Hello"
            what_gpt_answer = {}
            what_gpt_answer["role"] = "assistant"
            # BUGFIX: the emptiness check previously inspected history[index] (the
            # question) instead of history[index+1] (the answer), so empty answers
            # were dropped from the payload instead of being padded.
            what_gpt_answer["content"] = history[index+1] if history[index+1]!="" else "Hello"
            if what_i_have_asked["content"] != "":
                # Skip pairs whose answer is empty or a timeout placeholder.
                if what_gpt_answer["content"] == "": continue
                if what_gpt_answer["content"] == timeout_bot_msg: continue
                messages.append(what_i_have_asked)
                messages.append(what_gpt_answer)
            else:
                # Defensive: merge an orphan answer into the previous message.
                messages[-1]['content'] = what_gpt_answer['content']
    what_i_ask_now = {}
    what_i_ask_now["role"] = "user"
    what_i_ask_now["content"] = inputs
    messages.append(what_i_ask_now)
    return messages
75
+
76
+
77
def generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
    """
    Stream a chat completion from Baidu Qianfan (ERNIE / hosted Llama-2 / BLOOMZ).
    Yields the accumulated reply text after each received chunk.
    :raises ConnectionAbortedError: when the context exceeds the model's token limit
    :raises RuntimeError: for any other API-reported error
    """
    BAIDU_CLOUD_QIANFAN_MODEL = get_conf('BAIDU_CLOUD_QIANFAN_MODEL')

    # One endpoint per hosted model.
    url_lib = {
        "ERNIE-Bot-4": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions_pro",
        "ERNIE-Bot": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions",
        "ERNIE-Bot-turbo": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/eb-instant",
        "BLOOMZ-7B": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/bloomz_7b1",

        "Llama-2-70B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_70b",
        "Llama-2-13B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_13b",
        "Llama-2-7B-Chat": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/llama_2_7b",
    }

    url = url_lib[BAIDU_CLOUD_QIANFAN_MODEL]

    url += "?access_token=" + get_access_token()


    payload = json.dumps({
        "messages": generate_message_payload(inputs, llm_kwargs, history, system_prompt),
        "stream": True
    })
    headers = {
        'Content-Type': 'application/json'
    }
    response = requests.request("POST", url, headers=headers, data=payload, stream=True)
    buffer = ""
    for line in response.iter_lines():
        if len(line) == 0: continue
        try:
            # NOTE(review): lstrip('data:') strips a character *set*, not the exact
            # "data:" SSE prefix; it works here only because JSON starts with '{' — confirm.
            dec = line.decode().lstrip('data:')
            dec = json.loads(dec)
            incoming = dec['result']
            buffer += incoming
            yield buffer
        except:
            # A chunk without a 'result' field is an API error object.
            # NOTE(review): `dec` is unbound here if decode() itself raised — confirm.
            if ('error_code' in dec) and ("max length" in dec['error_msg']):
                raise ConnectionAbortedError(dec['error_msg']) # context too long: token overflow
            elif ('error_code' in dec):
                raise RuntimeError(dec['error_msg'])
118
+
119
+
120
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
    """
    ⭐ Multi-threaded entry point; see request_llms/bridge_all.py for the contract.

    Streams the Qianfan reply, mirroring progress into observe_window[0] and
    aborting when the caller's watchdog timestamp (observe_window[1]) goes stale.
    """
    watch_dog_patience = 5  # seconds of watchdog tolerance
    response = ""

    for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, sys_prompt):
        if observe_window:
            observe_window[0] = response
        if len(observe_window) >= 2 and (time.time() - observe_window[1]) > watch_dog_patience:
            raise RuntimeError("程序终止。")
    return response
134
+
135
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    ⭐ Single-threaded UI entry point; see request_llms/bridge_all.py for the contract.
    Streams the Qianfan reply into the chatbot widget; on token overflow, trims
    the history and asks the user to retry.
    """
    chatbot.append((inputs, ""))

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    yield from update_ui(chatbot=chatbot, history=history)
    # BUGFIX: initialize the sentinel BEFORE streaming. The original overwrote
    # `response` with the error string AFTER the loop, discarding the model's
    # actual reply before recording it in history (and NameError'd on an empty stream).
    response = f"[Local Message] 等待{model_name}响应中 ..."
    # Start receiving the reply
    try:
        for response in generate_from_baidu_qianfan(inputs, llm_kwargs, history, system_prompt):
            chatbot[-1] = (inputs, response)
            yield from update_ui(chatbot=chatbot, history=history)
    except ConnectionAbortedError as e:
        from .bridge_all import model_info
        if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入:history[-2] 是本次输入, history[-1] 是本次输出
        history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
        yield from update_ui(chatbot=chatbot, history=history, msg="异常") # 刷新界面
        return

    # Finalize: a surviving sentinel means the stream produced nothing.
    if response == f"[Local Message] 等待{model_name}响应中 ...":
        response = f"[Local Message] {model_name}响应异常 ..."
    history.extend([inputs, response])
    yield from update_ui(chatbot=chatbot, history=history)
request_llms/bridge_qwen.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import os
3
+ from toolbox import update_ui, get_conf, update_ui_lastest_msg
4
+ from toolbox import check_packages, report_exception
5
+
6
+ model_name = 'Qwen'
7
+
8
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
    """
    ⭐ Multi-threaded entry point; see request_llms/bridge_all.py for the contract.

    Streams the Qwen (DashScope) reply, mirroring progress into observe_window[0]
    and aborting when the caller's watchdog timestamp (observe_window[1]) goes stale.
    """
    from .com_qwenapi import QwenRequestInstance
    watch_dog_patience = 5  # seconds of watchdog tolerance
    response = ""

    request_instance = QwenRequestInstance()
    for response in request_instance.generate(inputs, llm_kwargs, history, sys_prompt):
        if observe_window:
            observe_window[0] = response
        if len(observe_window) >= 2 and (time.time() - observe_window[1]) > watch_dog_patience:
            raise RuntimeError("程序终止。")
    return response
24
+
25
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    ⭐ Single-threaded UI entry point; see request_llms/bridge_all.py for the contract.
    Streams a DashScope (Qwen) reply into the chatbot widget after validating
    the dashscope dependency and API key.
    """
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history)

    # Probe the optional dependency; advise installation if missing.
    try:
        check_packages(["dashscope"])
    except:
        yield from update_ui_lastest_msg(f"导入软件依赖失败。使用该模型需要额外依赖,安装方法```pip install --upgrade dashscope```。",
                                         chatbot=chatbot, history=history, delay=0)
        return

    # Check DASHSCOPE_API_KEY
    if get_conf("DASHSCOPE_API_KEY") == "":
        yield from update_ui_lastest_msg(f"请配置 DASHSCOPE_API_KEY。",
                                         chatbot=chatbot, history=history, delay=0)
        return

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    # Start receiving the reply
    from .com_qwenapi import QwenRequestInstance
    sri = QwenRequestInstance()
    # BUGFIX: initialize `response` so an empty stream (no yields from
    # sri.generate) does not raise NameError in the sentinel check below.
    response = f"[Local Message] 等待{model_name}响应中 ..."
    for response in sri.generate(inputs, llm_kwargs, history, system_prompt):
        chatbot[-1] = (inputs, response)
        yield from update_ui(chatbot=chatbot, history=history)

    # Finalize: a surviving sentinel means the stream produced nothing.
    if response == f"[Local Message] 等待{model_name}响应中 ...":
        response = f"[Local Message] {model_name}响应异常 ..."
    history.extend([inputs, response])
    yield from update_ui(chatbot=chatbot, history=history)
+ yield from update_ui(chatbot=chatbot, history=history)
request_llms/bridge_qwen_local.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name = "Qwen_Local"
2
+ cmd_to_install = "`pip install -r request_llms/requirements_qwen_local.txt`"
3
+
4
+ from toolbox import ProxyNetworkActivate, get_conf
5
+ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
6
+
7
+
8
+
9
+ # ------------------------------------------------------------------------------------------------------------------------
10
+ # 🔌💻 Local Model
11
+ # ------------------------------------------------------------------------------------------------------------------------
12
class GetQwenLMHandle(LocalLLMHandle):
    # Subprocess-hosted wrapper around a locally downloaded Qwen checkpoint.

    def load_model_info(self):
        # 🏃‍♂️ Runs in the child process: advertise model name and install hint.
        self.model_name = model_name
        self.cmd_to_install = cmd_to_install

    def load_model_and_tokenizer(self):
        # 🏃‍♂️ Runs in the child process: download (if needed) and load the model.
        # from modelscope import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
        from transformers import AutoModelForCausalLM, AutoTokenizer
        from transformers.generation import GenerationConfig
        with ProxyNetworkActivate('Download_LLM'):
            model_id = get_conf('QWEN_LOCAL_MODEL_SELECTION')
            self._tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True, resume_download=True)
            # use fp16
            model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True).eval()
            model.generation_config = GenerationConfig.from_pretrained(model_id, trust_remote_code=True)  # generation length / top_p etc. can be tuned here
            self._model = model

        return self._model, self._tokenizer

    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️ Runs in the child process: yield partial responses as they stream.
        def adaptor(kwargs):
            # Unpack the generic kwargs contract shared by all local bridges.
            query = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            return query, max_length, top_p, temperature, history

        query, max_length, top_p, temperature, history = adaptor(kwargs)

        # NOTE(review): max_length / top_p / temperature are unpacked but not
        # forwarded — chat_stream runs with the model's generation_config defaults.
        for response in self._model.chat_stream(self._tokenizer, query, history=history):
            yield response

    def try_to_import_special_deps(self, **kwargs):
        # import something that will raise error if the user does not install requirement_*.txt
        # 🏃‍♂️ Runs in the MAIN process.
        # NOTE(review): probes 'modelscope' although loading above uses
        # transformers — confirm against requirements_qwen_local.txt.
        import importlib
        importlib.import_module('modelscope')
54
+
55
+
56
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic Interface
# ------------------------------------------------------------------------------------------------------------------------
# Build the two standard bridge entry points (threaded / streaming-UI) from the handle class.
predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name)
request_llms/bridge_spark.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import time
3
+ import threading
4
+ import importlib
5
+ from toolbox import update_ui, get_conf, update_ui_lastest_msg
6
+ from multiprocessing import Process, Pipe
7
+
8
+ model_name = '星火认知大模型'
9
+
10
def validate_key():
    """Return True when a usable XFYUN_APPID is configured (neither empty nor the '00000000' placeholder)."""
    appid = get_conf('XFYUN_APPID')
    return appid not in ('00000000', '')
15
+
16
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
    """
    ⭐ Multi-threaded entry point for the Spark model; see request_llms/bridge_all.py.
    """
    if validate_key() is False:
        raise RuntimeError('请配置讯飞星火大模型的XFYUN_APPID, XFYUN_API_KEY, XFYUN_API_SECRET')

    from .com_sparkapi import SparkRequestInstance

    WATCHDOG_PATIENCE = 5  # seconds of caller silence tolerated before aborting
    answer = ""
    request = SparkRequestInstance()
    for answer in request.generate(inputs, llm_kwargs, history, sys_prompt, use_image_api=False):
        # Mirror the partial response so the caller can observe progress.
        if observe_window:
            observe_window[0] = answer
        # Watchdog: a stale heartbeat in observe_window[1] means the listener is gone.
        if len(observe_window) >= 2 and (time.time() - observe_window[1]) > WATCHDOG_PATIENCE:
            raise RuntimeError("程序终止。")
    return answer
35
+
36
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    ⭐ Single-threaded (UI) entry point for the Spark model; see request_llms/bridge_all.py.
    """
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history)

    # Credential probe: bail out with a UI hint when the app id is missing.
    if validate_key() is False:
        yield from update_ui_lastest_msg(lastmsg="[Local Message] 请配置讯飞星火大模型的XFYUN_APPID, XFYUN_API_KEY, XFYUN_API_SECRET", chatbot=chatbot, history=history, delay=0)
        return

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    # Stream the reply, repainting the last chat row on each fragment.
    from .com_sparkapi import SparkRequestInstance
    request = SparkRequestInstance()
    for response in request.generate(inputs, llm_kwargs, history, system_prompt, use_image_api=True):
        chatbot[-1] = (inputs, response)
        yield from update_ui(chatbot=chatbot, history=history)

    # Wrap up: flag a never-updated placeholder reply, then record the exchange.
    if response == f"[Local Message] 等待{model_name}响应中 ...":
        response = f"[Local Message] {model_name}响应异常 ..."
    history.extend([inputs, response])
    yield from update_ui(chatbot=chatbot, history=history)
request_llms/bridge_stackclaude.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .bridge_newbingfree import preprocess_newbing_out, preprocess_newbing_out_simple
2
+ from multiprocessing import Process, Pipe
3
+ from toolbox import update_ui, get_conf, trimmed_format_exc
4
+ import threading
5
+ import importlib
6
+ import logging
7
+ import time
8
+ from toolbox import get_conf
9
+ import asyncio
10
+ load_message = "正在加载Claude组件,请稍候..."
11
+
12
try:
    """
    ========================================================================
    Part 1: Slack API client
    https://github.com/yokonsan/claude-in-slack-api
    ========================================================================
    """

    from slack_sdk.errors import SlackApiError
    from slack_sdk.web.async_client import AsyncWebClient

    class SlackClient(AsyncWebClient):
        """Async Slack web client used to converse with the Claude bot.

        Attributes:
            CHANNEL_ID: str, the DM channel id opened with the Claude bot.

        Methods:
            open_channel(): async; open a conversation via conversations_open and remember its channel id.
            chat(text): async; post a text message to the opened channel.
            get_slack_messages(): async; fetch the latest bot message (history queries unsupported).
            get_reply(): async generator; poll the channel — a message ending in "Typing…_" means Claude is still writing.
        """
        CHANNEL_ID = None

        async def open_channel(self):
            # Open (or resume) a direct-message conversation with the Claude bot.
            response = await self.conversations_open(users=get_conf('SLACK_CLAUDE_BOT_ID'))
            self.CHANNEL_ID = response["channel"]["id"]

        async def chat(self, text):
            if not self.CHANNEL_ID:
                raise Exception("Channel not found.")

            resp = await self.chat_postMessage(channel=self.CHANNEL_ID, text=text)
            # Remember our own message timestamp; replies are read strictly after it.
            # NOTE(review): LAST_TS is only set here — calling get_slack_messages()
            # before any chat() would raise AttributeError; confirm call order.
            self.LAST_TS = resp["ts"]

        async def get_slack_messages(self):
            try:
                # TODO: history lookups are unsupported for now — in a shared channel,
                # multiple concurrent users' histories would bleed into each other.
                resp = await self.conversations_history(channel=self.CHANNEL_ID, oldest=self.LAST_TS, limit=1)
                msg = [msg for msg in resp["messages"]
                       if msg.get("user") == get_conf('SLACK_CLAUDE_BOT_ID')]
                return msg
            except (SlackApiError, KeyError) as e:
                raise RuntimeError(f"获取Slack消息失败。")

        async def get_reply(self):
            # Poll until the bot stops streaming; yield (final, text) pairs.
            while True:
                slack_msgs = await self.get_slack_messages()
                if len(slack_msgs) == 0:
                    await asyncio.sleep(0.5)
                    continue

                msg = slack_msgs[-1]
                if msg["text"].endswith("Typing…_"):
                    # Still generating — yield the partial text and keep polling.
                    yield False, msg["text"]
                else:
                    yield True, msg["text"]
                    break
except:
    # Best-effort guard: slack_sdk may be missing; ClaudeHandle.check_dependency()
    # reports the problem to the user, so a silent pass is intentional here.
    pass
74
+
75
+ """
76
+ ========================================================================
77
+ 第二部分:子进程Worker(调用主体)
78
+ ========================================================================
79
+ """
80
+
81
+
82
class ClaudeHandle(Process):
    """Daemon subprocess that owns the Slack connection to Claude.

    The parent process talks to the child over a Pipe: ``stream_chat()`` sends a
    kwargs dict and then yields response fragments until the child emits the
    ``'[Finish]'`` sentinel (``'[Fail]'`` marks an unrecoverable error).
    """

    def __init__(self):
        super().__init__(daemon=True)
        self.parent, self.child = Pipe()    # parent end / child end of the command pipe
        self.claude_model = None            # SlackClient instance, created in the child process only
        self.info = ""                      # human-readable status for the UI
        self.success = True
        self.local_history = []
        self.check_dependency()
        if self.success:
            self.start()
        self.threadLock = threading.Lock()  # serializes concurrent stream_chat callers

    def check_dependency(self):
        # Probe for slack_sdk without crashing; .info carries the verdict to the UI.
        try:
            self.success = False
            import slack_sdk
            self.info = "依赖检测通过,等待Claude响应。注意目前不能多人同时调用Claude接口(有线程锁),否则将导致每个人的Claude问询历史互相渗透。调用Claude时,会自动使用已配置的代理。"
            self.success = True
        except:
            self.info = "缺少的依赖,如果要使用Claude,除了基础的pip依赖以外,您还需要运行`pip install -r request_llms/requirements_slackclaude.txt`安装Claude的依赖,然后重启程序。"
            self.success = False

    def ready(self):
        # NOTE(review): claude_model is only assigned inside the child process, so
        # in the parent this reflects parent-local state, not child readiness.
        return self.claude_model is not None

    async def async_run(self):
        await self.claude_model.open_channel()
        while True:
            # Block until the parent sends the next request.
            kwargs = self.child.recv()
            question = kwargs['query']
            history = kwargs['history']

            # Compose the prompt (history is currently not replayed to Slack).
            prompt = ""
            prompt += question
            print('question:', prompt)

            # Submit to the Claude bot.
            await self.claude_model.chat(prompt)

            # Relay streaming fragments back to the parent.
            async for final, response in self.claude_model.get_reply():
                if not final:
                    print(response)
                    self.child.send(str(response))
                else:
                    # Re-fetch once more so the last fragment is not lost.
                    slack_msgs = await self.claude_model.get_slack_messages()
                    last_msg = slack_msgs[-1]["text"] if slack_msgs and len(slack_msgs) > 0 else ""
                    if last_msg:
                        self.child.send(last_msg)
                    print('-------- receive final ---------')
                    self.child.send('[Finish]')

    def run(self):
        """Child-process entry point: build the Slack client, then serve requests forever."""
        # First run: (re)initialize state inside the child.
        self.success = False
        self.local_history = []
        if (self.claude_model is None) or (not self.success):
            # Proxy configuration.
            proxies = get_conf('proxies')
            self.proxies_https = proxies['https'] if proxies is not None else None

            try:
                SLACK_CLAUDE_USER_TOKEN = get_conf('SLACK_CLAUDE_USER_TOKEN')
                self.claude_model = SlackClient(token=SLACK_CLAUDE_USER_TOKEN, proxy=self.proxies_https)
                print('Claude组件初始化成功。')
            except:
                self.success = False
                tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n'
                self.child.send(f'[Local Message] 不能加载Claude组件。{tb_str}')
                self.child.send('[Fail]')
                self.child.send('[Finish]')
                raise RuntimeError(f"不能加载Claude组件。")

        self.success = True
        try:
            # Enter the request-serving loop.
            asyncio.run(self.async_run())
        except Exception:
            tb_str = '\n```\n' + trimmed_format_exc() + '\n```\n'
            self.child.send(f'[Local Message] Claude失败 {tb_str}.')
            self.child.send('[Fail]')
            self.child.send('[Finish]')

    def stream_chat(self, **kwargs):
        """Main-process generator: forward kwargs to the child, yield reply fragments.

        BUG FIX: the lock is now released in a ``finally`` block, so an exception
        or an abandoned generator no longer leaves ``threadLock`` held forever
        (which deadlocked every subsequent request).
        """
        self.threadLock.acquire()
        try:
            self.parent.send(kwargs)      # hand the request to the child process
            while True:
                res = self.parent.recv()  # wait for the next fragment from Claude
                if res == '[Finish]':
                    break                 # normal end of stream
                elif res == '[Fail]':
                    self.success = False
                    break
                else:
                    yield res             # a partial Claude reply
        finally:
            self.threadLock.release()
193
+
194
+
195
+ """
196
+ ========================================================================
197
+ 第三部分:主进程统一调用函数接口
198
+ ========================================================================
199
+ """
200
# Module-level singleton: one ClaudeHandle shared by all requests.
# NOTE(review): `global` at module scope is a no-op; kept for parity with upstream.
global claude_handle
claude_handle = None
202
+
203
+
204
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
    """
    Multi-threaded entry point; see request_llms/bridge_all.py for the contract.

    BUG FIX: ``observe_window`` defaults to None but was written to
    unconditionally, raising TypeError for any caller relying on the default.
    All accesses are now guarded.
    """
    global claude_handle
    if (claude_handle is None) or (not claude_handle.success):
        claude_handle = ClaudeHandle()
        if observe_window is not None and len(observe_window) >= 1:
            observe_window[0] = load_message + "\n\n" + claude_handle.info
        if not claude_handle.success:
            error = claude_handle.info
            claude_handle = None
            raise RuntimeError(error)

    # There is no system-prompt channel, so prior turns ride along as Q/A pairs.
    history_feedin = []
    for i in range(len(history)//2):
        history_feedin.append([history[2*i], history[2*i+1]])

    watch_dog_patience = 5  # watchdog patience, 5 seconds is enough
    response = ""
    if observe_window is not None and len(observe_window) >= 1:
        observe_window[0] = "[Local Message] 等待Claude响应中 ..."
    for response in claude_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=sys_prompt, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        if observe_window is not None and len(observe_window) >= 1:
            observe_window[0] = preprocess_newbing_out_simple(response)
        if observe_window is not None and len(observe_window) >= 2:
            if (time.time()-observe_window[1]) > watch_dog_patience:
                raise RuntimeError("程序终止。")
    return preprocess_newbing_out_simple(response)
232
+
233
+
234
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream=True, additional_fn=None):
    """
    Single-threaded (UI) entry point; see request_llms/bridge_all.py.
    """
    chatbot.append((inputs, "[Local Message] 等待Claude响应中 ..."))

    global claude_handle
    if (claude_handle is None) or (not claude_handle.success):
        claude_handle = ClaudeHandle()
        chatbot[-1] = (inputs, load_message + "\n\n" + claude_handle.info)
        yield from update_ui(chatbot=chatbot, history=[])
        if not claude_handle.success:
            claude_handle = None
            return

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    # Claude has no system-prompt channel; replay history as Q/A pairs instead.
    history_feedin = [[history[2*i], history[2*i+1]] for i in range(len(history)//2)]

    waiting = "[Local Message] 等待Claude响应中 ..."
    chatbot[-1] = (inputs, waiting)
    response = waiting
    yield from update_ui(chatbot=chatbot, history=history, msg="Claude响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。")
    for response in claude_handle.stream_chat(query=inputs, history=history_feedin, system_prompt=system_prompt):
        chatbot[-1] = (inputs, preprocess_newbing_out(response))
        yield from update_ui(chatbot=chatbot, history=history, msg="Claude响应缓慢,尚未完成全部响应,请耐心完成后再提交新问题。")
    # A reply that never advanced past the placeholder means Claude failed.
    if response == "[Local Message] 等待Claude响应中 ...":
        response = "[Local Message] Claude响应异常,请刷新界面重试 ..."
    history.extend([inputs, response])
    logging.info(f'[raw_input] {inputs}')
    logging.info(f'[response] {response}')
    yield from update_ui(chatbot=chatbot, history=history, msg="完成全部响应,请提交新问题。")
request_llms/bridge_tgui.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ Contributed by SagsMug. Modified by binary-husky
3
+ https://github.com/oobabooga/text-generation-webui/pull/175
4
+ '''
5
+
6
+ import asyncio
7
+ import json
8
+ import random
9
+ import string
10
+ import websockets
11
+ import logging
12
+ import time
13
+ import threading
14
+ import importlib
15
+ from toolbox import get_conf, update_ui
16
+
17
+
18
def random_hash():
    """Return a 9-character random session id drawn from [a-z0-9]."""
    alphabet = string.ascii_lowercase + string.digits
    return ''.join(random.choice(alphabet) for _ in range(9))
21
+
22
async def run(context, max_token, temperature, top_p, addr, port):
    """Stream text from a text-generation-webui instance via its gradio websocket queue.

    Yields the cumulative generated text each time the server reports progress.
    NOTE(review): fn_index=12 is tied to one specific gradio layout of
    text-generation-webui — confirm it matches the deployed server build.
    """
    params = {
        'max_new_tokens': max_token,
        'do_sample': True,
        'temperature': temperature,
        'top_p': top_p,
        'typical_p': 1,
        'repetition_penalty': 1.05,
        'encoder_repetition_penalty': 1.0,
        'top_k': 0,
        'min_length': 0,
        'no_repeat_ngram_size': 0,
        'num_beams': 1,
        'penalty_alpha': 0,
        'length_penalty': 1,
        'early_stopping': True,
        'seed': -1,
    }
    # Random session hash identifying this queue entry on the server.
    session = random_hash()

    async with websockets.connect(f"ws://{addr}:{port}/queue/join") as websocket:
        while content := json.loads(await websocket.recv()):
            #Python3.10 syntax, replace with if elif on older
            if content["msg"] == "send_hash":
                # The server asks us to identify: register our session for fn 12.
                await websocket.send(json.dumps({
                    "session_hash": session,
                    "fn_index": 12
                }))
            elif content["msg"] == "estimation":
                # Queue-position estimate; nothing to do.
                pass
            elif content["msg"] == "send_data":
                # Our turn: submit the prompt plus all sampling parameters.
                # The positional order of "data" must match the server's fn signature.
                await websocket.send(json.dumps({
                    "session_hash": session,
                    "fn_index": 12,
                    "data": [
                        context,
                        params['max_new_tokens'],
                        params['do_sample'],
                        params['temperature'],
                        params['top_p'],
                        params['typical_p'],
                        params['repetition_penalty'],
                        params['encoder_repetition_penalty'],
                        params['top_k'],
                        params['min_length'],
                        params['no_repeat_ngram_size'],
                        params['num_beams'],
                        params['penalty_alpha'],
                        params['length_penalty'],
                        params['early_stopping'],
                        params['seed'],
                    ]
                }))
            elif content["msg"] == "process_starts":
                pass
            elif content["msg"] in ["process_generating", "process_completed"]:
                yield content["output"]["data"][0]
                # You can search for your desired end indicator and
                # stop generation by closing the websocket here
                if (content["msg"] == "process_completed"):
                    break
83
+
84
+
85
+
86
+
87
+
88
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    Send the query to the TGUI backend and stream the output into the UI.
    inputs: this round's question
    top_p, temperature: sampling parameters (via llm_kwargs)
    history: previous dialogue turns (overlong inputs/history will overflow the token budget)
    chatbot: the dialogue list rendered in the WebUI; mutate it then yield to repaint
    additional_fn: which core-function button was clicked, see functional.py
    """
    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    raw_input = "What I would like to say is the following: " + inputs
    history.extend([inputs, ""])
    chatbot.append([inputs, ""])
    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应")  # repaint the UI

    prompt = raw_input
    tgui_say = ""

    # llm_model is formatted like "tgui:galactica-1.3b@localhost:7860".
    model_name, addr_port = llm_kwargs['llm_model'].split('@')
    assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_kwargs['llm_model']
    addr, port = addr_port.split(':')


    # mutable[0]: cumulative text produced so far; mutable[1]: last listener heartbeat.
    mutable = ["", time.time()]
    def run_coorotine(mutable):
        async def get_result(mutable):
            # "tgui:galactica-1.3b@localhost:7860"

            async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
                              temperature=llm_kwargs['temperature'],
                              top_p=llm_kwargs['top_p'], addr=addr, port=port):
                print(response[len(mutable[0]):])
                mutable[0] = response
                # Abort generation when the UI loop stops refreshing the heartbeat.
                if (time.time() - mutable[1]) > 3:
                    print('exit when no listener')
                    break
        asyncio.run(get_result(mutable))

    thread_listen = threading.Thread(target=run_coorotine, args=(mutable,), daemon=True)
    thread_listen.start()

    # Poll the worker: refresh the heartbeat and repaint whenever new text arrived.
    while thread_listen.is_alive():
        time.sleep(1)
        mutable[1] = time.time()
        # Print intermediate steps
        if tgui_say != mutable[0]:
            tgui_say = mutable[0]
            history[-1] = tgui_say
            chatbot[-1] = (history[-2], history[-1])
            yield from update_ui(chatbot=chatbot, history=history)  # repaint the UI
142
+
143
+
144
+
145
+
146
def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience=False):
    """Threaded variant: kick off generation and mirror progress into observe_window.

    NOTE(review): this returns observe_window[0] immediately after starting the
    listener thread, i.e. before generation has finished — callers appear to
    poll observe_window for the evolving text; confirm against bridge_all.py.
    """
    raw_input = "What I would like to say is the following: " + inputs
    prompt = raw_input
    tgui_say = ""
    # llm_model is formatted like "tgui:galactica-1.3b@localhost:7860".
    model_name, addr_port = llm_kwargs['llm_model'].split('@')
    assert ':' in addr_port, "LLM_MODEL 格式不正确!" + llm_kwargs['llm_model']
    addr, port = addr_port.split(':')


    def run_coorotine(observe_window):
        async def get_result(observe_window):
            async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
                              temperature=llm_kwargs['temperature'],
                              top_p=llm_kwargs['top_p'], addr=addr, port=port):
                print(response[len(observe_window[0]):])
                observe_window[0] = response
                # Watchdog: stop when the caller's heartbeat in observe_window[1] goes stale.
                if (time.time() - observe_window[1]) > 5:
                    print('exit when no listener')
                    break
        asyncio.run(get_result(observe_window))
    # NOTE(review): unlike predict(), this thread is not daemonized — it can keep
    # the interpreter alive after the caller is done; confirm that is intended.
    thread_listen = threading.Thread(target=run_coorotine, args=(observe_window,))
    thread_listen.start()
    return observe_window[0]
request_llms/bridge_zhipu.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import time
3
+ from toolbox import update_ui, get_conf, update_ui_lastest_msg
4
+ from toolbox import check_packages, report_exception
5
+
6
+ model_name = '智谱AI大模型'
7
+
8
def validate_key():
    """Return True when ZHIPUAI_API_KEY is configured (non-empty)."""
    return get_conf("ZHIPUAI_API_KEY") != ''
12
+
13
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
    """
    ⭐ Multi-threaded entry point for the Zhipu model; see request_llms/bridge_all.py.
    """
    if validate_key() is False:
        raise RuntimeError('请配置ZHIPUAI_API_KEY')

    from .com_zhipuapi import ZhipuRequestInstance

    WATCHDOG_PATIENCE = 5  # seconds of caller silence tolerated before aborting
    answer = ""
    request = ZhipuRequestInstance()
    for answer in request.generate(inputs, llm_kwargs, history, sys_prompt):
        # Mirror the partial response so the caller can observe progress.
        if observe_window:
            observe_window[0] = answer
        # Watchdog: a stale heartbeat in observe_window[1] means the listener is gone.
        if len(observe_window) >= 2 and (time.time() - observe_window[1]) > WATCHDOG_PATIENCE:
            raise RuntimeError("程序终止。")
    return answer
32
+
33
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    ⭐ Single-threaded (UI) entry point for the Zhipu model; see request_llms/bridge_all.py.
    """
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history)

    # Dependency probe: zhipuai is an optional extra; suggest the install command on failure.
    try:
        check_packages(["zhipuai"])
    except:
        yield from update_ui_lastest_msg(f"导入软件依赖失败。使用该模型需要额外依赖,安装方法```pip install --upgrade zhipuai```。",
                                         chatbot=chatbot, history=history, delay=0)
        return

    # Credential probe.
    if validate_key() is False:
        yield from update_ui_lastest_msg(lastmsg="[Local Message] 请配置ZHIPUAI_API_KEY", chatbot=chatbot, history=history, delay=0)
        return

    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    # Stream the reply, repainting the last chat row on each fragment.
    from .com_zhipuapi import ZhipuRequestInstance
    request = ZhipuRequestInstance()
    for response in request.generate(inputs, llm_kwargs, history, system_prompt):
        chatbot[-1] = (inputs, response)
        yield from update_ui(chatbot=chatbot, history=history)

    # Wrap up: flag a never-updated placeholder reply, then record the exchange.
    if response == f"[Local Message] 等待{model_name}响应中 ...":
        response = f"[Local Message] {model_name}响应异常 ..."
    history.extend([inputs, response])
    yield from update_ui(chatbot=chatbot, history=history)
request_llms/com_google.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # encoding: utf-8
2
+ # @Time : 2023/12/25
3
+ # @Author : Spike
4
+ # @Descr :
5
+ import json
6
+ import os
7
+ import re
8
+ import requests
9
+ from typing import List, Dict, Tuple
10
+ from toolbox import get_conf, encode_image, get_pictures_list
11
+
12
+ proxies, TIMEOUT_SECONDS = get_conf("proxies", "TIMEOUT_SECONDS")
13
+
14
+ """
15
+ ========================================================================
16
+ 第五部分 一些文件处理方法
17
+ files_filter_handler 根据type过滤文件
18
+ input_encode_handler 提取input中的文件,并解析
19
+ file_manifest_filter_html 根据type过滤文件, 并解析为html or md 文本
20
+ link_mtime_to_md 文件增加本地时间参数,避免下载到缓存文件
21
+ html_view_blank 超链接
22
+ html_local_file 本地文件取相对路径
23
+ to_markdown_tabs 文件list 转换为 md tab
24
+ """
25
+
26
+
27
def files_filter_handler(file_list):
    """Keep only existing files whose (lowercase) extension marks them as an image.

    Entries may carry a gradio-style ``file=`` prefix, which is stripped first.
    """
    image_exts = {
        "png", "jpg", "jpeg", "bmp", "svg", "webp",
        "ico", "tif", "tiff", "raw", "eps",
    }
    kept = []
    for item in file_list:
        path = str(item).replace("file=", "")
        # Existence first, then a last-dot-segment extension check.
        if os.path.exists(path) and str(os.path.basename(path)).split(".")[-1] in image_exts:
            kept.append(path)
    return kept
48
+
49
+
50
def input_encode_handler(inputs, llm_kwargs):
    """Extract the most recently uploaded images and base64-encode them.

    Returns a tuple ``(inputs, md_encode)`` where ``md_encode`` is a list of
    ``{"data": <base64>, "type": <mime subtype>}`` dicts (empty when nothing
    was uploaded).
    """
    if llm_kwargs["most_recent_uploaded"].get("path"):
        image_paths = get_pictures_list(llm_kwargs["most_recent_uploaded"]["path"])
        md_encode = []
        for md_path in image_paths:
            type_ = os.path.splitext(md_path)[1].replace(".", "")
            type_ = "jpeg" if type_ == "jpg" else type_  # normalize to the MIME subtype
            md_encode.append({"data": encode_image(md_path), "type": type_})
        return inputs, md_encode
    # BUG FIX: previously fell through and implicitly returned None, which
    # crashed callers that unpack `input_, encode_img = input_encode_handler(...)`.
    return inputs, []
59
+
60
+
61
def file_manifest_filter_html(file_list, filter_: list = None, md_type=False):
    """Render each entry for display: images as <img>/markdown, existing files as
    cache-busted links, everything else verbatim.

    filter_: extensions treated as images (defaults to the common raster/vector set).
    md_type: forwarded to html_local_img to pick markdown vs HTML output.
    """
    exts = filter_ or [
        "png", "jpg", "jpeg", "bmp", "svg", "webp",
        "ico", "tif", "tiff", "raw", "eps",
    ]
    rendered = []
    for file in file_list:
        if str(os.path.basename(file)).split(".")[-1] in exts:
            rendered.append(html_local_img(file, md=md_type))
        elif os.path.exists(file):
            rendered.append(link_mtime_to_md(file))
        else:
            rendered.append(file)
    return rendered
85
+
86
+
87
def link_mtime_to_md(file):
    """Markdown link whose URL carries the file's mtime as a cache-busting query string."""
    href = html_local_file(file)
    name = os.path.basename(file)
    return f"[{name}]({href}?{os.path.getmtime(file)})"
92
+
93
+
94
def html_local_file(file):
    """Rewrite an existing absolute path into a gradio 'file=' link relative to this
    module's directory; nonexistent paths are returned unchanged."""
    project_dir = os.path.dirname(__file__)
    if os.path.exists(str(file)):
        file = f'file={file.replace(project_dir, ".")}'
    return file
99
+
100
+
101
def html_local_img(__file, layout="left", max_width=None, max_height=None, md=True):
    """Render a local image either as a markdown image (md=True) or an HTML <div><img>
    with optional max-width/max-height CSS constraints."""
    constraints = []
    if max_width is not None:
        constraints.append(f"max-width: {max_width};")
    if max_height is not None:
        constraints.append(f"max-height: {max_height};")
    style = "".join(constraints)
    __file = html_local_file(__file)
    if md:
        return f"![{__file}]({__file})"
    return f'<div align="{layout}"><img src="{__file}" style="{style}"></div>'
112
+
113
+
114
def to_markdown_tabs(head: list, tabs: list, alignment=":---:", column=False):
    """
    Render a markdown table.

    Args:
        head: header cells, e.g. ["name", "size"]
        tabs: cell data — [[col1], [col2], ...] when column=True, otherwise rows
        alignment: ":---" left, ":---:" center, "---:" right
        column: True to treat tabs as columns, False to treat them as rows (default)
    Returns:
        A string representation of the markdown table.
    """
    if column:
        transposed_tabs = list(map(list, zip(*tabs)))
    else:
        transposed_tabs = tabs
    # ROBUSTNESS FIX: max() over an empty sequence raised ValueError when tabs
    # was empty; default=0 now yields a header-only table instead.
    max_len = max((len(col) for col in transposed_tabs), default=0)

    tab_format = "| %s "
    tabs_list = "".join([tab_format % i for i in head]) + "|\n"
    tabs_list += "".join([tab_format % alignment for i in head]) + "|\n"

    for i in range(max_len):
        # Ragged columns are padded with empty cells.
        row_data = [tab[i] if i < len(tab) else "" for tab in transposed_tabs]
        row_data = file_manifest_filter_html(row_data, filter_=None)
        tabs_list += "".join([tab_format % i for i in row_data]) + "|\n"

    return tabs_list
141
+
142
+
143
class GoogleChatInit:
    """Builds and issues streaming requests against Google's Gemini REST API."""

    # Pristine endpoint template; %m is substituted with the model name, %k with the API key.
    _URL_TEMPLATE = "https://generativelanguage.googleapis.com/v1beta/models/%m:streamGenerateContent?key=%k"

    def __init__(self):
        # url_gemini keeps its historical role: after generate_message_payload()
        # it holds the fully substituted URL for the current request.
        self.url_gemini = self._URL_TEMPLATE

    def generate_chat(self, inputs, llm_kwargs, history, system_prompt):
        """POST the payload and return an iterator over the streamed response lines."""
        headers, payload = self.generate_message_payload(
            inputs, llm_kwargs, history, system_prompt
        )
        response = requests.post(
            url=self.url_gemini,
            headers=headers,
            data=json.dumps(payload),
            stream=True,
            proxies=proxies,
            timeout=TIMEOUT_SECONDS,
        )
        return response.iter_lines()

    def __conversation_user(self, user_input, llm_kwargs):
        """Wrap one user turn as a Gemini 'user' message; vision models also get inline images."""
        what_i_have_asked = {"role": "user", "parts": []}
        if "vision" not in self.url_gemini:
            input_ = user_input
            encode_img = []
        else:
            input_, encode_img = input_encode_handler(user_input, llm_kwargs=llm_kwargs)
        what_i_have_asked["parts"].append({"text": input_})
        if encode_img:
            for data in encode_img:
                what_i_have_asked["parts"].append(
                    {
                        "inline_data": {
                            "mime_type": f"image/{data['type']}",
                            "data": data["data"],
                        }
                    }
                )
        return what_i_have_asked

    def __conversation_history(self, history, llm_kwargs):
        """Convert flat [q, a, q, a, ...] history into alternating user/model messages."""
        messages = []
        conversation_cnt = len(history) // 2
        if conversation_cnt:
            for index in range(0, 2 * conversation_cnt, 2):
                what_i_have_asked = self.__conversation_user(history[index], llm_kwargs)
                what_gpt_answer = {
                    "role": "model",
                    "parts": [{"text": history[index + 1]}],
                }
                messages.append(what_i_have_asked)
                messages.append(what_gpt_answer)
        return messages

    def generate_message_payload(
        self, inputs, llm_kwargs, history, system_prompt
    ) -> Tuple[Dict, Dict]:
        """Return (headers, payload) for the Gemini request and refresh self.url_gemini."""
        messages = [
            # {"role": "system", "parts": [{"text": system_prompt}]},  # Gemini rejects an even number of turns, so the system role is unused for now
            # {"role": "user", "parts": [{"text": ""}]},
            # {"role": "model", "parts": [{"text": ""}]}
        ]
        # BUG FIX: substitute on the pristine template instead of self.url_gemini.
        # Previously the substitution result was written back over the template,
        # so a second call found no %m/%k placeholders and silently reused the
        # first call's model and key.
        self.url_gemini = self._URL_TEMPLATE.replace(
            "%m", llm_kwargs["llm_model"]
        ).replace("%k", get_conf("GEMINI_API_KEY"))
        header = {"Content-Type": "application/json"}
        if "vision" not in self.url_gemini:  # only non-vision models replay history
            messages.extend(
                self.__conversation_history(history, llm_kwargs)
            )
        messages.append(self.__conversation_user(inputs, llm_kwargs))  # current user turn
        payload = {
            "contents": messages,
            "generationConfig": {
                # "maxOutputTokens": 800,
                "stopSequences": str(llm_kwargs.get("stop", "")).split(" "),
                "temperature": llm_kwargs.get("temperature", 1),
                "topP": llm_kwargs.get("top_p", 0.8),
                "topK": 10,
            },
        }
        return header, payload
223
+
224
+
225
+ if __name__ == "__main__":
226
+ google = GoogleChatInit()
227
+ # print(gootle.generate_message_payload('你好呀', {}, ['123123', '3123123'], ''))
228
+ # gootle.input_encode_handle('123123[123123](./123123), ![53425](./asfafa/fff.jpg)')
request_llms/com_qwenapi.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from http import HTTPStatus
2
+ from toolbox import get_conf
3
+ import threading
4
+ import logging
5
+
6
+ timeout_bot_msg = '[Local Message] Request timeout. Network error.'
7
+
8
class QwenRequestInstance():
    """Thin streaming wrapper around the DashScope (Qwen) Generation API."""

    def __init__(self):
        import dashscope
        self.time_to_yield_event = threading.Event()
        self.time_to_exit_event = threading.Event()
        self.result_buf = ""

        # Fail fast when no key is configured, then install it globally.
        if get_conf("DASHSCOPE_API_KEY") == '':
            raise RuntimeError('请配置 DASHSCOPE_API_KEY')
        dashscope.api_key = get_conf("DASHSCOPE_API_KEY")

    def generate(self, inputs, llm_kwargs, history, system_prompt):
        """Stream the completion, yielding the accumulated text after every chunk."""
        from dashscope import Generation
        selected_model = {
            'qwen-turbo': Generation.Models.qwen_turbo,
            'qwen-plus': Generation.Models.qwen_plus,
            'qwen-max': Generation.Models.qwen_max,
        }[llm_kwargs['llm_model']]
        # The endpoint rejects top_p values of exactly 0 or 1; nudge them inward.
        top_p = llm_kwargs.get('top_p', 0.8)
        if top_p == 0: top_p += 1e-5
        if top_p == 1: top_p -= 1e-5

        self.result_buf = ""
        stream = Generation.call(
            model=selected_model,
            messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt),
            top_p=top_p,
            temperature=llm_kwargs.get('temperature', 1.0),
            result_format='message',
            stream=True,
            incremental_output=True
        )

        for chunk in stream:
            if chunk.status_code != HTTPStatus.OK:
                # Transport/API failure: surface the error text and stop streaming.
                self.result_buf += f"[Local Message] 请求错误:状态码:{chunk.status_code},错误码:{chunk.code},消息:{chunk.message}"
                yield self.result_buf
                break
            finish_reason = chunk.output.choices[0].finish_reason
            if finish_reason == 'stop':
                yield self.result_buf
                break
            if finish_reason == 'length':
                self.result_buf += "[Local Message] 生成长度过长,后续输出被截断"
                yield self.result_buf
                break
            self.result_buf += chunk.output.choices[0].message.content
            yield self.result_buf
        logging.info(f'[raw_input] {inputs}')
        logging.info(f'[response] {self.result_buf}')
        return self.result_buf
67
+
68
+
69
def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
    """Assemble the strictly-alternating user/assistant message list for DashScope.

    The system prompt is emitted as a leading user turn (answered with
    "Certainly!") because the API expects user/assistant alternation.
    """
    if system_prompt == '': system_prompt = 'Hello!'
    messages = [{"role": "user", "content": system_prompt}, {"role": "assistant", "content": "Certainly!"}]
    for idx in range(0, (len(history) // 2) * 2, 2):
        asked = {"role": "user", "content": history[idx]}
        answered = {"role": "assistant", "content": history[idx + 1]}
        if asked["content"] != "":
            # Skip turns with an empty or timed-out answer.
            if answered["content"] == "":
                continue
            if answered["content"] == timeout_bot_msg:
                continue
            messages.append(asked)
            messages.append(answered)
        else:
            # An empty question means the answer continues the previous turn.
            messages[-1]['content'] = answered['content']
    messages.append({"role": "user", "content": inputs})
    return messages
request_llms/com_sparkapi.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import get_conf, get_pictures_list, encode_image
2
+ import base64
3
+ import datetime
4
+ import hashlib
5
+ import hmac
6
+ import json
7
+ from urllib.parse import urlparse
8
+ import ssl
9
+ from datetime import datetime
10
+ from time import mktime
11
+ from urllib.parse import urlencode
12
+ from wsgiref.handlers import format_date_time
13
+ import websocket
14
+ import threading, time
15
+
16
+ timeout_bot_msg = '[Local Message] Request timeout. Network error.'
17
+
18
class Ws_Param(object):
    """Holds iFlytek Spark credentials and builds the signed websocket URL."""

    def __init__(self, APPID, APIKey, APISecret, gpt_url):
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret
        parsed = urlparse(gpt_url)
        self.host = parsed.netloc
        self.path = parsed.path
        self.gpt_url = gpt_url

    def create_url(self):
        """Return the websocket URL carrying HMAC-SHA256 auth query parameters."""
        # RFC1123 timestamp required by the signature scheme.
        date = format_date_time(mktime(datetime.now().timetuple()))

        # Canonical string to sign: host, date and the request line.
        signature_origin = (
            "host: " + self.host + "\n"
            + "date: " + date + "\n"
            + "GET " + self.path + " HTTP/1.1"
        )

        # HMAC-SHA256 over the canonical string, then base64.
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature_sha_base64 = base64.b64encode(digest).decode(encoding='utf-8')
        authorization_origin = f'api_key="{self.APIKey}", algorithm="hmac-sha256", headers="host date request-line", signature="{signature_sha_base64}"'
        authorization = base64.b64encode(authorization_origin.encode('utf-8')).decode(encoding='utf-8')

        # Auth material travels as query parameters on the websocket URL.
        query = {
            "authorization": authorization,
            "date": date,
            "host": self.host,
        }
        return self.gpt_url + '?' + urlencode(query)
55
+
56
+
57
+
58
class SparkRequestInstance():
    """Streams chat completions from the iFlytek Spark websocket API."""

    def __init__(self):
        XFYUN_APPID, XFYUN_API_SECRET, XFYUN_API_KEY = get_conf('XFYUN_APPID', 'XFYUN_API_SECRET', 'XFYUN_API_KEY')
        if XFYUN_APPID == '00000000' or XFYUN_APPID == '': raise RuntimeError('请配置讯飞星火大模型的XFYUN_APPID, XFYUN_API_KEY, XFYUN_API_SECRET')
        self.appid = XFYUN_APPID
        self.api_secret = XFYUN_API_SECRET
        self.api_key = XFYUN_API_KEY
        # Endpoints for the three Spark generations plus image understanding.
        self.gpt_url = "ws://spark-api.xf-yun.com/v1.1/chat"
        self.gpt_url_v2 = "ws://spark-api.xf-yun.com/v2.1/chat"
        self.gpt_url_v3 = "ws://spark-api.xf-yun.com/v3.1/chat"
        self.gpt_url_img = "wss://spark-api.cn-huabei-1.xf-yun.com/v2.1/image"

        self.time_to_yield_event = threading.Event()
        self.time_to_exit_event = threading.Event()
        self.result_buf = ""

    def generate(self, inputs, llm_kwargs, history, system_prompt, use_image_api=False):
        """Generator: run the websocket request on a worker thread and yield the
        growing response buffer roughly once per second until the stream ends."""
        import _thread as thread
        thread.start_new_thread(
            self.create_blocking_request,
            (inputs, llm_kwargs, history, system_prompt, use_image_api),
        )
        while True:
            self.time_to_yield_event.wait(timeout=1)
            if self.time_to_yield_event.is_set():
                yield self.result_buf
            if self.time_to_exit_event.is_set():
                return self.result_buf

    def create_blocking_request(self, inputs, llm_kwargs, history, system_prompt, use_image_api):
        """Open the websocket, send the request, and stream chunks into self.result_buf."""
        model = llm_kwargs['llm_model']
        if model == 'sparkv2':
            gpt_url = self.gpt_url_v2
        elif model == 'sparkv3':
            gpt_url = self.gpt_url_v3
        else:
            gpt_url = self.gpt_url
        # When the caller uploaded pictures, switch to the image-understanding endpoint.
        file_manifest = []
        if use_image_api and llm_kwargs.get('most_recent_uploaded'):
            if llm_kwargs['most_recent_uploaded'].get('path'):
                file_manifest = get_pictures_list(llm_kwargs['most_recent_uploaded']['path'])
                if len(file_manifest) > 0:
                    print('正在使用讯飞图片理解API')
                    gpt_url = self.gpt_url_img
        wsParam = Ws_Param(self.appid, self.api_key, self.api_secret, gpt_url)
        websocket.enableTrace(False)
        wsUrl = wsParam.create_url()

        def on_open(ws):
            # Push the payload from a fresh thread once the socket is up.
            import _thread as thread
            thread.start_new_thread(run, (ws,))

        def run(ws, *args):
            ws.send(json.dumps(gen_params(ws.appid, *ws.all_args, file_manifest)))

        def on_message(ws, message):
            data = json.loads(message)
            code = data['header']['code']
            if code != 0:
                # Non-zero code: surface the raw error payload and stop.
                print(f'请求错误: {code}, {data}')
                self.result_buf += str(data)
                ws.close()
                self.time_to_exit_event.set()
            else:
                choices = data["payload"]["choices"]
                status = choices["status"]
                content = choices["text"][0]["content"]
                ws.content += content
                self.result_buf += content
                if status == 2:
                    # status 2 marks the final chunk of the reply.
                    ws.close()
                    self.time_to_exit_event.set()
            self.time_to_yield_event.set()

        def on_error(ws, error):
            print("error:", error)
            self.time_to_exit_event.set()

        def on_close(ws, *args):
            self.time_to_exit_event.set()

        ws = websocket.WebSocketApp(wsUrl, on_message=on_message, on_error=on_error, on_close=on_close, on_open=on_open)
        ws.appid = self.appid
        ws.content = ""
        ws.all_args = (inputs, llm_kwargs, history, system_prompt)
        # Spark's endpoint is used without certificate verification (as upstream demo does).
        ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
150
+
151
def generate_message_payload(inputs, llm_kwargs, history, system_prompt, file_manifest):
    """Build the Spark `text` message list.

    Image mode (non-empty *file_manifest*) sends base64 pictures instead of the
    chat history; text mode sends system prompt plus alternating history turns.
    """
    messages = []
    if file_manifest:
        # One "image" turn per unique picture (deduplicated by payload content).
        encoded = [encode_image(image_path) for image_path in file_manifest]
        for img_s in encoded:
            if img_s not in str(messages):
                messages.append({"role": "user", "content": img_s, "content_type": "image"})
    else:
        messages = [{"role": "system", "content": system_prompt}]
        for idx in range(0, (len(history) // 2) * 2, 2):
            asked = {"role": "user", "content": history[idx]}
            answered = {"role": "assistant", "content": history[idx + 1]}
            if asked["content"] != "":
                # Skip turns with an empty or timed-out answer.
                if answered["content"] == "": continue
                if answered["content"] == timeout_bot_msg: continue
                messages.append(asked)
                messages.append(answered)
            else:
                # Empty question: the answer continues the previous turn.
                messages[-1]['content'] = answered['content']
    messages.append({"role": "user", "content": inputs})
    return messages
183
+
184
+
185
def gen_params(appid, inputs, llm_kwargs, history, system_prompt, file_manifest):
    """Build the full Spark websocket request body for the given app id and prompt."""
    # Map model name to its API "domain"; picture requests override to "image".
    domains = {
        "spark": "general",
        "sparkv2": "generalv2",
        "sparkv3": "generalv3",
    }
    domain = domains[llm_kwargs['llm_model']]
    if file_manifest: domain = 'image'
    return {
        "header": {
            "app_id": appid,
            "uid": "1234"
        },
        "parameter": {
            "chat": {
                "domain": domain,
                "temperature": llm_kwargs["temperature"],
                "random_threshold": 0.5,
                "max_tokens": 4096,
                "auditing": "default"
            }
        },
        "payload": {
            "message": {
                # Delegate message assembly to the shared payload builder.
                "text": generate_message_payload(inputs, llm_kwargs, history, system_prompt, file_manifest)
            }
        }
    }
217
+
request_llms/com_zhipuapi.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import get_conf
2
+ import threading
3
+ import logging
4
+
5
+ timeout_bot_msg = '[Local Message] Request timeout. Network error.'
6
+
7
class ZhipuRequestInstance():
    """Streams chat completions from the Zhipu (ChatGLM) SSE API."""

    def __init__(self):
        self.time_to_yield_event = threading.Event()
        self.time_to_exit_event = threading.Event()
        self.result_buf = ""

    def generate(self, inputs, llm_kwargs, history, system_prompt):
        """Yield the accumulated response text as SSE chunks arrive."""
        import zhipuai
        ZHIPUAI_API_KEY, ZHIPUAI_MODEL = get_conf("ZHIPUAI_API_KEY", "ZHIPUAI_MODEL")
        zhipuai.api_key = ZHIPUAI_API_KEY
        self.result_buf = ""
        response = zhipuai.model_api.sse_invoke(
            model=ZHIPUAI_MODEL,
            prompt=generate_message_payload(inputs, llm_kwargs, history, system_prompt),
            top_p=llm_kwargs['top_p'],
            temperature=llm_kwargs['temperature'],
        )
        for event in response.events():
            if event.event == "add":
                # Incremental token(s): append and surface the running buffer.
                self.result_buf += event.data
                yield self.result_buf
            elif event.event == "finish":
                yield self.result_buf
                break
            elif event.event in ("error", "interrupted"):
                raise RuntimeError("Unknown error:" + event.data)
            else:
                raise RuntimeError("Unknown error:" + str(event))

        logging.info(f'[raw_input] {inputs}')
        logging.info(f'[response] {self.result_buf}')
        return self.result_buf
42
+
43
def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
    """Build the strictly-alternating user/assistant message list for the Zhipu API.

    The system prompt becomes a leading user turn answered with a stock
    acknowledgement, since the API wants user/assistant alternation.
    """
    messages = [{"role": "user", "content": system_prompt}, {"role": "assistant", "content": "Certainly!"}]
    for idx in range(0, (len(history) // 2) * 2, 2):
        asked = {"role": "user", "content": history[idx]}
        answered = {"role": "assistant", "content": history[idx + 1]}
        if asked["content"] != "":
            # Skip turns with an empty or timed-out answer.
            if answered["content"] == "":
                continue
            if answered["content"] == timeout_bot_msg:
                continue
            messages.append(asked)
            messages.append(answered)
        else:
            # An empty question means the answer continues the previous turn.
            messages[-1]['content'] = answered['content']
    messages.append({"role": "user", "content": inputs})
    return messages
request_llms/edge_gpt_free.py ADDED
@@ -0,0 +1,1125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ========================================================================
3
+ 第一部分:来自EdgeGPT.py
4
+ https://github.com/acheong08/EdgeGPT
5
+ ========================================================================
6
+ """
7
+ """
8
+ Main.py
9
+ """
10
+
11
+ import argparse
12
+ import asyncio
13
+ import json
14
+ import os
15
+ import random
16
+ import re
17
+ import ssl
18
+ import sys
19
+ import time
20
+ import uuid
21
+ from enum import Enum
22
+ from pathlib import Path
23
+ from typing import Generator
24
+ from typing import Literal
25
+ from typing import Optional
26
+ from typing import Union
27
+
28
+ import aiohttp
29
+ import certifi
30
+ import httpx
31
+ from prompt_toolkit import PromptSession
32
+ from prompt_toolkit.auto_suggest import AutoSuggestFromHistory
33
+ from prompt_toolkit.completion import WordCompleter
34
+ from prompt_toolkit.history import InMemoryHistory
35
+ from prompt_toolkit.key_binding import KeyBindings
36
+ from rich.live import Live
37
+ from rich.markdown import Markdown
38
+
39
+ DELIMITER = "\x1e"
40
+
41
+
42
+ # Generate random IP between range 13.104.0.0/14
43
+ FORWARDED_IP = (
44
+ f"13.{random.randint(104, 107)}.{random.randint(0, 255)}.{random.randint(0, 255)}"
45
+ )
46
+
47
+ HEADERS = {
48
+ "accept": "application/json",
49
+ "accept-language": "en-US,en;q=0.9",
50
+ "content-type": "application/json",
51
+ "sec-ch-ua": '"Not_A Brand";v="99", "Microsoft Edge";v="110", "Chromium";v="110"',
52
+ "sec-ch-ua-arch": '"x86"',
53
+ "sec-ch-ua-bitness": '"64"',
54
+ "sec-ch-ua-full-version": '"109.0.1518.78"',
55
+ "sec-ch-ua-full-version-list": '"Chromium";v="110.0.5481.192", "Not A(Brand";v="24.0.0.0", "Microsoft Edge";v="110.0.1587.69"',
56
+ "sec-ch-ua-mobile": "?0",
57
+ "sec-ch-ua-model": "",
58
+ "sec-ch-ua-platform": '"Windows"',
59
+ "sec-ch-ua-platform-version": '"15.0.0"',
60
+ "sec-fetch-dest": "empty",
61
+ "sec-fetch-mode": "cors",
62
+ "sec-fetch-site": "same-origin",
63
+ "x-ms-client-request-id": str(uuid.uuid4()),
64
+ "x-ms-useragent": "azsdk-js-api-client-factory/1.0.0-beta.1 core-rest-pipeline/1.10.0 OS/Win32",
65
+ "Referer": "https://www.bing.com/search?q=Bing+AI&showconv=1&FORM=hpcodx",
66
+ "Referrer-Policy": "origin-when-cross-origin",
67
+ "x-forwarded-for": FORWARDED_IP,
68
+ }
69
+
70
+ HEADERS_INIT_CONVER = {
71
+ "authority": "edgeservices.bing.com",
72
+ "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
73
+ "accept-language": "en-US,en;q=0.9",
74
+ "cache-control": "max-age=0",
75
+ "sec-ch-ua": '"Chromium";v="110", "Not A(Brand";v="24", "Microsoft Edge";v="110"',
76
+ "sec-ch-ua-arch": '"x86"',
77
+ "sec-ch-ua-bitness": '"64"',
78
+ "sec-ch-ua-full-version": '"110.0.1587.69"',
79
+ "sec-ch-ua-full-version-list": '"Chromium";v="110.0.5481.192", "Not A(Brand";v="24.0.0.0", "Microsoft Edge";v="110.0.1587.69"',
80
+ "sec-ch-ua-mobile": "?0",
81
+ "sec-ch-ua-model": '""',
82
+ "sec-ch-ua-platform": '"Windows"',
83
+ "sec-ch-ua-platform-version": '"15.0.0"',
84
+ "sec-fetch-dest": "document",
85
+ "sec-fetch-mode": "navigate",
86
+ "sec-fetch-site": "none",
87
+ "sec-fetch-user": "?1",
88
+ "upgrade-insecure-requests": "1",
89
+ "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.69",
90
+ "x-edge-shopping-flag": "1",
91
+ "x-forwarded-for": FORWARDED_IP,
92
+ }
93
+
94
+ ssl_context = ssl.create_default_context()
95
+ ssl_context.load_verify_locations(certifi.where())
96
+
97
+
98
class NotAllowedToAccess(Exception):
    """Raised when Bing reports the account is not authorized for the chat service."""
100
+
101
+
102
class ConversationStyle(Enum):
    # Each member's value is the exact "optionsSets" list sent to the Bing
    # endpoint for that tone. The lists are opaque server-side flags; do not
    # reorder or deduplicate them (e.g. "dv3sugg"/"travelansgnd" appear twice
    # in `creative` as received from upstream).
    creative = [
        "nlu_direct_response_filter",
        "deepleo",
        "disable_emoji_spoken_text",
        "responsible_ai_policy_235",
        "enablemm",
        "h3imaginative",
        "travelansgnd",
        "dv3sugg",
        "clgalileo",
        "gencontentv3",
        "dv3sugg",
        "responseos",
        "e2ecachewrite",
        "cachewriteext",
        "nodlcpcwrite",
        "travelansgnd",
        "nojbfedge",
    ]
    balanced = [
        "nlu_direct_response_filter",
        "deepleo",
        "disable_emoji_spoken_text",
        "responsible_ai_policy_235",
        "enablemm",
        "galileo",
        "dv3sugg",
        "responseos",
        "e2ecachewrite",
        "cachewriteext",
        "nodlcpcwrite",
        "travelansgnd",
        "nojbfedge",
    ]
    precise = [
        "nlu_direct_response_filter",
        "deepleo",
        "disable_emoji_spoken_text",
        "responsible_ai_policy_235",
        "enablemm",
        "galileo",
        "dv3sugg",
        "responseos",
        "e2ecachewrite",
        "cachewriteext",
        "nodlcpcwrite",
        "travelansgnd",
        "h3precise",
        "clgalileo",
        "nojbfedge",
    ]
154
+
155
+
156
+ CONVERSATION_STYLE_TYPE = Optional[
157
+ Union[ConversationStyle, Literal["creative", "balanced", "precise"]]
158
+ ]
159
+
160
+
161
def _append_identifier(msg: dict) -> str:
    """Serialize *msg* to JSON and append the record-separator character that
    marks the end of a message frame on the ChatHub websocket."""
    serialized = json.dumps(msg, ensure_ascii=False)
    return serialized + DELIMITER
167
+
168
+
169
+ def _get_ran_hex(length: int = 32) -> str:
170
+ """
171
+ Returns random hex string
172
+ """
173
+ return "".join(random.choice("0123456789abcdef") for _ in range(length))
174
+
175
+
176
class _ChatHubRequest:
    """
    Request object for ChatHub
    """

    def __init__(
        self,
        conversation_signature: str,
        client_id: str,
        conversation_id: str,
        invocation_id: int = 0,
    ) -> None:
        # Serialized request payload; (re)built by update() before each send.
        self.struct: dict = {}

        self.client_id: str = client_id
        self.conversation_id: str = conversation_id
        self.conversation_signature: str = conversation_signature
        # Number of messages already sent; 0 means start of session.
        self.invocation_id: int = invocation_id

    def update(
        self,
        prompt: str,
        conversation_style: CONVERSATION_STYLE_TYPE,
        options = None,
        webpage_context = None,
        search_result = False,
    ) -> None:
        """
        Updates request object

        Rebuilds self.struct for the given prompt and increments invocation_id.
        `conversation_style` (member or name string) overrides `options`;
        `webpage_context` attaches page text as a prior Context message;
        `search_result` widens the allowed message types to include web search.
        """
        # Fallback option set when no conversation style is supplied.
        if options is None:
            options = [
                "deepleo",
                "enable_debug_commands",
                "disable_emoji_spoken_text",
                "enablemm",
            ]
        if conversation_style:
            # Accept a style name string and resolve it to the enum member;
            # the style's value list then replaces any caller-supplied options.
            if not isinstance(conversation_style, ConversationStyle):
                conversation_style = getattr(ConversationStyle, conversation_style)
            options = conversation_style.value
        self.struct = {
            "arguments": [
                {
                    "source": "cib",
                    "optionsSets": options,
                    "allowedMessageTypes": [
                        "Chat",
                        "Disengaged",
                        "AdsQuery",
                        "SemanticSerp",
                        "GenerateContentQuery",
                        "SearchQuery",
                    ],
                    # Opaque server-side experiment flags, copied from upstream.
                    "sliceIds": [
                        "chk1cf",
                        "nopreloadsscf",
                        "winlongmsg2tf",
                        "perfimpcomb",
                        "sugdivdis",
                        "sydnoinputt",
                        "wpcssopt",
                        "wintone2tf",
                        "0404sydicnbs0",
                        "405suggbs0",
                        "scctl",
                        "330uaugs0",
                        "0329resp",
                        "udscahrfon",
                        "udstrblm5",
                        "404e2ewrt",
                        "408nodedups0",
                        "403tvlansgnd",
                    ],
                    "traceId": _get_ran_hex(32),
                    "isStartOfSession": self.invocation_id == 0,
                    "message": {
                        "author": "user",
                        "inputMethod": "Keyboard",
                        "text": prompt,
                        "messageType": "Chat",
                    },
                    "conversationSignature": self.conversation_signature,
                    "participant": {
                        "id": self.client_id,
                    },
                    "conversationId": self.conversation_id,
                },
            ],
            "invocationId": str(self.invocation_id),
            "target": "chat",
            "type": 4,
        }
        if search_result:
            # Also accept the message types that carry web-search results.
            have_search_result = [
                "InternalSearchQuery",
                "InternalSearchResult",
                "InternalLoaderMessage",
                "RenderCardRequest",
            ]
            self.struct["arguments"][0]["allowedMessageTypes"] += have_search_result
        if webpage_context:
            # Attach the page text as a prior "Context" message so the bot can use it.
            self.struct["arguments"][0]["previousMessages"] = [
                {
                    "author": "user",
                    "description": webpage_context,
                    "contextType": "WebPage",
                    "messageType": "Context",
                    "messageId": "discover-web--page-ping-mriduna-----",
                },
            ]
        self.invocation_id += 1
288
+
289
+
290
class _Conversation:
    """
    Conversation API

    Performs the initial handshake with Bing's conversation/create endpoint
    and stores the returned ids/signature in self.struct.
    """

    def __init__(
        self,
        proxy = None,
        async_mode = False,
        cookies = None,
    ) -> None:
        # In async mode the constructor is a no-op shell; create() finishes setup.
        if async_mode:
            return
        self.struct: dict = {
            "conversationId": None,
            "clientId": None,
            "conversationSignature": None,
            "result": {"value": "Success", "message": None},
        }
        self.proxy = proxy
        # Fall back to the standard proxy environment variables.
        proxy = (
            proxy
            or os.environ.get("all_proxy")
            or os.environ.get("ALL_PROXY")
            or os.environ.get("https_proxy")
            or os.environ.get("HTTPS_PROXY")
            or None
        )
        if proxy is not None and proxy.startswith("socks5h://"):
            # httpx does not accept socks5h; downgrade the scheme to socks5.
            proxy = "socks5://" + proxy[len("socks5h://") :]
        self.session = httpx.Client(
            proxies=proxy,
            timeout=30,
            headers=HEADERS_INIT_CONVER,
        )
        if cookies:
            for cookie in cookies:
                self.session.cookies.set(cookie["name"], cookie["value"])
        # Send GET request
        response = self.session.get(
            url=os.environ.get("BING_PROXY_URL")
            or "https://edgeservices.bing.com/edgesvc/turing/conversation/create",
        )
        if response.status_code != 200:
            # Retry once against the community fallback endpoint.
            response = self.session.get(
                "https://edge.churchless.tech/edgesvc/turing/conversation/create",
            )
        if response.status_code != 200:
            print(f"Status code: {response.status_code}")
            print(response.text)
            print(response.url)
            raise Exception("Authentication failed")
        try:
            self.struct = response.json()
        except (json.decoder.JSONDecodeError, NotAllowedToAccess) as exc:
            raise Exception(
                "Authentication failed. You have not been accepted into the beta.",
            ) from exc
        if self.struct["result"]["value"] == "UnauthorizedRequest":
            raise NotAllowedToAccess(self.struct["result"]["message"])

    @staticmethod
    async def create(
        proxy = None,
        cookies = None,
    ):
        # Async counterpart of __init__: builds an empty instance via the
        # async_mode shortcut, then runs the same handshake with httpx.AsyncClient.
        self = _Conversation(async_mode=True)
        self.struct = {
            "conversationId": None,
            "clientId": None,
            "conversationSignature": None,
            "result": {"value": "Success", "message": None},
        }
        self.proxy = proxy
        # Same proxy-environment fallback as the sync path.
        proxy = (
            proxy
            or os.environ.get("all_proxy")
            or os.environ.get("ALL_PROXY")
            or os.environ.get("https_proxy")
            or os.environ.get("HTTPS_PROXY")
            or None
        )
        if proxy is not None and proxy.startswith("socks5h://"):
            proxy = "socks5://" + proxy[len("socks5h://") :]
        transport = httpx.AsyncHTTPTransport(retries=10)
        # Convert cookie format to httpx format
        formatted_cookies = None
        if cookies:
            formatted_cookies = httpx.Cookies()
            for cookie in cookies:
                formatted_cookies.set(cookie["name"], cookie["value"])
        async with httpx.AsyncClient(
            proxies=proxy,
            timeout=30,
            headers=HEADERS_INIT_CONVER,
            transport=transport,
            cookies=formatted_cookies,
        ) as client:
            # Send GET request
            response = await client.get(
                url=os.environ.get("BING_PROXY_URL")
                or "https://edgeservices.bing.com/edgesvc/turing/conversation/create",
            )
            if response.status_code != 200:
                # Retry once against the community fallback endpoint.
                response = await client.get(
                    "https://edge.churchless.tech/edgesvc/turing/conversation/create",
                )
        if response.status_code != 200:
            print(f"Status code: {response.status_code}")
            print(response.text)
            print(response.url)
            raise Exception("Authentication failed")
        try:
            self.struct = response.json()
        except (json.decoder.JSONDecodeError, NotAllowedToAccess) as exc:
            raise Exception(
                "Authentication failed. You have not been accepted into the beta.",
            ) from exc
        if self.struct["result"]["value"] == "UnauthorizedRequest":
            raise NotAllowedToAccess(self.struct["result"]["message"])
        return self
411
+
412
+
413
+ class _ChatHub:
414
+ """
415
+ Chat API
416
+ """
417
+
418
+ def __init__(
419
+ self,
420
+ conversation: _Conversation,
421
+ proxy = None,
422
+ cookies = None,
423
+ ) -> None:
424
+ self.session = None
425
+ self.wss = None
426
+ self.request: _ChatHubRequest
427
+ self.loop: bool
428
+ self.task: asyncio.Task
429
+ self.request = _ChatHubRequest(
430
+ conversation_signature=conversation.struct["conversationSignature"],
431
+ client_id=conversation.struct["clientId"],
432
+ conversation_id=conversation.struct["conversationId"],
433
+ )
434
+ self.cookies = cookies
435
+ self.proxy: str = proxy
436
+
437
+ async def ask_stream(
438
+ self,
439
+ prompt: str,
440
+ wss_link: str,
441
+ conversation_style: CONVERSATION_STYLE_TYPE = None,
442
+ raw: bool = False,
443
+ options: dict = None,
444
+ webpage_context = None,
445
+ search_result: bool = False,
446
+ ) -> Generator[str, None, None]:
447
+ """
448
+ Ask a question to the bot
449
+ """
450
+ req_header = HEADERS
451
+ if self.cookies is not None:
452
+ ws_cookies = []
453
+ for cookie in self.cookies:
454
+ ws_cookies.append(f"{cookie['name']}={cookie['value']}")
455
+ req_header.update({
456
+ 'Cookie': ';'.join(ws_cookies),
457
+ })
458
+
459
+ timeout = aiohttp.ClientTimeout(total=30)
460
+ self.session = aiohttp.ClientSession(timeout=timeout)
461
+
462
+ if self.wss and not self.wss.closed:
463
+ await self.wss.close()
464
+ # Check if websocket is closed
465
+ self.wss = await self.session.ws_connect(
466
+ wss_link,
467
+ headers=req_header,
468
+ ssl=ssl_context,
469
+ proxy=self.proxy,
470
+ autoping=False,
471
+ )
472
+ await self._initial_handshake()
473
+ if self.request.invocation_id == 0:
474
+ # Construct a ChatHub request
475
+ self.request.update(
476
+ prompt=prompt,
477
+ conversation_style=conversation_style,
478
+ options=options,
479
+ webpage_context=webpage_context,
480
+ search_result=search_result,
481
+ )
482
+ else:
483
+ async with httpx.AsyncClient() as client:
484
+ response = await client.post(
485
+ "https://sydney.bing.com/sydney/UpdateConversation/",
486
+ json={
487
+ "messages": [
488
+ {
489
+ "author": "user",
490
+ "description": webpage_context,
491
+ "contextType": "WebPage",
492
+ "messageType": "Context",
493
+ },
494
+ ],
495
+ "conversationId": self.request.conversation_id,
496
+ "source": "cib",
497
+ "traceId": _get_ran_hex(32),
498
+ "participant": {"id": self.request.client_id},
499
+ "conversationSignature": self.request.conversation_signature,
500
+ },
501
+ )
502
+ if response.status_code != 200:
503
+ print(f"Status code: {response.status_code}")
504
+ print(response.text)
505
+ print(response.url)
506
+ raise Exception("Update web page context failed")
507
+ # Construct a ChatHub request
508
+ self.request.update(
509
+ prompt=prompt,
510
+ conversation_style=conversation_style,
511
+ options=options,
512
+ )
513
+ # Send request
514
+ await self.wss.send_str(_append_identifier(self.request.struct))
515
+ final = False
516
+ draw = False
517
+ resp_txt = ""
518
+ result_text = ""
519
+ resp_txt_no_link = ""
520
+ while not final:
521
+ msg = await self.wss.receive()
522
+ try:
523
+ objects = msg.data.split(DELIMITER)
524
+ except :
525
+ continue
526
+
527
+ for obj in objects:
528
+ if obj is None or not obj:
529
+ continue
530
+ response = json.loads(obj)
531
+ if response.get("type") != 2 and raw:
532
+ yield False, response
533
+ elif response.get("type") == 1 and response["arguments"][0].get(
534
+ "messages",
535
+ ):
536
+ if not draw:
537
+ if (
538
+ response["arguments"][0]["messages"][0].get("messageType")
539
+ == "GenerateContentQuery"
540
+ ):
541
+ async with ImageGenAsync("", True) as image_generator:
542
+ images = await image_generator.get_images(
543
+ response["arguments"][0]["messages"][0]["text"],
544
+ )
545
+ for i, image in enumerate(images):
546
+ resp_txt = resp_txt + f"\n![image{i}]({image})"
547
+ draw = True
548
+ if (
549
+ response["arguments"][0]["messages"][0]["contentOrigin"]
550
+ != "Apology"
551
+ ) and not draw:
552
+ resp_txt = result_text + response["arguments"][0][
553
+ "messages"
554
+ ][0]["adaptiveCards"][0]["body"][0].get("text", "")
555
+ resp_txt_no_link = result_text + response["arguments"][0][
556
+ "messages"
557
+ ][0].get("text", "")
558
+ if response["arguments"][0]["messages"][0].get(
559
+ "messageType",
560
+ ):
561
+ resp_txt = (
562
+ resp_txt
563
+ + response["arguments"][0]["messages"][0][
564
+ "adaptiveCards"
565
+ ][0]["body"][0]["inlines"][0].get("text")
566
+ + "\n"
567
+ )
568
+ result_text = (
569
+ result_text
570
+ + response["arguments"][0]["messages"][0][
571
+ "adaptiveCards"
572
+ ][0]["body"][0]["inlines"][0].get("text")
573
+ + "\n"
574
+ )
575
+ yield False, resp_txt
576
+
577
+ elif response.get("type") == 2:
578
+ if response["item"]["result"].get("error"):
579
+ await self.close()
580
+ raise Exception(
581
+ f"{response['item']['result']['value']}: {response['item']['result']['message']}",
582
+ )
583
+ if draw:
584
+ cache = response["item"]["messages"][1]["adaptiveCards"][0][
585
+ "body"
586
+ ][0]["text"]
587
+ response["item"]["messages"][1]["adaptiveCards"][0]["body"][0][
588
+ "text"
589
+ ] = (cache + resp_txt)
590
+ if (
591
+ response["item"]["messages"][-1]["contentOrigin"] == "Apology"
592
+ and resp_txt
593
+ ):
594
+ response["item"]["messages"][-1]["text"] = resp_txt_no_link
595
+ response["item"]["messages"][-1]["adaptiveCards"][0]["body"][0][
596
+ "text"
597
+ ] = resp_txt
598
+ print(
599
+ "Preserved the message from being deleted",
600
+ file=sys.stderr,
601
+ )
602
+ final = True
603
+ await self.close()
604
+ yield True, response
605
+
606
    async def _initial_handshake(self) -> None:
        """Perform the signalr JSON-protocol handshake on the websocket.

        Sends the protocol negotiation record and waits for the server's
        acknowledgement; must run before any ChatHub request is sent.
        """
        await self.wss.send_str(_append_identifier({"protocol": "json", "version": 1}))
        await self.wss.receive()
610
    async def close(self) -> None:
        """
        Close the connection.

        Shuts the websocket first, then the HTTP session; each is only
        closed if it exists and is still open.
        """
        if self.wss and not self.wss.closed:
            await self.wss.close()
        if self.session and not self.session.closed:
            await self.session.close()
618
+
619
+
620
class Chatbot:
    """
    Combines everything to make it seamless: a thin facade over _ChatHub
    that owns the conversation lifecycle (create / ask / reset / close).
    """

    def __init__(
        self,
        proxy = None,
        cookies = None,
    ) -> None:
        # Synchronous constructor: builds the conversation without awaiting.
        # Prefer the async `create` factory, which awaits conversation setup.
        self.proxy = proxy
        self.chat_hub: _ChatHub = _ChatHub(
            _Conversation(self.proxy, cookies=cookies),
            proxy=self.proxy,
            cookies=cookies,
        )

    @staticmethod
    async def create(
        proxy = None,
        cookies = None,
    ):
        """Async factory: bypasses __init__ via __new__ so the underlying
        _Conversation can be awaited during construction."""
        self = Chatbot.__new__(Chatbot)
        self.proxy = proxy
        self.chat_hub = _ChatHub(
            await _Conversation.create(self.proxy, cookies=cookies),
            proxy=self.proxy,
            cookies=cookies,
        )
        return self

    async def ask(
        self,
        prompt: str,
        wss_link: str = "wss://sydney.bing.com/sydney/ChatHub",
        conversation_style: CONVERSATION_STYLE_TYPE = None,
        options: dict = None,
        webpage_context = None,
        search_result: bool = False,
    ) -> dict:
        """
        Ask a question to the bot.

        Consumes the stream and returns only the final payload dict;
        returns {} if the stream ends without a final record (the
        websocket is closed in that case).
        """
        async for final, response in self.chat_hub.ask_stream(
            prompt=prompt,
            conversation_style=conversation_style,
            wss_link=wss_link,
            options=options,
            webpage_context=webpage_context,
            search_result=search_result,
        ):
            if final:
                return response
        await self.chat_hub.wss.close()
        return {}

    async def ask_stream(
        self,
        prompt: str,
        wss_link: str = "wss://sydney.bing.com/sydney/ChatHub",
        conversation_style: CONVERSATION_STYLE_TYPE = None,
        raw: bool = False,
        options: dict = None,
        webpage_context = None,
        search_result: bool = False,
    ) -> Generator[str, None, None]:
        """
        Ask a question to the bot, yielding (final, response) tuples
        straight from the chat hub.

        NOTE(review): this is an async generator of tuples, not a plain
        Generator[str] as annotated — confirm before tightening types.
        """
        async for response in self.chat_hub.ask_stream(
            prompt=prompt,
            conversation_style=conversation_style,
            wss_link=wss_link,
            raw=raw,
            options=options,
            webpage_context=webpage_context,
            search_result=search_result,
        ):
            yield response

    async def close(self) -> None:
        """
        Close the connection.
        """
        await self.chat_hub.close()

    async def reset(self) -> None:
        """
        Reset the conversation: close the current hub and build a fresh
        one reusing the same proxy and the previous hub's cookies.
        """
        await self.close()
        self.chat_hub = _ChatHub(
            await _Conversation.create(self.proxy),
            proxy=self.proxy,
            cookies=self.chat_hub.cookies,
        )
716
+
717
+
718
async def _get_input_async(
    session: PromptSession = None,
    completer: WordCompleter = None,
) -> str:
    """
    Multiline input function.

    Reads from the given prompt_toolkit session until the key bindings
    submit the buffer (see _create_session); auto-suggest from history
    is enabled.
    """
    return await session.prompt_async(
        completer=completer,
        multiline=True,
        auto_suggest=AutoSuggestFromHistory(),
    )
730
+
731
+
732
def _create_session() -> PromptSession:
    """Build a PromptSession whose Enter key only submits `!`-commands.

    Enter inserts a newline unless the buffer starts with "!", in which
    case the input is submitted.  Escape clears the buffer whenever a
    completion menu is open.
    """
    bindings = KeyBindings()

    @bindings.add("enter")
    def _(event):
        buf = event.current_buffer
        if buf.text.startswith("!"):
            buf.validate_and_handle()
            return
        buf.insert_text("\n")

    @bindings.add("escape")
    def _(event):
        buf = event.current_buffer
        if buf.complete_state:
            buf.text = ""

    return PromptSession(key_bindings=bindings, history=InMemoryHistory())
750
+
751
+
752
def _create_completer(commands: list, pattern_str: str = "$"):
    # Word completer for the CLI "!" commands; pattern_str controls how the
    # word under the cursor is matched before completion kicks in.
    return WordCompleter(words=commands, pattern=re.compile(pattern_str))
754
+
755
+
756
async def async_main(args: argparse.Namespace) -> None:
    """
    Main function: interactive REPL loop driving a Chatbot from the CLI.

    Loads cookies (optional), creates the bot, then loops: read a question
    (single-line if --enter-once, else multiline prompt), handle the
    !help/!exit/!reset commands, and print the answer all at once
    (--no-stream), as live Markdown (--rich), or as incrementally
    flushed plain text.
    """
    print("Initializing...")
    print("Enter `alt+enter` or `escape+enter` to send a message")
    # Read and parse cookies
    cookies = None
    if args.cookie_file:
        cookies = json.loads(open(args.cookie_file, encoding="utf-8").read())
    bot = await Chatbot.create(proxy=args.proxy, cookies=cookies)
    session = _create_session()
    completer = _create_completer(["!help", "!exit", "!reset"])
    initial_prompt = args.prompt

    while True:
        print("\nYou:")
        if initial_prompt:
            # --prompt is consumed exactly once; later turns read stdin.
            question = initial_prompt
            print(question)
            initial_prompt = None
        else:
            question = (
                input()
                if args.enter_once
                else await _get_input_async(session=session, completer=completer)
            )
        print()
        if question == "!exit":
            break
        if question == "!help":
            print(
                """
            !help - Show this help message
            !exit - Exit the program
            !reset - Reset the conversation
            """,
            )
            continue
        if question == "!reset":
            await bot.reset()
            continue
        print("Bot:")
        if args.no_stream:
            # Single-shot: await the final payload and print its rendered text.
            print(
                (
                    await bot.ask(
                        prompt=question,
                        conversation_style=args.style,
                        wss_link=args.wss_link,
                    )
                )["item"]["messages"][1]["adaptiveCards"][0]["body"][0]["text"],
            )
        else:
            # `wrote` tracks how many characters were already printed so only
            # the new suffix of each streamed snapshot is emitted.
            wrote = 0
            if args.rich:
                md = Markdown("")
                with Live(md, auto_refresh=False) as live:
                    async for final, response in bot.ask_stream(
                        prompt=question,
                        conversation_style=args.style,
                        wss_link=args.wss_link,
                    ):
                        if not final:
                            if wrote > len(response):
                                # Stream shrank: Bing revoked its answer.
                                print(md)
                                print(Markdown("***Bing revoked the response.***"))
                            wrote = len(response)
                            md = Markdown(response)
                            live.update(md, refresh=True)
            else:
                async for final, response in bot.ask_stream(
                    prompt=question,
                    conversation_style=args.style,
                    wss_link=args.wss_link,
                ):
                    if not final:
                        if not wrote:
                            print(response, end="", flush=True)
                        else:
                            print(response[wrote:], end="", flush=True)
                        wrote = len(response)
                print()
    await bot.close()
840
+
841
+
842
def main() -> None:
    """Print the banner, parse CLI arguments, and run the chat loop."""
    print(
        """
        EdgeGPT - A demo of reverse engineering the Bing GPT chatbot
        Repo: github.com/acheong08/EdgeGPT
        By: Antonio Cheong

        !help for help

        Type !exit to exit
    """,
    )
    parser = argparse.ArgumentParser()
    parser.add_argument("--enter-once", action="store_true")
    parser.add_argument("--no-stream", action="store_true")
    parser.add_argument("--rich", action="store_true")
    parser.add_argument(
        "--proxy",
        help="Proxy URL (e.g. socks5://127.0.0.1:1080)",
        type=str,
    )
    parser.add_argument(
        "--wss-link",
        help="WSS URL(e.g. wss://sydney.bing.com/sydney/ChatHub)",
        type=str,
        default="wss://sydney.bing.com/sydney/ChatHub",
    )
    parser.add_argument(
        "--style",
        choices=["creative", "balanced", "precise"],
        default="balanced",
    )
    parser.add_argument(
        "--prompt",
        type=str,
        default="",
        required=False,
        help="prompt to start with",
    )
    parser.add_argument(
        "--cookie-file",
        type=str,
        default="",
        required=False,
        help="path to cookie file",
    )
    args = parser.parse_args()
    asyncio.run(async_main(args))
890
+
891
+
892
class Cookie:
    """
    Convenience class for Bing Cookie files, data, and configuration. This Class
    is updated dynamically by the Query class to allow cycling through >1
    cookie/credentials file e.g. when daily request limits (current 200 per
    account per day) are exceeded.
    """

    # Shared class-level state: active file index, search directory, glob
    # pattern, and files excluded for the rest of this session.
    current_file_index = 0
    dirpath = Path("./").resolve()
    search_pattern = "bing_cookies_*.json"
    ignore_files = set()

    @classmethod
    def fetch_default(cls, path=None):
        """Drive Edge via Selenium to bing.com/chat and dump its cookies to JSON."""
        from selenium import webdriver
        from selenium.webdriver.common.by import By

        driver = webdriver.Edge()
        driver.get("https://bing.com/chat")
        time.sleep(5)
        xpath = '//button[@id="bnp_btn_accept"]'
        driver.find_element(By.XPATH, xpath).click()
        time.sleep(2)
        xpath = '//a[@id="codexPrimaryButton"]'
        driver.find_element(By.XPATH, xpath).click()
        if path is None:
            path = Path("./bing_cookies__default.json")
            # Double underscore ensures this file is first when sorted
        cookies = driver.get_cookies()
        Path(path).write_text(json.dumps(cookies, indent=4), encoding="utf-8")
        # Path again in case supplied path is: str
        print(f"Cookies saved to: {path}")
        driver.quit()

    @classmethod
    def files(cls):
        """Return a sorted list of all cookie files matching .search_pattern"""
        all_files = set(cls.dirpath.glob(cls.search_pattern))
        return sorted(list(all_files - cls.ignore_files))

    @classmethod
    def import_data(cls):
        """
        Read the active cookie file and populate the following attributes:

          .current_filepath
          .current_data
          .image_token
        """
        try:
            cls.current_filepath = cls.files()[cls.current_file_index]
        except IndexError:
            print(
                "> Please set Cookie.current_filepath to a valid cookie file, then run Cookie.import_data()",
            )
            return
        print(f"> Importing cookies from: {cls.current_filepath.name}")
        with open(cls.current_filepath, encoding="utf-8") as file:
            cls.current_data = json.load(file)
        # The "_U" cookie doubles as the image-generation token.
        cls.image_token = [x for x in cls.current_data if x.get("name") == "_U"]
        cls.image_token = cls.image_token[0].get("value")

    @classmethod
    def import_next(cls):
        """
        Cycle through to the next cookies file. Import it. Mark the previous
        file to be ignored for the remainder of the current session.

        NOTE(review): current_file_index is never incremented here; advancing
        relies on files() shrinking as entries join ignore_files, so the same
        index points at the next file — confirm this is intended.
        """
        cls.ignore_files.add(cls.current_filepath)
        if Cookie.current_file_index >= len(cls.files()):
            Cookie.current_file_index = 0
        Cookie.import_data()
965
+
966
+
967
class Query:
    """
    A convenience class that wraps around EdgeGPT.Chatbot to encapsulate input,
    config, and output all together. Relies on Cookie class for authentication.
    """

    def __init__(
        self,
        prompt,
        style="precise",
        content_type="text",
        cookie_file=0,
        echo=True,
        echo_prompt=False,
    ):
        """
        Arguments:

        prompt: Text to enter into Bing Chat
        style: creative, balanced, or precise
        content_type: "text" for Bing Chat; "image" for Dall-e
        cookie_file: Path, filepath string, or index (int) to list of cookie paths
        echo: Print something to confirm request made
        echo_prompt: Print confirmation of the evaluated prompt
        """
        self.index = []
        self.request_count = {}
        self.image_dirpath = Path("./").resolve()
        Cookie.import_data()
        self.index += [self]
        self.prompt = prompt
        files = Cookie.files()
        if isinstance(cookie_file, int):
            index = cookie_file if cookie_file < len(files) else 0
        else:
            if not isinstance(cookie_file, (str, Path)):
                message = "'cookie_file' must be an int, str, or Path object"
                raise TypeError(message)
            cookie_file = Path(cookie_file)
            # BUGFIX: `files` is a list; the original called it like a
            # function (`files()`), which raised TypeError on this path.
            if cookie_file in files:  # Supplied filepath IS in Cookie.dirpath
                index = files.index(cookie_file)
            else:  # Supplied filepath is NOT in Cookie.dirpath
                if cookie_file.is_file():
                    Cookie.dirpath = cookie_file.parent.resolve()
                if cookie_file.is_dir():
                    Cookie.dirpath = cookie_file.resolve()
                index = 0
        Cookie.current_file_index = index
        if content_type == "text":
            self.style = style
            self.log_and_send_query(echo, echo_prompt)
        if content_type == "image":
            self.create_image()

    def log_and_send_query(self, echo, echo_prompt):
        """Run the async request to completion and count requests per cookie file."""
        self.response = asyncio.run(self.send_to_bing(echo, echo_prompt))
        name = str(Cookie.current_filepath.name)
        self.request_count[name] = self.request_count.get(name, 0) + 1

    def create_image(self):
        """Generate Dall-e images for the prompt and save them to image_dirpath."""
        image_generator = ImageGen(Cookie.image_token)
        image_generator.save_images(
            image_generator.get_images(self.prompt),
            output_dir=self.image_dirpath,
        )

    async def send_to_bing(self, echo=True, echo_prompt=False):
        """Create, submit, then close a Chatbot instance. Return the response.

        Cycles to the next cookie file on KeyError (daily request limit).
        Returns None when every cookie file has been exhausted.
        """
        retries = len(Cookie.files())
        while retries:
            # BUGFIX: predefine `bot` so `finally` cannot raise
            # UnboundLocalError when Chatbot.create() itself fails.
            bot = None
            try:
                bot = await Chatbot.create()
                if echo_prompt:
                    print(f"> {self.prompt=}")
                if echo:
                    print("> Waiting for response...")
                if self.style.lower() not in "creative balanced precise".split():
                    self.style = "precise"
                response = await bot.ask(
                    prompt=self.prompt,
                    conversation_style=getattr(ConversationStyle, self.style),
                    # wss_link="wss://sydney.bing.com/sydney/ChatHub"
                    # What other values can this parameter take? It seems to be optional
                )
                return response
            except KeyError:
                print(
                    f"> KeyError [{Cookie.current_filepath.name} may have exceeded the daily limit]",
                )
                Cookie.import_next()
                retries -= 1
            finally:
                if bot is not None:
                    await bot.close()

    @property
    def output(self):
        """The response from a completed Chatbot request"""
        return self.response["item"]["messages"][1]["text"]

    @property
    def sources(self):
        """The source names and details parsed from a completed Chatbot request"""
        return self.response["item"]["messages"][1]["sourceAttributions"]

    @property
    def sources_dict(self):
        """The source names and details as a dictionary"""
        name = "providerDisplayName"
        url = "seeMoreUrl"
        # Keep only attributions that carry both a display name and a link.
        return {
            source[name]: source[url]
            for source in self.sources
            if name in source and url in source
        }

    @property
    def code(self):
        """Extract and join any snippets of Python code in the response"""
        code_blocks = self.output.split("```")[1:-1:2]
        # Drop the language tag line from each fenced block.
        code_blocks = ["\n".join(x.splitlines()[1:]) for x in code_blocks]
        return "\n\n".join(code_blocks)

    @property
    def languages(self):
        """Extract all programming languages given in code blocks"""
        code_blocks = self.output.split("```")[1:-1:2]
        return {x.splitlines()[0] for x in code_blocks}

    @property
    def suggestions(self):
        """Follow-on questions suggested by the Chatbot"""
        return [
            x["text"]
            for x in self.response["item"]["messages"][1]["suggestedResponses"]
        ]

    def __repr__(self):
        return f"<EdgeGPT.Query: {self.prompt}>"

    def __str__(self):
        return self.output
1113
+
1114
+
1115
class ImageQuery(Query):
    """A Query whose content_type is always "image" (Dall-e generation)."""

    def __init__(self, prompt, **kwargs):
        kwargs["content_type"] = "image"
        super().__init__(prompt, **kwargs)

    def __repr__(self):
        return f"<EdgeGPT.ImageQuery: {self.prompt}>"
1123
+
1124
if __name__ == "__main__":
    # Script entry point: launch the interactive EdgeGPT CLI.
    main()
request_llms/key_manager.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import random


def Singleton(cls):
    """Class decorator caching a single shared instance of *cls*.

    The first call constructs the instance; every later call returns the
    same object, regardless of the arguments passed.
    """
    _instances = {}

    def _get_instance(*args, **kargs):
        if cls not in _instances:
            _instances[cls] = cls(*args, **kargs)
        return _instances[cls]

    return _get_instance


@Singleton
class OpenAI_ApiKeyManager():
    """Process-wide registry of API keys known to be unusable."""

    def __init__(self, mode='blacklist') -> None:
        # self.key_avail_list = []
        # Keys that previously failed; they are never handed out again.
        self.key_black_list = []

    def add_key_to_blacklist(self, key):
        """Mark *key* as unusable for the rest of the process lifetime."""
        self.key_black_list.append(key)

    def select_avail_key(self, key_list):
        """Randomly pick a key from *key_list* that is not blacklisted.

        Raises KeyError when every candidate has been blacklisted.
        """
        candidates = [k for k in key_list if k not in self.key_black_list]
        if not candidates:
            raise KeyError("No available key found.")
        return random.choice(candidates)
request_llms/local_llm_class.py ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import threading
3
+ from toolbox import update_ui, Singleton
4
+ from multiprocessing import Process, Pipe
5
+ from contextlib import redirect_stdout
6
+ from request_llms.queued_pipe import create_queue_pipe
7
+
8
class ThreadLock(object):
    """Thin wrapper around threading.Lock usable as a context manager.

    Exists mainly as a hook point for debugging lock acquisition (the
    original carried commented-out trace prints).
    """

    def __init__(self):
        self._lock = threading.Lock()

    def acquire(self):
        """Block until the lock is held."""
        self._lock.acquire()

    def release(self):
        """Release the lock; raises RuntimeError if it is not held."""
        self._lock.release()

    def __enter__(self):
        self.acquire()
        # FIX: return self so `with lock as l:` binds the wrapper
        # (the original returned None).
        return self

    def __exit__(self, type, value, traceback):
        self.release()
28
+
29
@Singleton
class GetSingletonHandle():
    """Keeps at most one live handle per local-LLM class.

    A cached handle is replaced when its subprocess has been marked
    corrupted, otherwise the existing one is reused.
    """

    def __init__(self):
        self.llm_model_already_running = {}

    def get_llm_model_instance(self, cls, *args, **kargs):
        """Return the cached handle for *cls*, creating or replacing as needed."""
        cached = self.llm_model_already_running.get(cls)
        if cached is None or cached.corrupted:
            cached = cls(*args, **kargs)
            self.llm_model_already_running[cls] = cached
        return cached
43
+
44
def reset_tqdm_output():
    """Monkeypatch tqdm so progress lines are emitted via plain print().

    Needed because the subprocess's stdout is redirected through a pipe
    (see LocalLLMHandle.run); tqdm's default carriage-return rendering
    does not survive that redirection.
    """
    import sys, tqdm
    def status_printer(self, file):
        # Replacement for tqdm.tqdm.status_printer: same signature, but
        # writes through print() instead of the raw file handle.
        fp = file
        if fp in (sys.stderr, sys.stdout):
            getattr(sys.stderr, 'flush', lambda: None)()
            getattr(sys.stdout, 'flush', lambda: None)()

        def fp_write(s):
            print(s)
        # last_len is a one-element list so the closure can mutate it.
        last_len = [0]

        def print_status(s):
            from tqdm.utils import disp_len
            len_s = disp_len(s)
            # Pad with spaces so a shorter status fully overwrites the
            # previous, longer one.
            fp_write('\r' + s + (' ' * max(last_len[0] - len_s, 0)))
            last_len[0] = len_s
        return print_status
    tqdm.tqdm.status_printer = status_printer
63
+
64
+
65
class LocalLLMHandle(Process):
    """Runs a local LLM in a daemon subprocess and streams replies back.

    The same object exists on both sides of the fork: `is_main_process` is
    toggled around `start()` so the state pickled into the child carries the
    child-side flag while the parent keeps the main-side flag. Communication
    uses two queue-backed pipes (see create_queue_pipe): one for chat
    traffic, one for model-state strings.
    """

    def __init__(self):
        # ⭐ run in main process
        super().__init__(daemon=True)
        self.is_main_process = True  # init
        self.corrupted = False
        self.load_model_info()
        self.parent, self.child = create_queue_pipe()
        self.parent_state, self.child_state = create_queue_pipe()
        # std_tag marks pipe messages that are redirected subprocess stdout
        # rather than model output (allow redirect_stdout).
        self.std_tag = "[Subprocess Message] "
        self.running = True
        self._model = None
        self._tokenizer = None
        self.state = ""
        self.check_dependency()
        self.is_main_process = False  # state wrap for child process
        self.start()
        self.is_main_process = True  # state wrap for child process
        self.threadLock = ThreadLock()

    def get_state(self):
        # ⭐ run in main process
        # Drain every queued state update; keep only the most recent.
        while self.parent_state.poll():
            self.state = self.parent_state.recv()
        return self.state

    def set_state(self, new_state):
        # ⭐ run in main process or 🏃‍♂️ run in child process
        if self.is_main_process:
            self.state = new_state
        else:
            # Child cannot mutate the parent's copy; send it over the pipe.
            self.child_state.send(new_state)

    def load_model_info(self):
        # 🏃‍♂️ run in child process
        # Subclasses must set model_name and cmd_to_install here.
        raise NotImplementedError("Method not implemented yet")
        self.model_name = ""
        self.cmd_to_install = ""

    def load_model_and_tokenizer(self):
        """
        This function should return the model and the tokenizer
        """
        # 🏃‍♂️ run in child process
        raise NotImplementedError("Method not implemented yet")

    def llm_stream_generator(self, **kwargs):
        # 🏃‍♂️ run in child process
        raise NotImplementedError("Method not implemented yet")

    def try_to_import_special_deps(self, **kwargs):
        """
        import something that will raise error if the user does not install requirement_*.txt
        """
        # ⭐ run in main process
        raise NotImplementedError("Method not implemented yet")

    def check_dependency(self):
        # ⭐ run in main process
        # NOTE(review): the bare except also swallows the subclass's
        # NotImplementedError — any import failure marks the handle not running.
        try:
            self.try_to_import_special_deps()
            self.set_state("`依赖检测通过`")
            self.running = True
        except:
            self.set_state(f"缺少{self.model_name}的依赖,如果要使用{self.model_name},除了基础的pip依赖以外,您还需要运行{self.cmd_to_install}安装{self.model_name}的依赖。")
            self.running = False

    def run(self):
        # 🏃‍♂️ run in child process
        # First run: load model weights. The child end of the pipe is given
        # file-like write/flush so redirect_stdout can stream loader output
        # back to the parent, tagged with std_tag.
        self.child.flush = lambda *args: None
        self.child.write = lambda x: self.child.send(self.std_tag + x)
        reset_tqdm_output()
        self.set_state("`尝试加载模型`")
        try:
            with redirect_stdout(self.child):
                self._model, self._tokenizer = self.load_model_and_tokenizer()
        except:
            self.set_state("`加载模型失败`")
            self.running = False
            from toolbox import trimmed_format_exc
            self.child.send(
                f'[Local Message] 不能正常加载{self.model_name}的参数.' + '\n```\n' + trimmed_format_exc() + '\n```\n')
            # [FinishBad] tells the parent this handle is permanently corrupted.
            self.child.send('[FinishBad]')
            raise RuntimeError(f"不能正常加载{self.model_name}的参数!")

        self.set_state("`准备就绪`")
        while True:
            # Wait for the next request from the parent.
            kwargs = self.child.recv()
            # Got one: stream the model's reply back chunk by chunk.
            try:
                for response_full in self.llm_stream_generator(**kwargs):
                    self.child.send(response_full)
                    # print('debug' + response_full)
                self.child.send('[Finish]')
                # Request done; loop back for the next one.
            except:
                from toolbox import trimmed_format_exc
                self.child.send(
                    f'[Local Message] 调用{self.model_name}失败.' + '\n```\n' + trimmed_format_exc() + '\n```\n')
                self.child.send('[Finish]')

    def clear_pending_messages(self):
        # ⭐ run in main process
        # Drain stale replies left by an interrupted request so the next
        # stream_chat starts from a clean pipe; waits up to 5x0.5s for
        # stragglers before giving up.
        while True:
            if self.parent.poll():
                self.parent.recv()
                continue
            for _ in range(5):
                time.sleep(0.5)
                if self.parent.poll():
                    r = self.parent.recv()
                    continue
            break
        return

    def stream_chat(self, **kwargs):
        # ⭐ run in main process
        if self.get_state() == "`准备就绪`":
            yield "`正在等待线程锁,排队中请稍候 ...`"

        with self.threadLock:
            if self.parent.poll():
                yield "`排队中请稍候 ...`"
                self.clear_pending_messages()
            self.parent.send(kwargs)
            std_out = ""
            std_out_clip_len = 4096
            while True:
                res = self.parent.recv()
                # pipe_watch_dog.feed()
                if res.startswith(self.std_tag):
                    # Redirected subprocess stdout: prepend newest output and
                    # clip the buffer so it cannot grow without bound.
                    new_output = res[len(self.std_tag):]
                    std_out = std_out[:std_out_clip_len]
                    print(new_output, end='')
                    std_out = new_output + std_out
                    yield self.std_tag + '\n```\n' + std_out + '\n```\n'
                elif res == '[Finish]':
                    break
                elif res == '[FinishBad]':
                    # Model failed to load; mark handle dead so the singleton
                    # cache rebuilds it on the next request.
                    self.running = False
                    self.corrupted = True
                    break
                else:
                    std_out = ""
                    yield res
213
+
214
def get_local_llm_predict_fns(LLMSingletonClass, model_name, history_format='classic'):
    """Build the (predict_no_ui_long_connection, predict) pair for a local model.

    LLMSingletonClass: a LocalLLMHandle subclass, instantiated at most once
    via GetSingletonHandle. history_format selects how chat history is
    packed: 'classic' (list of [user, assistant] pairs, system prompt folded
    in) or 'chatglm3' (OpenAI-style role/content dicts).
    """
    load_message = f"{model_name}尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,{model_name}消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……"

    def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
        """
        refer to request_llms/bridge_all.py
        """
        _llm_handle = GetSingletonHandle().get_llm_model_instance(LLMSingletonClass)
        if len(observe_window) >= 1:
            observe_window[0] = load_message + "\n\n" + _llm_handle.get_state()
        if not _llm_handle.running:
            raise RuntimeError(_llm_handle.get_state())

        if history_format == 'classic':
            # No system-prompt slot in this format, so fold the prompt into history.
            history_feedin = []
            history_feedin.append([sys_prompt, "Certainly!"])
            for i in range(len(history)//2):
                history_feedin.append([history[2*i], history[2*i+1]])
        elif history_format == 'chatglm3':
            # This format has a real system-prompt slot.
            conversation_cnt = len(history) // 2
            history_feedin = [{"role": "system", "content": sys_prompt}]
            if conversation_cnt:
                for index in range(0, 2*conversation_cnt, 2):
                    what_i_have_asked = {}
                    what_i_have_asked["role"] = "user"
                    what_i_have_asked["content"] = history[index]
                    what_gpt_answer = {}
                    what_gpt_answer["role"] = "assistant"
                    what_gpt_answer["content"] = history[index+1]
                    if what_i_have_asked["content"] != "":
                        # Skip turns whose answer is empty (e.g. interrupted).
                        if what_gpt_answer["content"] == "":
                            continue
                        history_feedin.append(what_i_have_asked)
                        history_feedin.append(what_gpt_answer)
                    else:
                        history_feedin[-1]['content'] = what_gpt_answer['content']

        watch_dog_patience = 5  # watchdog patience: 5 seconds is enough
        response = ""
        for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
            if len(observe_window) >= 1:
                observe_window[0] = response
            if len(observe_window) >= 2:
                # observe_window[1] is the caller's last feed time; abort when stale.
                if (time.time()-observe_window[1]) > watch_dog_patience:
                    raise RuntimeError("程序终止。")
        return response

    def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream=True, additional_fn=None):
        """
        refer to request_llms/bridge_all.py
        """
        chatbot.append((inputs, ""))

        _llm_handle = GetSingletonHandle().get_llm_model_instance(LLMSingletonClass)
        chatbot[-1] = (inputs, load_message + "\n\n" + _llm_handle.get_state())
        yield from update_ui(chatbot=chatbot, history=[])
        if not _llm_handle.running:
            raise RuntimeError(_llm_handle.get_state())

        if additional_fn is not None:
            from core_functional import handle_core_functionality
            inputs, history = handle_core_functionality(
                additional_fn, inputs, history, chatbot)

        # Pack the chat history (same two formats as above).
        if history_format == 'classic':
            # No system-prompt slot in this format, so fold the prompt into history.
            history_feedin = []
            history_feedin.append([system_prompt, "Certainly!"])
            for i in range(len(history)//2):
                history_feedin.append([history[2*i], history[2*i+1]])
        elif history_format == 'chatglm3':
            # This format has a real system-prompt slot.
            conversation_cnt = len(history) // 2
            history_feedin = [{"role": "system", "content": system_prompt}]
            if conversation_cnt:
                for index in range(0, 2*conversation_cnt, 2):
                    what_i_have_asked = {}
                    what_i_have_asked["role"] = "user"
                    what_i_have_asked["content"] = history[index]
                    what_gpt_answer = {}
                    what_gpt_answer["role"] = "assistant"
                    what_gpt_answer["content"] = history[index+1]
                    if what_i_have_asked["content"] != "":
                        if what_gpt_answer["content"] == "":
                            continue
                        history_feedin.append(what_i_have_asked)
                        history_feedin.append(what_gpt_answer)
                    else:
                        history_feedin[-1]['content'] = what_gpt_answer['content']

        # Stream the reply into the UI as it arrives.
        response = f"[Local Message] 等待{model_name}响应中 ..."
        for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
            chatbot[-1] = (inputs, response)
            yield from update_ui(chatbot=chatbot, history=history)

        # Finalize: if the placeholder never changed, the model produced nothing.
        if response == f"[Local Message] 等待{model_name}响应中 ...":
            response = f"[Local Message] {model_name}响应异常 ..."
        history.extend([inputs, response])
        yield from update_ui(chatbot=chatbot, history=history)

    return predict_no_ui_long_connection, predict
request_llms/queued_pipe.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from multiprocessing import Pipe, Queue
2
+ import time
3
+ import threading
4
+
5
+ class PipeSide(object):
6
+ def __init__(self, q_2remote, q_2local) -> None:
7
+ self.q_2remote = q_2remote
8
+ self.q_2local = q_2local
9
+
10
+ def recv(self):
11
+ return self.q_2local.get()
12
+
13
+ def send(self, buf):
14
+ self.q_2remote.put(buf)
15
+
16
+ def poll(self):
17
+ return not self.q_2local.empty()
18
+
19
+ def create_queue_pipe():
20
+ q_p2c = Queue()
21
+ q_c2p = Queue()
22
+ pipe_c = PipeSide(q_2local=q_p2c, q_2remote=q_c2p)
23
+ pipe_p = PipeSide(q_2local=q_c2p, q_2remote=q_p2c)
24
+ return pipe_c, pipe_p
request_llms/requirements_chatglm.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ protobuf
2
+ cpm_kernels
3
+ torch>=1.10
4
+ mdtex2html
5
+ sentencepiece
request_llms/requirements_newbing.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ BingImageCreator
2
+ certifi
3
+ httpx
4
+ prompt_toolkit
5
+ requests
6
+ rich
7
+ websockets
8
+ httpx[socks]
request_llms/requirements_qwen.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ dashscope
request_llms/requirements_qwen_local.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ modelscope
2
+ transformers_stream_generator
3
+ auto-gptq
4
+ optimum
5
+ urllib3<2