'''
Contributed by SagsMug. Modified by binary-husky
https://github.com/oobabooga/text-generation-webui/pull/175
'''
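
# Bridge between this project's chat interface and a running
# text-generation-webui server: we join the server's gradio queue over a
# websocket (ws://addr:port/queue/join) and stream partial generations back.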

import asyncio
import json
import random
import string
import threading
import time
import websockets

from toolbox import update_ui


def random_hash():
    # Nine random lowercase/digit characters, used as the gradio session hash.
    letters = string.ascii_lowercase + string.digits
    return ''.join(random.choice(letters) for i in range(9))


async def run(context, max_token, temperature, top_p, addr, port):
    # Generation parameters forwarded to the text-generation-webui backend.
    params = {
        'max_new_tokens': max_token,
        'do_sample': True,
        'temperature': temperature,
        'top_p': top_p,
        'typical_p': 1,
        'repetition_penalty': 1.05,
        'encoder_repetition_penalty': 1.0,
        'top_k': 0,
        'min_length': 0,
        'no_repeat_ngram_size': 0,
        'num_beams': 1,
        'penalty_alpha': 0,
        'length_penalty': 1,
        'early_stopping': True,
        'seed': -1,
    }
    session = random_hash()

    async with websockets.connect(f"ws://{addr}:{port}/queue/join") as websocket:
        while content := json.loads(await websocket.recv()):
            if content["msg"] == "send_hash":
                # Identify our session and the target function on the server.
                await websocket.send(json.dumps({
                    "session_hash": session,
                    "fn_index": 12
                }))
            elif content["msg"] == "estimation":
                pass  # Queue-position estimate; nothing to do.
            elif content["msg"] == "send_data":
                # Submit the prompt plus generation parameters; the order must
                # match the server-side signature of fn_index 12.
                await websocket.send(json.dumps({
                    "session_hash": session,
                    "fn_index": 12,
                    "data": [
                        context,
                        params['max_new_tokens'],
                        params['do_sample'],
                        params['temperature'],
                        params['top_p'],
                        params['typical_p'],
                        params['repetition_penalty'],
                        params['encoder_repetition_penalty'],
                        params['top_k'],
                        params['min_length'],
                        params['no_repeat_ngram_size'],
                        params['num_beams'],
                        params['penalty_alpha'],
                        params['length_penalty'],
                        params['early_stopping'],
                        params['seed'],
                    ]
                }))
            elif content["msg"] == "process_starts":
                pass
            elif content["msg"] in ["process_generating", "process_completed"]:
                # Both messages carry the full text generated so far.
                yield content["output"]["data"][0]
                if content["msg"] == "process_completed":
                    break
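
# A minimal sketch of consuming `run` directly (localhost:7860 is an assumed
# server address, not something this module configures):
#
#   async def demo():
#       async for partial in run(context="Hello", max_token=64, temperature=1.0,
#                                top_p=0.9, addr="localhost", port="7860"):
#           print(partial)  # each item is the full text generated so far
#
#   asyncio.run(demo())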


def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=None, system_prompt='', stream=True, additional_fn=None):
    """
    Send the query to the backend and stream the output back.
    Used for the basic chat feature.
    inputs is the text of the current query.
    top_p, temperature are the model's sampling parameters.
    history is the list of previous turns (if either inputs or history grows
    too long, it will trigger a token-overflow error).
    chatbot is the conversation shown in the WebUI; mutate it and yield to
    update the interface directly.
    additional_fn identifies which button was clicked; see core_functional.py.
    """
    if history is None:
        history = []  # Avoid the mutable-default pitfall: history is extended in place below.
    if additional_fn is not None:
        from core_functional import handle_core_functionality
        inputs, history = handle_core_functionality(additional_fn, inputs, history, chatbot)

    raw_input = "What I would like to say is the following: " + inputs
    history.extend([inputs, ""])
    chatbot.append([inputs, ""])
    yield from update_ui(chatbot=chatbot, history=history, msg="Waiting for response")

    prompt = raw_input
    tgui_say = ""

    # llm_kwargs['llm_model'] is expected to look like "model_name@addr:port".
    model_name, addr_port = llm_kwargs['llm_model'].split('@')
    assert ':' in addr_port, "Bad LLM_MODEL format! " + llm_kwargs['llm_model']
    addr, port = addr_port.split(':')

    # mutable[0] holds the text streamed so far; mutable[1] is a watchdog
    # timestamp that the loop below keeps refreshing while the UI is listening.
    mutable = ["", time.time()]

    def run_coroutine(mutable):
        async def get_result(mutable):
            async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
                                      temperature=llm_kwargs['temperature'],
                                      top_p=llm_kwargs['top_p'], addr=addr, port=port):
                print(response[len(mutable[0]):])
                mutable[0] = response
                if (time.time() - mutable[1]) > 3:
                    # Watchdog went stale: nobody is consuming the output anymore.
                    print('exit when no listener')
                    break
        asyncio.run(get_result(mutable))

    thread_listen = threading.Thread(target=run_coroutine, args=(mutable,), daemon=True)
    thread_listen.start()

    while thread_listen.is_alive():
        time.sleep(1)
        mutable[1] = time.time()  # Refresh the watchdog: we are still listening.
        # Push any new text to the chat window.
        if tgui_say != mutable[0]:
            tgui_say = mutable[0]
            history[-1] = tgui_say
            chatbot[-1] = (history[-2], history[-1])
            yield from update_ui(chatbot=chatbot, history=history)
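
# A minimal sketch of how a host application might drive predict() (the
# llm_kwargs values below are illustrative assumptions, not project defaults):
#
#   chatbot, history = [], []
#   llm_kwargs = {'llm_model': 'mymodel@localhost:7860',
#                 'max_length': 512, 'temperature': 1.0, 'top_p': 0.9}
#   for _ in predict("Hello", llm_kwargs, None, chatbot, history=history):
#       pass  # each yield redraws the WebUI with the partial response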


def predict_no_ui_long_connection(inputs, llm_kwargs, history, sys_prompt, observe_window, console_slience=False):
    raw_input = "What I would like to say is the following: " + inputs
    prompt = raw_input
    model_name, addr_port = llm_kwargs['llm_model'].split('@')
    assert ':' in addr_port, "Bad LLM_MODEL format! " + llm_kwargs['llm_model']
    addr, port = addr_port.split(':')

    def run_coroutine(observe_window):
        async def get_result(observe_window):
            async for response in run(context=prompt, max_token=llm_kwargs['max_length'],
                                      temperature=llm_kwargs['temperature'],
                                      top_p=llm_kwargs['top_p'], addr=addr, port=port):
                print(response[len(observe_window[0]):])
                observe_window[0] = response
                if (time.time() - observe_window[1]) > 5:
                    # Watchdog went stale: the observer stopped refreshing it.
                    print('exit when no listener')
                    break
        asyncio.run(get_result(observe_window))

    thread_listen = threading.Thread(target=run_coroutine, args=(observe_window,))
    thread_listen.start()
    # Block until generation finishes (or the watchdog fires); returning
    # immediately here would always hand back an empty string.
    thread_listen.join()
    return observe_window[0]
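
# A minimal usage sketch (an assumed calling pattern inferred from the code
# above): some other thread must keep refreshing observe_window[1], or the
# 5-second watchdog will cut generation short.
#
#   observe_window = ["", time.time()]
#   full_text = predict_no_ui_long_connection("Hello", llm_kwargs, [], "", observe_window)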