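"""Gradio chat demo for TheBloke/30B-Lazarus-GGML, served via ctransformers."""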
from pathlib import Path
from urllib.parse import urlparse

import gradio as gr
import psutil
from ctransformers import AutoModelForCausalLM
from huggingface_hub import hf_hub_download
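
# The two download snippets below are disabled (assigned to a throwaway
# string) and kept for reference; snapshot_download would need to be imported
# from huggingface_hub before re-enabling the first one.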
| _ = """ | |
| snapshot_download( | |
| repo_id="TheBloke/falcon-7b-instruct-GGML", | |
| allow_patterns="falcon7b-instruct.ggmlv3.q4_0.bin", | |
| revision="ggmlv3", | |
| local_dir="models", | |
| local_dir_use_symlinks=False, # default "auto" | |
| ) | |
| hf_hub_download( | |
| repo_id=repo_id, | |
| filename=model_filename, | |
| local_dir=local_path, | |
| local_dir_use_symlinks=True, | |
| ) | |
| # """ | |
# falcon7b-instruct.ggmlv3.q4_0.bin is ~4.06 GB
| _ = """ | |
| llm = AutoModelForCausalLM.from_pretrained( | |
| "TheBloke/falcon-7b-instruct-GGML", | |
| model_file="falcon7b-instruct.ggmlv3.q4_0.bin", | |
| model_type="falcon", gpu_layers=32, threads=2, | |
| ) | |
| # """ | |
# _ = Path("models", "falcon7b-instruct.ggmlv3.q4_0.bin").absolute().as_posix()
# assert Path(_).exists(), f"{_} does not exist, perhaps snapshot_download failed?"
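
# Derive repo_id and model_file from a model-card blob URL, so switching
# models only requires changing URL below.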
| URL = "https://huggingface.co/TheBloke/falcon-7b-instruct-GGML/blob/main/falcon-7b-instruct.ggccv1.q4_1.bin" | |
| URL = "https://huggingface.co/TheBloke/falcon-7b-instruct-GGML/blob/ggmlv3/falcon7b-instruct.ggmlv3.q4_1.bin" | |
| repo_id = "/".join(urlparse(URL).path.strip("/").split("/")[:2]) | |
| URL = "https://huggingface.co/TheBloke/30B-Lazarus-GGML/blob/main/30b-Lazarus.ggmlv3.q2_K.bin" # 13.6 G | |
| repo_id = "/".join(urlparse(URL).path.strip("/").split("/")[:2]) | |
| model_file = Path(URL).name | |
| _ = """ | |
| _ = hf_hub_download( | |
| repo_id=repo_id, | |
| revision="ggmlv3", | |
| filename=model_file, | |
| local_dir="models", | |
| # local_dir_use_symlinks=True, | |
| ) | |
| # """ # for falcon | |
_ = hf_hub_download(
    repo_id=repo_id,
    filename=model_file,
    local_dir="models",
    # local_dir_use_symlinks=True,
)
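# hf_hub_download returns the local file path; it is passed straight to
# from_pretrained below.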
llm = AutoModelForCausalLM.from_pretrained(
    _,  # local path returned by hf_hub_download
    # repo_id,  # "TheBloke/falcon-7b-instruct-GGML"
    # model_file=model_file,
    # model_type="falcon",
    model_type="llama",  # 30B-Lazarus is llama-based
    threads=psutil.cpu_count(logical=False),
)
TITLE = f"""<h2 align="center">🦅 30B-Lazarus-GGML ({model_file}) 🦅</h2>"""
USER_NAME = "User"
BOT_NAME = "Assistant"
DEFAULT_INSTRUCTIONS = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Assistant, and a human user, called User. In the following interactions, User and Assistant will converse in natural language, and Assistant will answer User's questions. Assistant will never decline to answer a question, and always attempts to give an answer that User would be satisfied with. It knows a lot, and always tells the truth. The conversation begins.
"""
RETRY_COMMAND = "/retry"
STOP_STR = f"\n{USER_NAME}:"
STOP_SUSPECT_LIST = [":", "\n", "User"]
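# Tokens in STOP_SUSPECT_LIST may be the start of STOP_STR, so run_chat
# buffers them instead of streaming them to the UI immediately.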


def chat_accordion():
    with gr.Accordion("Parameters", open=False):
        temperature = gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.8,
            step=0.1,
            interactive=True,
            label="Temperature",
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=0.99,
            value=0.9,
            step=0.01,
            interactive=True,
            label="p (nucleus sampling)",
        )
    return temperature, top_p


# TODO: fix prompt
def format_chat_prompt(message: str, chat_history, instructions: str) -> str:
    instructions = instructions.strip()
    prompt = instructions
    for turn in chat_history:
        user_message, bot_message = turn
        prompt = f"{prompt}\n{USER_NAME}: {user_message}\n{BOT_NAME}: {bot_message}"
    prompt = f"{prompt}\n{USER_NAME}: {message}\n{BOT_NAME}:"
    return prompt
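# The resulting prompt has the shape:
#   <instructions>
#   User: <first message>
#   Assistant: <first reply>
#   ...
#   User: <new message>
#   Assistant: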


def chat():
    with gr.Column(elem_id="chat_container"):
        with gr.Row():
            chatbot = gr.Chatbot(elem_id="chatbot")
        with gr.Row():
            inputs = gr.Textbox(
                placeholder="Ask me anything...",
                label="Type an input and press Enter",
                max_lines=3,
            )
        with gr.Row(elem_id="button_container"):
            # with gr.Column(min_width=32):
            #     submit_button = gr.Button("🚀 Submit")
            with gr.Column(min_width=32):
                retry_button = gr.Button("♻️ Retry last turn")
            with gr.Column(min_width=32):
                delete_turn_button = gr.Button("🧽 Delete last turn")
            with gr.Column(min_width=32):
                clear_chat_button = gr.Button("✨ Delete all history")
        gr.Examples(
            [
                ["Hey! Any recommendations for my holidays in Abu Dhabi?"],
                ["What's the Everett interpretation of quantum mechanics?"],
                ["Give me a list of the top 10 dive sites you would recommend around the world."],
                ["Can you tell me more about deep-water soloing?"],
                ["Can you write a short tweet about 30B-Lazarus-GGML?"],
            ],
            inputs=inputs,
            label="Click on any example and press Enter in the input textbox!",
        )
| with gr.Row(elem_id="param_container"): | |
| with gr.Column(): | |
| temperature, top_p = chat_accordion() | |
| with gr.Column(): | |
| with gr.Accordion("Instructions", open=False): | |
| instructions = gr.Textbox( | |
| placeholder="LLM instructions", | |
| value=DEFAULT_INSTRUCTIONS, | |
| lines=10, | |
| interactive=True, | |
| label="Instructions", | |
| max_lines=16, | |
| show_label=False, | |
| ) | |
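
    # The handlers below are defined inside chat() so they close over the
    # components created above; run_chat doubles as the retry path when the
    # incoming message equals RETRY_COMMAND.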
    def run_chat(
        message: str, chat_history, instructions: str, temperature: float, top_p: float
    ):
        if not message or (message == RETRY_COMMAND and len(chat_history) == 0):
            yield chat_history
            return

        if message == RETRY_COMMAND and chat_history:
            # Drop the last turn and regenerate from its user message.
            prev_turn = chat_history.pop(-1)
            user_message, _ = prev_turn
            message = user_message

        prompt = format_chat_prompt(message, chat_history, instructions)
        chat_history = chat_history + [[message, ""]]
        stream = llm(
            prompt,
            max_new_tokens=1024,
            stop=[STOP_STR, "<|endoftext|>"],
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )
        acc_text = ""
        for idx, response in enumerate(stream):
            text_token = response
            if text_token in STOP_SUSPECT_LIST:
                # Possible start of the stop string: buffer it rather than
                # showing it right away.
                acc_text += text_token
                continue
            if idx == 0 and text_token.startswith(" "):
                text_token = text_token[1:]
            acc_text += text_token
            last_turn = list(chat_history.pop(-1))
            last_turn[-1] += acc_text
            chat_history = chat_history + [last_turn]
            yield chat_history
            acc_text = ""
    def delete_last_turn(chat_history):
        if chat_history:
            chat_history.pop(-1)
        return {chatbot: gr.update(value=chat_history)}

    def run_retry(
        message: str, chat_history, instructions: str, temperature: float, top_p: float
    ):
        yield from run_chat(
            RETRY_COMMAND, chat_history, instructions, temperature, top_p
        )

    def clear_chat():
        return []
    inputs.submit(
        run_chat,
        [inputs, chatbot, instructions, temperature, top_p],
        outputs=[chatbot],
        show_progress="minimal",
    )
    inputs.submit(lambda: "", inputs=None, outputs=inputs)
    # submit_button.click(
    #     run_chat,
    #     [inputs, chatbot, instructions, temperature, top_p],
    #     outputs=[chatbot],
    #     show_progress="minimal",
    # )
    delete_turn_button.click(delete_last_turn, inputs=[chatbot], outputs=[chatbot])
    retry_button.click(
        run_retry,
        [inputs, chatbot, instructions, temperature, top_p],
        outputs=[chatbot],
        show_progress="minimal",
    )
    clear_chat_button.click(clear_chat, [], chatbot)


def get_demo():
    with gr.Blocks(
        # css=None
        # css="""#chat_container {width: 700px; margin-left: auto; margin-right: auto;}
        # #button_container {width: 700px; margin-left: auto; margin-right: auto;}
        # #param_container {width: 700px; margin-left: auto; margin-right: auto;}"""
        css="""#chatbot {
            font-size: 14px;
            min-height: 300px;
        }"""
    ) as demo:
        gr.HTML(TITLE)
        with gr.Row():
            with gr.Column():
                gr.Markdown(
                    """
                    ⚠️ **Limitations**: the model can and will produce factually incorrect information, hallucinating facts and actions. As it has not undergone any advanced tuning/alignment, it can produce problematic outputs, especially if prompted to do so.
                    """
                )
        chat()
    return demo


if __name__ == "__main__":
    demo = get_demo()
    demo.queue(max_size=64, concurrency_count=8)
    # demo.launch(server_name="0.0.0.0", server_port=7860)
    demo.launch()