import gradio as gr
import yaml
from huggingface_hub import hf_hub_download
from huggingface_hub.utils import LocalEntryNotFoundError
from llama_cpp import Llama

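# Load the app configuration: model location on the Hub, llama.cpp settings,
# generation defaults, and Gradio queue options.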
with open("./config.yml", "r") as f:
    config = yaml.load(f, Loader=yaml.Loader)
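
# The code implies a config.yml shaped roughly like the sketch below; the
# values here are purely hypothetical, only the key structure is assumed:
#
#   hub:
#     repo_id: someuser/some-ggml-model   # hypothetical repo
#     filename: model.ggmlv3.q4_0.bin     # hypothetical file
#   llama_cpp:
#     n_ctx: 2048
#   chat:
#     stop: ["USER:"]
#   queue:
#     concurrency_count: 1

# Download the model file, retrying on transient connection errors
# (hf_hub_download raises LocalEntryNotFoundError when the file is neither
# reachable online nor present in the local cache).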
while True:
    try:
        hub_config = config["hub"].copy()
        repo_id = hub_config.pop("repo_id")
        filename = hub_config.pop("filename")
        fp = hf_hub_download(
            repo_id=repo_id, filename=filename, **hub_config
        )
        break
    except LocalEntryNotFoundError as e:
        if "Connection error" in str(e):
            print(str(e) + ", retrying...")
        else:
            raise e

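# Initialize llama.cpp with the downloaded weights; any extra kwargs
# (e.g. n_ctx, n_threads) come from the llama_cpp section of config.yml.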
llm = Llama(model_path=fp, **config["llama_cpp"])

def user(message, history):
    history = history or []
    # Append the user's message to the conversation history
    history.append([message, ""])
    return "", history

def chat(history, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
    history = history or []
    messages = system_message + \
        "\n".join(["\n".join(["USER: " + item[0], "ASSISTANT: " + item[1]])
                   for item in history])
    # remove last space from assistant, some models output a ZWSP if you leave a space
    messages = messages[:-1]
    history[-1][1] = ""
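    # At this point the rendered prompt looks like (illustrative):
    #     <system message>
    #     USER: <earlier user turn>
    #     ASSISTANT: <earlier reply>
    #     USER: <latest message>
    #     ASSISTANT: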
    for output in llm(
        messages,
        echo=False,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repeat_penalty=repeat_penalty,
        **config["chat"]
    ):
        answer = output["choices"][0]["text"]
        history[-1][1] += answer
        # stream the response
        yield history, history

def clear_chat(chat_history_state, chat_message):
    chat_history_state = []
    chat_message = ""
    return chat_history_state, chat_message

start_message = """
You are Firelight, a master of storytelling with an unparalleled depth of knowledge and captivating presence. Your task is to craft compelling stories that captivate audiences, adhering to the 3-act/8-sequence structure. Draw from myths and legends, evoke emotions, create vivid worlds, and breathe life into characters. Construct intricate plots with unexpected twists and satisfying resolutions. Collaborate with users to explore human experience, inspire, enlighten, and entertain. Your mission is to redefine storytelling, creating immersive worlds and leaving a lasting legacy.
"""

def generate_text_instruct(input_text):
    response = ""
    for output in llm(f"### Instruction:\n{input_text}\n\n### Response:\n", echo=False, stream=True, **config["chat"]):
        answer = output["choices"][0]["text"]
        response += answer
        yield response

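# Minimal Interface for the Instruct tab; it is rendered inside the
# Blocks layout below rather than launched on its own.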
instruct_interface = gr.Interface(
    fn=generate_text_instruct,
    inputs=gr.Textbox(lines=10, label="Enter your input text"),
    outputs=gr.Textbox(label="Output text"),
)

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown(f"""
# Firelight Test
- This is the [{config["hub"]["repo_id"]}](https://huggingface.co/{config["hub"]["repo_id"]}) model file [{config["hub"]["filename"]}](https://huggingface.co/{config["hub"]["repo_id"]}/blob/main/{config["hub"]["filename"]})
""")
    with gr.Tab("Instruct"):
        gr.Markdown("# GGML Spaces Instruct Demo")
        instruct_interface.render()
    with gr.Tab("Chatbot"):
        gr.Markdown("# GGML Spaces Chatbot Demo")
        chatbot = gr.Chatbot()
        with gr.Row():
            message = gr.Textbox(
                label="What do you want to chat about?",
                placeholder="Ask me anything.",
                lines=1,
            )
        with gr.Row():
            submit = gr.Button(value="Send message", variant="secondary").style(full_width=True)
            clear = gr.Button(value="New topic", variant="secondary").style(full_width=False)
            stop = gr.Button(value="Stop", variant="secondary").style(full_width=False)
        with gr.Row():
            with gr.Column():
                max_tokens = gr.Slider(20, 1000, label="Max Tokens", step=20, value=300)
                temperature = gr.Slider(0.2, 2.0, label="Temperature", step=0.1, value=0.8)
                top_p = gr.Slider(0.0, 1.0, label="Top P", step=0.05, value=0.95)
                top_k = gr.Slider(0, 100, label="Top K", step=1, value=40)
                repeat_penalty = gr.Slider(0.0, 2.0, label="Repetition Penalty", step=0.1, value=1.1)
        system_msg = gr.Textbox(
            start_message, label="System Message", interactive=True, visible=True,
            placeholder="system prompt, useful for RP", lines=5)
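        # Server-side state holding the [user_message, assistant_reply] pairs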
        chat_history_state = gr.State()
        clear.click(clear_chat, inputs=[chat_history_state, message], outputs=[chat_history_state, message], queue=False)
        clear.click(lambda: None, None, chatbot, queue=False)
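
        # Sending a message (button or Enter) first records the user turn,
        # then streams the model's reply; "Stop" cancels both in-flight events.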
        submit_click_event = submit.click(
            fn=user, inputs=[message, chat_history_state], outputs=[message, chat_history_state], queue=True
        ).then(
            fn=chat, inputs=[chat_history_state, system_msg, max_tokens, temperature, top_p, top_k, repeat_penalty],
            outputs=[chatbot, chat_history_state], queue=True
        )
        message_submit_event = message.submit(
            fn=user, inputs=[message, chat_history_state], outputs=[message, chat_history_state], queue=True
        ).then(
            fn=chat, inputs=[chat_history_state, system_msg, max_tokens, temperature, top_p, top_k, repeat_penalty],
            outputs=[chatbot, chat_history_state], queue=True
        )
        stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_click_event, message_submit_event], queue=False)
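
# Queue settings (e.g. concurrency) come from config.yml; 0.0.0.0:7860 is the
# standard host/port binding for a Hugging Face Space.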
demo.queue(**config["queue"]).launch(debug=True, server_name="0.0.0.0", server_port=7860)