"""Gradio chat front-end for a local Zephyr-7B GGUF model served by ctransformers."""

from ctransformers import AutoModelForCausalLM
from gradio import Chatbot, Interface
import gradio as gr

# Load the GGUF model
llm = AutoModelForCausalLM.from_pretrained(
    "zephyr-7b-beta.Q4_K_S.gguf",
    model_type="mistral",
    max_new_tokens=1096,
    threads=3,
)

# Zephyr chat template pieces: every turn is "<|role|>\n{text}</s>".
_SYSTEM_PROMPT = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request."
)
_SYSTEM_TAG = "<|system|>"
_USER_TAG = "<|user|>"
_ASSISTANT_TAG = "<|assistant|>"
_END_OF_TURN = "</s>"


# Format prompt with system message and chat history
def format_prompt(message, chat_history):
    """Build the full prompt for one generation call.

    Args:
        message: The new user message to answer.
        chat_history: Iterable of ``(user_message, bot_message)`` pairs from
            earlier turns, oldest first.

    Returns:
        A single string holding the system prompt, every earlier turn and the
        new user message, ending with an open assistant turn for the model to
        complete.
    """
    turns = [f"{_SYSTEM_TAG}\n{_SYSTEM_PROMPT}{_END_OF_TURN}"]
    for user_msg, bot_msg in chat_history:
        turns.append(f"{_USER_TAG}\n{user_msg}{_END_OF_TURN}")
        turns.append(f"{_ASSISTANT_TAG}\n{bot_msg}{_END_OF_TURN}")
    turns.append(f"{_USER_TAG}\n{message}{_END_OF_TURN}")
    # Leave the assistant turn open so the model writes the reply.
    turns.append(f"{_ASSISTANT_TAG}\n")
    return "\n".join(turns)


# Define chatbot function
def respond(message, chat_history):
    """Generate a reply and return the updated conversation.

    Args:
        message: Text submitted from the message box.
        chat_history: List of ``(user_message, bot_message)`` pairs held in
            the session state; ``None`` is treated as an empty history.

    Returns:
        A ``(chatbot_value, state_value)`` tuple. Both items are the same new
        history list; the list passed in is not modified.
    """
    history = list(chat_history or [])
    # Skip generation for blank input instead of running the model on nothing.
    if not message or not message.strip():
        return history, history
    response = llm(format_prompt(message, history))
    history.append((message, response))
    return history, history


# Create Gradio Chatbot UI and wire up the events
with gr.Blocks() as demo:
    gr.Markdown("## Zephyr LLM Chat Interface")
    # The display options live on the component that is actually rendered.
    chatbot = gr.Chatbot(bubble_full_width=False, height=500)
    msg = gr.Textbox(label="Your Message")
    clear = gr.Button("Clear Chat")
    state = gr.State([])

    msg.submit(respond, [msg, state], [chatbot, state])
    # Reset both the visible chat and the stored history.
    clear.click(lambda: ([], []), None, [chatbot, state])

# Launch Gradio app
if __name__ == "__main__":
    demo.launch()