# Hugging Face Space: streaming chat demo for Mistral-7B-Instruct (GGUF via ctransformers).
# Standard library
import os
from threading import Thread

# Third party
import gradio as gr
from ctransformers import AutoModelForCausalLM

# Load a quantized Mistral-7B-Instruct model on CPU with streamed generation.
# NOTE(review): max_new_tokens=10000 likely exceeds the model's context
# window (ctransformers defaults to 2048 for mistral) — confirm intended.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    model_type="mistral",
    temperature=0.7,
    gpu_layers=0,          # CPU-only inference
    stream=True,           # llm(prompt) yields tokens incrementally
    # os.cpu_count() can return None (per its docs); the original
    # int(os.cpu_count()) would raise TypeError in that case.
    threads=os.cpu_count() or 1,
    max_new_tokens=10000,
)
# Function to generate model predictions.
def predict(message, history):
    """Stream the model's reply to *message*, given the prior chat *history*.

    Parameters
    ----------
    message : str
        The newest user message.
    history : list
        Prior turns as [user_text, assistant_text] pairs
        (Gradio ChatInterface "tuples" format — assumed; confirm against
        the installed gradio version, newer versions use message dicts).

    Yields
    ------
    str
        The cumulative partial response, growing as tokens stream in.
    """
    # Mistral-7B-Instruct's documented template wraps each exchange as
    # "[INST] user [/INST] assistant</s>".  The original code mixed in
    # Zephyr-style "<|user|>"/"<|assistant|>" tags, which this model was
    # not trained on and which degrade response quality.
    prompt = ""
    for user_turn, assistant_turn in history:
        prompt += f"[INST] {user_turn} [/INST] {assistant_turn}</s>"
    prompt += f"[INST] {message} [/INST]"

    partial = ""
    for token in llm(prompt=prompt):  # streams tokens (stream=True at load)
        partial += token
        yield partial
# Build the Gradio chat UI around the streaming predict() generator,
# then serve it.
chat_ui = gr.ChatInterface(
    predict,
    title="Test Mistral 7B",
    description="Ask Mistral any questions",
    examples=['How to cook a fish?', 'Who is the president of US now?'],
)
chat_ui.launch()  # Launching the web interface.