import gradio as gr
from ctransformers import AutoModelForCausalLM

# Define the model repository and file
MODEL_REPO = "TheBloke/OpenHermes-2-Mistral-7B-GGUF"
MODEL_FILE = "openhermes-2-mistral-7b.Q8_0.gguf"  # Q8_0 is near-lossless; a smaller quant (e.g. Q4_K_M) is faster on CPU

# Download and load the model
print(f"Downloading {MODEL_FILE} from {MODEL_REPO}...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    model_file=MODEL_FILE,
    model_type="mistral",
    # gpu_layers=50,  # uncomment to offload layers to the GPU (requires a CUDA build of ctransformers)
    context_length=256,  # small context window keeps CPU responses fast
)
print("Model loaded successfully.")

# Function to generate responses
def chat_with_model(prompt):
    # Cap generation so the prompt plus the reply fit in the 256-token context
    response = model(prompt, max_new_tokens=200)
    return response
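
# A streaming variant, sketched under the assumption that ctransformers'
# stream=True mode is used; it yields text pieces as they are generated.
# Wiring it into the UI would also need a streaming-capable Gradio component.
def chat_with_model_streaming(prompt):
    # Accumulate tokens as the model emits them instead of waiting for the full reply
    text = ""
    for token in model(prompt, max_new_tokens=200, stream=True):
        text += token
    return text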

# Gradio UI
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query..."),
    outputs="text",
    title="Mistral-7B Chatbot",
    description="Optimized chatbot using Mistral-7B GGUF with improved speed.",
)

# Run the Gradio app
if __name__ == "__main__":
    iface.launch(share=True)  # share=True creates a public link when run locally; Spaces ignores it
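
# Assuming this runs as a Hugging Face Space, a matching requirements.txt
# would list the two dependencies this script imports:
#
#     ctransformers
#     gradio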