ntaexams commited on
Commit
01ed2fe
·
verified ·
1 Parent(s): f0b2b62

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -16
app.py CHANGED
@@ -1,35 +1,36 @@
 
1
import gradio as gr
from ctransformers import AutoModelForCausalLM

# Location of the quantized GGUF weights on the Hugging Face Hub.
model_repo = "TheBloke/OpenHermes-2-Mistral-7B-GGUF"
model_file = "openhermes-2-mistral-7b.Q4_K_M.gguf"

# Fetch the weights (cached by ctransformers) and instantiate the model.
print(f"Downloading {model_file} from {model_repo}...")
_load_kwargs = dict(
    model_file=model_file,
    model_type="mistral",
    # Uncomment if GPU is available
    # gpu_layers=50
    context_length=1024,
)
model = AutoModelForCausalLM.from_pretrained(model_repo, **_load_kwargs)
print("Model downloaded and loaded successfully.")


def chat_with_model(prompt):
    """Return the model's completion for *prompt*."""
    return model(prompt)


# Wire the chat function into a minimal Gradio text-in / text-out UI.
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(lines=2, placeholder="Ask something..."),
    outputs="text",
    title="Mistral 7B Chatbot",
    description="Interact with Mistral-7B using GGUF & ctransformers.",
)

# Start the web app.
iface.launch()
 
 
1
import os
import gradio as gr
from ctransformers import AutoModelForCausalLM

# Model repository and quantized weights file (GGUF format).
MODEL_REPO = "TheBloke/OpenHermes-2-Mistral-7B-GGUF"
MODEL_FILE = "openhermes-2-mistral-7b.Q8_0.gguf"  # Use Q8_0 for better CPU performance

# BUG FIX: the original referenced torch.cuda.is_available() without ever
# importing torch, which raises NameError at module load. Import it lazily
# and fall back to CPU-only inference when torch is not installed.
try:
    import torch

    GPU_LAYERS = 50 if torch.cuda.is_available() else 0  # offload layers when a GPU exists
except ImportError:
    GPU_LAYERS = 0  # torch absent: run entirely on CPU

# Download (cached by ctransformers) and load the model.
print(f"Downloading {MODEL_FILE} from {MODEL_REPO}...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    model_file=MODEL_FILE,
    model_type="mistral",
    gpu_layers=GPU_LAYERS,
    context_length=1024,  # Reduce context length for faster response
)
print("Model loaded successfully.")


# Function to generate responses
def chat_with_model(prompt):
    """Return the model's text completion for *prompt*."""
    response = model(prompt)
    return response


# Gradio UI
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query..."),
    outputs="text",
    title="Mistral-7B Chatbot",
    description="Optimized chatbot using Mistral-7B GGUF with improved speed.",
)

# Run the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()