ntaexams commited on
Commit
01ed2fe
·
verified ·
1 Parent(s): f0b2b62

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -16
app.py CHANGED
@@ -1,35 +1,36 @@
 
1
import gradio as gr
from ctransformers import AutoModelForCausalLM

# Location of the quantized GGUF weights on the Hugging Face Hub.
model_repo = "TheBloke/OpenHermes-2-Mistral-7B-GGUF"
model_file = "openhermes-2-mistral-7b.Q4_K_M.gguf"

# Fetch the weights (cached by ctransformers) and instantiate the model.
print(f"Downloading {model_file} from {model_repo}...")
_load_kwargs = dict(
    model_file=model_file,
    model_type="mistral",
    # Uncomment if GPU is available
    # gpu_layers=50
    context_length=1024,
)
model = AutoModelForCausalLM.from_pretrained(model_repo, **_load_kwargs)
print("Model downloaded and loaded successfully.")


def chat_with_model(prompt):
    """Return the model's completion for *prompt*."""
    return model(prompt)


# Wire the chat function into a minimal Gradio text-in / text-out UI.
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(lines=2, placeholder="Ask something..."),
    outputs="text",
    title="Mistral 7B Chatbot",
    description="Interact with Mistral-7B using GGUF & ctransformers.",
)

# Start the web app.
iface.launch()
 
 
1
import os
import gradio as gr
from ctransformers import AutoModelForCausalLM

# Model repository and quantized weights file (GGUF format).
MODEL_REPO = "TheBloke/OpenHermes-2-Mistral-7B-GGUF"
MODEL_FILE = "openhermes-2-mistral-7b.Q8_0.gguf"  # Use Q8_0 for better CPU performance

# BUG FIX: the original referenced torch.cuda.is_available() without ever
# importing torch, which raises NameError at module load. Import it lazily
# and fall back to CPU-only inference when torch is not installed.
try:
    import torch

    GPU_LAYERS = 50 if torch.cuda.is_available() else 0  # offload layers when a GPU exists
except ImportError:
    GPU_LAYERS = 0  # torch absent: run entirely on CPU

# Download (cached by ctransformers) and load the model.
print(f"Downloading {MODEL_FILE} from {MODEL_REPO}...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_REPO,
    model_file=MODEL_FILE,
    model_type="mistral",
    gpu_layers=GPU_LAYERS,
    context_length=1024,  # Reduce context length for faster response
)
print("Model loaded successfully.")


# Function to generate responses
def chat_with_model(prompt):
    """Return the model's text completion for *prompt*."""
    response = model(prompt)
    return response


# Gradio UI
iface = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query..."),
    outputs="text",
    title="Mistral-7B Chatbot",
    description="Optimized chatbot using Mistral-7B GGUF with improved speed.",
)

# Run the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()