scriptsledge committed on
Commit
71c6963
·
verified ·
1 Parent(s): f7181e6

perf: switch to 0.5B model for maximum responsiveness on CPU

Browse files
Files changed (1) hide show
  1. model_service.py +4 -4
model_service.py CHANGED
@@ -3,10 +3,10 @@ from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
 
5
  # --- Configuration ---
6
- # Using the ultra-compressed 2-bit version of Qwen 2.5 Coder 1.5B
7
- # This is extremely fast and has very low memory usage, though intelligence may vary.
8
- REPO_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF"
9
- FILENAME = "qwen2.5-coder-1.5b-instruct-q2_k.gguf"
10
 
11
  print(f"Initializing Clarity AI Engine (llama.cpp)...")
12
  print(f"Target Model: {REPO_ID} [{FILENAME}]")
 
3
  from huggingface_hub import hf_hub_download
4
 
5
  # --- Configuration ---
6
+ # Using the ultra-lightweight Qwen 2.5 Coder 0.5B
7
+ # This is the fastest possible option for CPU/Edge devices.
8
+ REPO_ID = "Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF"
9
+ FILENAME = "qwen2.5-coder-0.5b-instruct-q4_k_m.gguf"
10
 
11
  print(f"Initializing Clarity AI Engine (llama.cpp)...")
12
  print(f"Target Model: {REPO_ID} [{FILENAME}]")