scriptsledge committed on
Commit
71c6963
·
verified ·
1 Parent(s): f7181e6

perf: switch to 0.5B model for maximum responsiveness on CPU

Browse files
Files changed (1) hide show
  1. model_service.py +4 -4
model_service.py CHANGED
@@ -3,10 +3,10 @@ from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
 
5
  # --- Configuration ---
6
- # Using the ultra-compressed 2-bit version of Qwen 2.5 Coder 1.5B
7
- # This is extremely fast and has very low memory usage, though intelligence may vary.
8
- REPO_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF"
9
- FILENAME = "qwen2.5-coder-1.5b-instruct-q2_k.gguf"
10
 
11
  print(f"Initializing Clarity AI Engine (llama.cpp)...")
12
  print(f"Target Model: {REPO_ID} [{FILENAME}]")
 
3
  from huggingface_hub import hf_hub_download
4
 
5
  # --- Configuration ---
6
+ # Using the ultra-lightweight Qwen 2.5 Coder 0.5B
7
+ # This is the fastest possible option for CPU/Edge devices.
8
+ REPO_ID = "Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF"
9
+ FILENAME = "qwen2.5-coder-0.5b-instruct-q4_k_m.gguf"
10
 
11
  print(f"Initializing Clarity AI Engine (llama.cpp)...")
12
  print(f"Target Model: {REPO_ID} [{FILENAME}]")