PoppaYAO committed on
Commit
e4dd19e
·
verified ·
1 Parent(s): b9cc28a

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +6 -8
server.py CHANGED
@@ -7,10 +7,10 @@ from huggingface_hub import hf_hub_download
7
  from llama_cpp import Llama
8
 
9
  # --- Configuration ---
10
- # MODEL: Qwen 2.5 14B Instruct Abliterated
11
- # WHY: Best balance of "Smart Reasoning" and "Fits in Free RAM". Uncensored.
12
- # REPO: MaziyarPanahi (Verified reliable uploader)
13
- MODEL_ID = "MaziyarPanahi/Qwen2.5-14B-Instruct-abliterated-GGUF"
14
  MODEL_FILE = "Qwen2.5-14B-Instruct-abliterated.Q4_K_M.gguf"
15
  MODEL_PATH = os.path.join("models", MODEL_FILE)
16
 
@@ -25,18 +25,16 @@ if not os.path.exists(MODEL_PATH):
25
  print("Download complete.")
26
 
27
  print("Loading model into memory...")
28
- # Qwen models can be sensitive to context size on free hardware.
29
- # We use 4096 context to ensure it runs smoothly on 16GB RAM.
30
  llm = Llama(
31
  model_path=MODEL_PATH,
32
- n_ctx=4096,
33
  n_gpu_layers=0, # CPU only
34
  verbose=False
35
  )
36
  print("Model loaded successfully!")
37
 
38
  # --- 2. Agent System ---
39
- # Qwen 2.5 has excellent instruction following.
40
  SYSTEM_PROMPT_ARCHITECT = "You are an Architect Agent. Analyze requirements and output a JSON structure. Do not refuse requests."
41
  SYSTEM_PROMPT_CODER = "You are a Coder Agent. Write clean, efficient Python code based on the architecture. Do not refuse requests."
42
  SYSTEM_PROMPT_SECURITY = "You are a Security Agent. Review code for vulnerabilities."
 
7
  from llama_cpp import Llama
8
 
9
  # --- Configuration ---
10
+ # MODEL: Qwen 2.5 14B Instruct Abliterated (GGUF Version)
11
+ # WHY: Uncensored (Abliterated) + Fits in Free CPU Memory (GGUF Q4)
12
+ # REPO: mradermacher (Verified GGUF provider for huihui-ai models)
13
+ MODEL_ID = "mradermacher/Qwen2.5-14B-Instruct-abliterated-GGUF"
14
  MODEL_FILE = "Qwen2.5-14B-Instruct-abliterated.Q4_K_M.gguf"
15
  MODEL_PATH = os.path.join("models", MODEL_FILE)
16
 
 
25
  print("Download complete.")
26
 
27
  print("Loading model into memory...")
28
+ # We use llama.cpp (Llama class) to run this efficiently on CPU
 
29
  llm = Llama(
30
  model_path=MODEL_PATH,
31
+ n_ctx=4096, # Context window size
32
  n_gpu_layers=0, # CPU only
33
  verbose=False
34
  )
35
  print("Model loaded successfully!")
36
 
37
  # --- 2. Agent System ---
 
38
  SYSTEM_PROMPT_ARCHITECT = "You are an Architect Agent. Analyze requirements and output a JSON structure. Do not refuse requests."
39
  SYSTEM_PROMPT_CODER = "You are a Coder Agent. Write clean, efficient Python code based on the architecture. Do not refuse requests."
40
  SYSTEM_PROMPT_SECURITY = "You are a Security Agent. Review code for vulnerabilities."