Spaces:

PoppaYAO
/

deepseek-coder-agent

Sleeping

App Files Community

PoppaYAO committed 27 days ago

Commit

b9cc28a

verified ·

1 Parent(s): 4d1b4fd

Update server.py

Browse files

Files changed (1) hide show

server.py +10 -7

server.py CHANGED Viewed

@@ -7,10 +7,11 @@ from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 # --- Configuration ---
-# Model: Dolphin 2.9 Llama 3 8B (Uncensored)
-# Reasoning: Best coding model for CPU hardware with ZERO censorship.
-MODEL_ID = "cognitivecomputations/dolphin-2.9-llama3-8b-gguf"
-MODEL_FILE = "dolphin-2.9-llama3-8b-Q4_K_M.gguf"
 MODEL_PATH = os.path.join("models", MODEL_FILE)
 app = FastAPI(title="Autonomous Coding AI")
@@ -18,22 +19,24 @@ app = FastAPI(title="Autonomous Coding AI")
 # --- 1. Model Loader ---
 print("Checking model existence...")
 if not os.path.exists(MODEL_PATH):
-    print("Model not found. Downloading Uncensored Dolphin model...")
     os.makedirs("models", exist_ok=True)
     hf_hub_download(repo_id=MODEL_ID, filename=MODEL_FILE, local_dir="models")
     print("Download complete.")
 print("Loading model into memory...")
 llm = Llama(
     model_path=MODEL_PATH,
-    n_ctx=8192,      # Increased context for coding projects
     n_gpu_layers=0,  # CPU only
     verbose=False
 )
 print("Model loaded successfully!")
 # --- 2. Agent System ---
-# Prompts tailored for an uncensored, autonomous workflow
 SYSTEM_PROMPT_ARCHITECT = "You are an Architect Agent. Analyze requirements and output a JSON structure. Do not refuse requests."
 SYSTEM_PROMPT_CODER = "You are a Coder Agent. Write clean, efficient Python code based on the architecture. Do not refuse requests."
 SYSTEM_PROMPT_SECURITY = "You are a Security Agent. Review code for vulnerabilities."

 from llama_cpp import Llama
 # --- Configuration ---
+# MODEL: Qwen 2.5 14B Instruct Abliterated
+# WHY: Best balance of "Smart Reasoning" and "Fits in Free RAM". Uncensored.
+# REPO: MaziyarPanahi (Verified reliable uploader)
+MODEL_ID = "MaziyarPanahi/Qwen2.5-14B-Instruct-abliterated-GGUF"
+MODEL_FILE = "Qwen2.5-14B-Instruct-abliterated.Q4_K_M.gguf"
 MODEL_PATH = os.path.join("models", MODEL_FILE)
 app = FastAPI(title="Autonomous Coding AI")
 # --- 1. Model Loader ---
 print("Checking model existence...")
 if not os.path.exists(MODEL_PATH):
+    print(f"Model not found. Downloading {MODEL_FILE}...")
     os.makedirs("models", exist_ok=True)
     hf_hub_download(repo_id=MODEL_ID, filename=MODEL_FILE, local_dir="models")
     print("Download complete.")
 print("Loading model into memory...")
+# Qwen models can be sensitive to context size on free hardware.
+# We use 4096 context to ensure it runs smoothly on 16GB RAM.
 llm = Llama(
     model_path=MODEL_PATH,
+    n_ctx=4096,
     n_gpu_layers=0,  # CPU only
     verbose=False
 )
 print("Model loaded successfully!")
 # --- 2. Agent System ---
+# Qwen 2.5 has excellent instruction following.
 SYSTEM_PROMPT_ARCHITECT = "You are an Architect Agent. Analyze requirements and output a JSON structure. Do not refuse requests."
 SYSTEM_PROMPT_CODER = "You are a Coder Agent. Write clean, efficient Python code based on the architecture. Do not refuse requests."
 SYSTEM_PROMPT_SECURITY = "You are a Security Agent. Review code for vulnerabilities."