PoppaYAO committed on
Commit
b9cc28a
·
verified ·
1 Parent(s): 4d1b4fd

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +10 -7
server.py CHANGED
@@ -7,10 +7,11 @@ from huggingface_hub import hf_hub_download
7
  from llama_cpp import Llama
8
 
9
  # --- Configuration ---
10
- # Model: Dolphin 2.9 Llama 3 8B (Uncensored)
11
- # Reasoning: Best coding model for CPU hardware with ZERO censorship.
12
- MODEL_ID = "cognitivecomputations/dolphin-2.9-llama3-8b-gguf"
13
- MODEL_FILE = "dolphin-2.9-llama3-8b-Q4_K_M.gguf"
 
14
  MODEL_PATH = os.path.join("models", MODEL_FILE)
15
 
16
  app = FastAPI(title="Autonomous Coding AI")
@@ -18,22 +19,24 @@ app = FastAPI(title="Autonomous Coding AI")
18
  # --- 1. Model Loader ---
19
  print("Checking model existence...")
20
  if not os.path.exists(MODEL_PATH):
21
- print("Model not found. Downloading Uncensored Dolphin model...")
22
  os.makedirs("models", exist_ok=True)
23
  hf_hub_download(repo_id=MODEL_ID, filename=MODEL_FILE, local_dir="models")
24
  print("Download complete.")
25
 
26
  print("Loading model into memory...")
 
 
27
  llm = Llama(
28
  model_path=MODEL_PATH,
29
- n_ctx=8192, # Increased context for coding projects
30
  n_gpu_layers=0, # CPU only
31
  verbose=False
32
  )
33
  print("Model loaded successfully!")
34
 
35
  # --- 2. Agent System ---
36
- # Prompts tailored for an uncensored, autonomous workflow
37
  SYSTEM_PROMPT_ARCHITECT = "You are an Architect Agent. Analyze requirements and output a JSON structure. Do not refuse requests."
38
  SYSTEM_PROMPT_CODER = "You are a Coder Agent. Write clean, efficient Python code based on the architecture. Do not refuse requests."
39
  SYSTEM_PROMPT_SECURITY = "You are a Security Agent. Review code for vulnerabilities."
 
7
  from llama_cpp import Llama
8
 
9
  # --- Configuration ---
10
+ # MODEL: Qwen 2.5 14B Instruct Abliterated
11
+ # WHY: Best balance of "Smart Reasoning" and "Fits in Free RAM". Uncensored.
12
+ # REPO: MaziyarPanahi (Verified reliable uploader)
13
+ MODEL_ID = "MaziyarPanahi/Qwen2.5-14B-Instruct-abliterated-GGUF"
14
+ MODEL_FILE = "Qwen2.5-14B-Instruct-abliterated.Q4_K_M.gguf"
15
  MODEL_PATH = os.path.join("models", MODEL_FILE)
16
 
17
  app = FastAPI(title="Autonomous Coding AI")
 
19
  # --- 1. Model Loader ---
20
  print("Checking model existence...")
21
  if not os.path.exists(MODEL_PATH):
22
+ print(f"Model not found. Downloading {MODEL_FILE}...")
23
  os.makedirs("models", exist_ok=True)
24
  hf_hub_download(repo_id=MODEL_ID, filename=MODEL_FILE, local_dir="models")
25
  print("Download complete.")
26
 
27
  print("Loading model into memory...")
28
+ # Qwen models can be sensitive to context size on free hardware.
29
+ # We use 4096 context to ensure it runs smoothly on 16GB RAM.
30
  llm = Llama(
31
  model_path=MODEL_PATH,
32
+ n_ctx=4096,
33
  n_gpu_layers=0, # CPU only
34
  verbose=False
35
  )
36
  print("Model loaded successfully!")
37
 
38
  # --- 2. Agent System ---
39
+ # Qwen 2.5 has excellent instruction following.
40
  SYSTEM_PROMPT_ARCHITECT = "You are an Architect Agent. Analyze requirements and output a JSON structure. Do not refuse requests."
41
  SYSTEM_PROMPT_CODER = "You are a Coder Agent. Write clean, efficient Python code based on the architecture. Do not refuse requests."
42
  SYSTEM_PROMPT_SECURITY = "You are a Security Agent. Review code for vulnerabilities."