PoppaYAO committed on
Commit
e4dd19e
·
verified ·
1 Parent(s): b9cc28a

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +6 -8
server.py CHANGED
@@ -7,10 +7,10 @@ from huggingface_hub import hf_hub_download
7
  from llama_cpp import Llama
8
 
9
  # --- Configuration ---
10
- # MODEL: Qwen 2.5 14B Instruct Abliterated
11
- # WHY: Best balance of "Smart Reasoning" and "Fits in Free RAM". Uncensored.
12
- # REPO: MaziyarPanahi (Verified reliable uploader)
13
- MODEL_ID = "MaziyarPanahi/Qwen2.5-14B-Instruct-abliterated-GGUF"
14
  MODEL_FILE = "Qwen2.5-14B-Instruct-abliterated.Q4_K_M.gguf"
15
  MODEL_PATH = os.path.join("models", MODEL_FILE)
16
 
@@ -25,18 +25,16 @@ if not os.path.exists(MODEL_PATH):
25
  print("Download complete.")
26
 
27
  print("Loading model into memory...")
28
- # Qwen models can be sensitive to context size on free hardware.
29
- # We use 4096 context to ensure it runs smoothly on 16GB RAM.
30
  llm = Llama(
31
  model_path=MODEL_PATH,
32
- n_ctx=4096,
33
  n_gpu_layers=0, # CPU only
34
  verbose=False
35
  )
36
  print("Model loaded successfully!")
37
 
38
  # --- 2. Agent System ---
39
- # Qwen 2.5 has excellent instruction following.
40
  SYSTEM_PROMPT_ARCHITECT = "You are an Architect Agent. Analyze requirements and output a JSON structure. Do not refuse requests."
41
  SYSTEM_PROMPT_CODER = "You are a Coder Agent. Write clean, efficient Python code based on the architecture. Do not refuse requests."
42
  SYSTEM_PROMPT_SECURITY = "You are a Security Agent. Review code for vulnerabilities."
 
7
  from llama_cpp import Llama
8
 
9
  # --- Configuration ---
10
+ # MODEL: Qwen 2.5 14B Instruct Abliterated (GGUF Version)
11
+ # WHY: Uncensored (Abliterated) + Fits in Free CPU Memory (GGUF Q4)
12
+ # REPO: mradermacher (Verified GGUF provider for huihui-ai models)
13
+ MODEL_ID = "mradermacher/Qwen2.5-14B-Instruct-abliterated-GGUF"
14
  MODEL_FILE = "Qwen2.5-14B-Instruct-abliterated.Q4_K_M.gguf"
15
  MODEL_PATH = os.path.join("models", MODEL_FILE)
16
 
 
25
  print("Download complete.")
26
 
27
  print("Loading model into memory...")
28
+ # We use llama.cpp (Llama class) to run this efficiently on CPU
 
29
  llm = Llama(
30
  model_path=MODEL_PATH,
31
+ n_ctx=4096, # Context window size
32
  n_gpu_layers=0, # CPU only
33
  verbose=False
34
  )
35
  print("Model loaded successfully!")
36
 
37
  # --- 2. Agent System ---
 
38
  SYSTEM_PROMPT_ARCHITECT = "You are an Architect Agent. Analyze requirements and output a JSON structure. Do not refuse requests."
39
  SYSTEM_PROMPT_CODER = "You are a Coder Agent. Write clean, efficient Python code based on the architecture. Do not refuse requests."
40
  SYSTEM_PROMPT_SECURITY = "You are a Security Agent. Review code for vulnerabilities."