ProfessorCEO committed on
Commit
25fe212
·
verified ·
1 Parent(s): fb559c4

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +19 -10
main.py CHANGED
@@ -16,22 +16,22 @@ axiom_model = None
16
  @app.on_event("startup")
17
  def load_model():
18
  global axiom_model
19
- print("📡 DOWNLOADING AXIOM TO HF SPACE...")
20
  try:
21
- # Download (If public space, no token needed for public model.
22
- # If model is private, we need HF_TOKEN secret)
23
  model_path = hf_hub_download(
24
  repo_id=REPO_ID,
25
  filename=FILENAME,
26
  token=os.environ.get("HF_TOKEN")
27
  )
28
- print("🧠 LOADING INTO 16GB RAM...")
 
29
  axiom_model = Llama(
30
  model_path=model_path,
31
- n_ctx=2048,
32
- n_threads=2 # Standard for HF Free Tier
 
33
  )
34
- print("✅ AXIOM ONLINE")
35
  except Exception as e:
36
  print(f"❌ ERROR: {e}")
37
 
@@ -40,21 +40,30 @@ class ChatRequest(BaseModel):
40
 
41
  @app.get("/")
42
  def home():
43
- return {"status": "Axiom Space Online"}
44
 
45
  @app.post("/v1/chat/completions")
46
  async def chat(request: ChatRequest):
47
  if not axiom_model:
48
  raise HTTPException(status_code=503, detail="Model loading...")
49
 
50
- prompt = "<|begin_of_text|>"
 
 
 
 
51
  for msg in request.messages:
52
  role = msg['role']
53
  content = msg['content']
54
  prompt += f"<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>"
55
  prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"
56
 
 
57
  output = axiom_model(
58
- prompt, max_tokens=512, stop=["<|eot_id|>"], echo=False
 
 
 
59
  )
 
60
  return {"choices": [{"message": {"role": "assistant", "content": output['choices'][0]['text']}}]}
 
16
@app.on_event("startup")
def load_model():
    """Fetch the GGUF weights from the Hub and load them into llama.cpp.

    Runs once when the FastAPI app starts. On success the module-level
    ``axiom_model`` holds a ready ``Llama`` instance; on failure it stays
    ``None`` and the chat endpoint keeps answering 503.
    """
    global axiom_model
    print("📡 DOWNLOADING AXIOM...")
    try:
        # HF_TOKEN is only needed when the model repo is private.
        gguf_path = hf_hub_download(
            repo_id=REPO_ID,
            filename=FILENAME,
            token=os.environ.get("HF_TOKEN"),
        )
        print("🧠 LOADING (LIGHT MODE)...")
        # OPTIMIZATION: n_ctx=512 makes it MUCH faster on the Free Tier.
        axiom_model = Llama(
            model_path=gguf_path,
            n_ctx=512,      # reduced from 2048 for speed
            n_threads=2,    # optimal for the Hugging Face Free Tier
            verbose=False,
        )
        print("✅ AXIOM ONLINE (FAST MODE)")
    except Exception as e:
        # Deliberate keep-alive: the Space stays up even if download/load
        # fails; /v1/chat/completions reports 503 until the model exists.
        print(f"❌ ERROR: {e}")
37
 
 
40
 
41
@app.get("/")
def home():
    """Lightweight health-check route confirming the Space is reachable."""
    return {"status": "Axiom Online"}
44
 
45
@app.post("/v1/chat/completions")
async def chat(request: ChatRequest):
    """OpenAI-style chat completion backed by the local llama.cpp model.

    Returns 503 while the model is still loading; otherwise renders the
    conversation with the Llama-3 chat template and generates a reply.
    """
    if not axiom_model:
        raise HTTPException(status_code=503, detail="Model loading...")

    # --- PROMPT FORMATTING ---
    # <|begin_of_text|> is intentionally omitted (it triggered a
    # duplicate-BOS warning); the identity is injected as the system turn.
    segments = [
        "<|start_header_id|>system<|end_header_id|>\n\n"
        "You are Axiom 3.1, a Sovereign AI created by Professor Heritage at Cool Shot Systems.<|eot_id|>"
    ]
    segments.extend(
        f"<|start_header_id|>{msg['role']}<|end_header_id|>\n\n{msg['content']}<|eot_id|>"
        for msg in request.messages
    )
    segments.append("<|start_header_id|>assistant<|end_header_id|>\n\n")
    prompt = "".join(segments)

    # GENERATION SETTINGS
    completion = axiom_model(
        prompt,
        max_tokens=128,  # limit output length to save time
        stop=["<|eot_id|>", "<|end_of_text|>"],
        echo=False,
    )
    reply = completion['choices'][0]['text']
    return {"choices": [{"message": {"role": "assistant", "content": reply}}]}