Spaces:

CoolShotSystems
/

Axiom-Sovereign-API

Build error

App Files Community

ProfessorCEO committed 3 days ago

Commit

25fe212

verified ·

1 Parent(s): fb559c4

Update main.py

Browse files

Files changed (1) hide show

main.py +19 -10

main.py CHANGED Viewed

@@ -16,22 +16,22 @@ axiom_model = None
 @app.on_event("startup")
 def load_model():
     global axiom_model
-    print("📡 DOWNLOADING AXIOM TO HF SPACE...")
     try:
-        # Download (If public space, no token needed for public model.
-        # If model is private, we need HF_TOKEN secret)
         model_path = hf_hub_download(
             repo_id=REPO_ID,
             filename=FILENAME,
             token=os.environ.get("HF_TOKEN")
         )
-        print("🧠 LOADING INTO 16GB RAM...")
         axiom_model = Llama(
             model_path=model_path,
-            n_ctx=2048,
-            n_threads=2 # Standard for HF Free Tier
         )
-        print("✅ AXIOM ONLINE")
     except Exception as e:
         print(f"❌ ERROR: {e}")
@@ -40,21 +40,30 @@ class ChatRequest(BaseModel):
 @app.get("/")
 def home():
-    return {"status": "Axiom Space Online"}
 @app.post("/v1/chat/completions")
 async def chat(request: ChatRequest):
     if not axiom_model:
         raise HTTPException(status_code=503, detail="Model loading...")
-    prompt = "<|begin_of_text|>"
     for msg in request.messages:
         role = msg['role']
         content = msg['content']
         prompt += f"<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>"
     prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"
     output = axiom_model(
-        prompt, max_tokens=512, stop=["<|eot_id|>"], echo=False
     )
     return {"choices": [{"message": {"role": "assistant", "content": output['choices'][0]['text']}}]}

 @app.on_event("startup")
 def load_model():
     global axiom_model
+    print("📡 DOWNLOADING AXIOM...")
     try:
         model_path = hf_hub_download(
             repo_id=REPO_ID,
             filename=FILENAME,
             token=os.environ.get("HF_TOKEN")
         )
+        print("🧠 LOADING (LIGHT MODE)...")
+        # OPTIMIZATION: n_ctx=512 makes it MUCH faster on Free Tier
         axiom_model = Llama(
             model_path=model_path,
+            n_ctx=512,  # Reduced from 2048 for speed
+            n_threads=2, # Optimal for Hugging Face Free Tier
+            verbose=False
         )
+        print("✅ AXIOM ONLINE (FAST MODE)")
     except Exception as e:
         print(f"❌ ERROR: {e}")
 @app.get("/")
 def home():
+    return {"status": "Axiom Online"}
 @app.post("/v1/chat/completions")
 async def chat(request: ChatRequest):
     if not axiom_model:
         raise HTTPException(status_code=503, detail="Model loading...")
+    # --- PROMPT FORMATTING (Fixed) ---
+    # We removed <|begin_of_text|> to fix the warning
+    # We inject the Identity immediately
+    prompt = "<|start_header_id|>system<|end_header_id|>\n\nYou are Axiom 3.1, a Sovereign AI created by Professor Heritage at Cool Shot Systems.<|eot_id|>"
     for msg in request.messages:
         role = msg['role']
         content = msg['content']
         prompt += f"<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>"
     prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"
+    # GENERATION SETTINGS
     output = axiom_model(
+        prompt,
+        max_tokens=128, # Limit output length to save time
+        stop=["<|eot_id|>", "<|end_of_text|>"],
+        echo=False
     )
     return {"choices": [{"message": {"role": "assistant", "content": output['choices'][0]['text']}}]}