ProfessorCEO committed on
Commit
6577d50
·
verified ·
1 Parent(s): ac4ef7f

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +60 -0
main.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from fastapi import FastAPI, HTTPException
3
+ from pydantic import BaseModel
4
+ from huggingface_hub import hf_hub_download
5
+ from llama_cpp import Llama
6
+
7
+ app = FastAPI()
8
+
9
+ # --- CONFIGURATION ---
10
+ REPO_ID = "CoolShotSystems/Axiom-3.1-Sovereign"
11
+ FILENAME = "Meta-Llama-3.1-8B.Q4_K_M.gguf"
12
+
13
+ # Global Model Variable
14
+ axiom_model = None
15
+
16
+ @app.on_event("startup")
17
+ def load_model():
18
+ global axiom_model
19
+ print("📡 DOWNLOADING AXIOM TO HF SPACE...")
20
+ try:
21
+ # Download (If public space, no token needed for public model.
22
+ # If model is private, we need HF_TOKEN secret)
23
+ model_path = hf_hub_download(
24
+ repo_id=REPO_ID,
25
+ filename=FILENAME,
26
+ token=os.environ.get("HF_TOKEN")
27
+ )
28
+ print("🧠 LOADING INTO 16GB RAM...")
29
+ axiom_model = Llama(
30
+ model_path=model_path,
31
+ n_ctx=2048,
32
+ n_threads=2 # Standard for HF Free Tier
33
+ )
34
+ print("✅ AXIOM ONLINE")
35
+ except Exception as e:
36
+ print(f"❌ ERROR: {e}")
37
+
38
+ class ChatRequest(BaseModel):
39
+ messages: list
40
+
41
+ @app.get("/")
42
+ def home():
43
+ return {"status": "Axiom Space Online"}
44
+
45
+ @app.post("/v1/chat/completions")
46
+ async def chat(request: ChatRequest):
47
+ if not axiom_model:
48
+ raise HTTPException(status_code=503, detail="Model loading...")
49
+
50
+ prompt = "<|begin_of_text|>"
51
+ for msg in request.messages:
52
+ role = msg['role']
53
+ content = msg['content']
54
+ prompt += f"<|start_header_id|>{role}<|end_header_id|>\n\n{content}<|eot_id|>"
55
+ prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n"
56
+
57
+ output = axiom_model(
58
+ prompt, max_tokens=512, stop=["<|eot_id|>"], echo=False
59
+ )
60
+ return {"choices": [{"message": {"role": "assistant", "content": output['choices'][0]['text']}}]}