Update app.py
app.py CHANGED
```diff
@@ -3,37 +3,65 @@ from pydantic import BaseModel
 from llama_cpp import Llama
 import os
 
-
-
+# =========================
+# Config
+# =========================
 MODEL_PATH = "./models/gpt-oss-20b-Q3_K_M.gguf"
 
-
+SYSTEM_PROMPT = (
+    "You are ChatGPT Open-Source 1.0, a high-performance local AI. "
+    "You were built by the open-source community. "
+    "You are helpful, witty, and proud to run locally without the internet."
+)
 
+# =========================
+# Load Model (ON START)
+# =========================
+print("🔥 Loading model...")
 llm = Llama(
     model_path=MODEL_PATH,
     n_ctx=16384,
     n_threads=os.cpu_count(),
-
+    n_batch=256,
     verbose=False,
 )
+print("✅ Model loaded!")
 
-
+# =========================
+# FastAPI
+# =========================
+app = FastAPI(title="ChatGPT Open-Source 1.0")
 
 class ChatRequest(BaseModel):
-
+    message: str
+
+class ChatResponse(BaseModel):
+    reply: str
+
+@app.get("/")
+def root():
+    return {
+        "name": "ChatGPT Open-Source 1.0",
+        "status": "running",
+        "model": "gpt-oss-20b-Q3_K_M",
+        "offline": True
+    }
 
-@app.post("/chat")
+@app.post("/chat", response_model=ChatResponse)
 def chat(req: ChatRequest):
+    prompt = f"""<|system|>
+{SYSTEM_PROMPT}
+<|user|>
+{req.message}
+<|assistant|>
+"""
+
     output = llm(
-
+        prompt,
         max_tokens=512,
-        stop=["
+        stop=["<|user|>", "<|system|>"],
+        temperature=0.7,
     )
 
-
-
-    }
-
-@app.get("/")
-def root():
-    return {"status": "ChatGPT Open-Source 1.0 is running 🚀"}
+    reply = output["choices"][0]["text"].strip()
+    return ChatResponse(reply=reply)
```
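The commit doesn't show how the server is launched; assuming the usual FastAPI setup (e.g. `uvicorn app:app`) on the default port 8000, a quick smoke test of both endpoints could look like:

```python
# Quick smoke test for the two endpoints in app.py.
# Assumes the server is running locally, e.g. started with
# `uvicorn app:app` on the default port 8000 (not shown in the commit).
import requests

BASE = "http://127.0.0.1:8000"

# GET / returns the metadata dict from root()
print(requests.get(f"{BASE}/").json())

# POST /chat sends a ChatRequest body and gets a ChatResponse back
resp = requests.post(f"{BASE}/chat", json={"message": "Introduce yourself in one sentence."})
resp.raise_for_status()
print(resp.json()["reply"])
```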