CooLLaMACEO committed on
Commit
5e45853
·
verified ·
1 Parent(s): 40f58a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -51
app.py CHANGED
@@ -1,67 +1,85 @@
import os

from fastapi import FastAPI
from llama_cpp import Llama
from pydantic import BaseModel

# --- Configuration -------------------------------------------------------
# Path to the quantized GGUF model file loaded at startup.
MODEL_PATH = "./models/gpt-oss-20b-Q3_K_M.gguf"

# System persona injected ahead of every user turn.
SYSTEM_PROMPT = (
    "You are ChatGPT Open-Source 1.0, a high-performance local AI. "
    "You were built by the open-source community. "
    "You are helpful, witty, and proud to run locally without the internet."
)

# --- Model load (happens once, at import time) ---------------------------
print("🔥 Loading model...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=16384,
    n_threads=os.cpu_count(),
    n_batch=256,
    verbose=False,
)
print("✅ Model loaded!")

# --- Web application -----------------------------------------------------
app = FastAPI(title="ChatGPT Open-Source 1.0")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
class ChatRequest(BaseModel):
    """Inbound /chat payload: the user's message text."""

    message: str


class ChatResponse(BaseModel):
    """Outbound /chat payload: the model's reply text."""

    reply: str
 
@app.get("/")
def root():
    """Report service identity and runtime status."""
    info = {
        "name": "ChatGPT Open-Source 1.0",
        "status": "running",
        "model": "gpt-oss-20b-Q3_K_M",
        "offline": True,
    }
    return info
 
@app.post("/chat", response_model=ChatResponse)
def chat(req: ChatRequest):
    """Run one chat turn through the local model and return its reply."""
    # Wrap the user's message in the model's chat template; each section
    # marker is followed by its content on its own line.
    prompt = (
        f"<|system|>\n{SYSTEM_PROMPT}\n"
        f"<|user|>\n{req.message}\n"
        f"<|assistant|>\n"
    )

    # Stop at the next turn marker so the model answers only this turn.
    output = llm(
        prompt,
        max_tokens=512,
        stop=["<|user|>", "<|system|>"],
        temperature=0.7,
    )

    answer = output["choices"][0]["text"].strip()
    return ChatResponse(reply=answer)
 
 
 
 
 
 
 
 
import os

from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse, JSONResponse
from llama_cpp import Llama

# ==========================================
# 1. AI Model Configuration
# ==========================================
MODEL_PATH = "./models/gpt-oss-20b-Q3_K_M.gguf"

print("🔥 ChatGPT Open-Source 1.0: Initializing 20B Engine...")

# n_ctx is kept small (2048) so the process stays inside HF's 16GB RAM
# budget once the 10.7GB model file is fully resident.
_LLAMA_OPTS = {
    "n_ctx": 2048,
    "n_threads": os.cpu_count(),
    "n_batch": 512,
    "verbose": True,
}

llm = Llama(model_path=MODEL_PATH, **_LLAMA_OPTS)

print("✅ Brain Linked! System Online.")
# ==========================================
# 2. FastAPI Setup
# ==========================================
app = FastAPI(title="ChatGPT Open-Source 1.0 Backend")

# CORS lets the static front-end (hosted elsewhere, e.g. GitHub Pages)
# call this Space. Tighten allow_origins to the real site URL later.
_CORS_OPTS = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTS)
# ==========================================
# 3. Routes
# ==========================================

@app.get("/", response_class=HTMLResponse)
async def get_ui():
    """Serve the bundled index.html UI, or a fallback banner if it is missing.

    Returns:
        str: raw HTML, delivered with an HTMLResponse content type.
    """
    if os.path.exists("index.html"):
        # Explicit encoding: the platform default is locale-dependent and can
        # raise UnicodeDecodeError for a UTF-8 page on some hosts.
        with open("index.html", "r", encoding="utf-8") as f:
            return f.read()
    return "<h1>System Online</h1><p>Backend is running, but index.html was not found.</p>"
@app.post("/chat")
async def chat(request: Request):
    """Handle an AI chat request.

    Expects a JSON body of the form {"message": "..."} and answers with
    {"response": "..."}. A malformed body or a missing/empty message yields
    HTTP 400; a failure during inference yields HTTP 500.
    """
    # Parse the body in its own try-block so a malformed request is reported
    # as a client error (400) instead of being swallowed by the inference
    # handler below and misreported as a 500.
    try:
        data = await request.json()
    except Exception:
        return JSONResponse({"response": "I didn't receive a message."}, status_code=400)

    user_message = data.get("message", "")
    if not user_message:
        return JSONResponse({"response": "I didn't receive a message."}, status_code=400)

    try:
        # Formatting for the GPT-OSS model architecture
        prompt = f"<|system|>You are ChatGPT Open-Source 1.0, a helpful local AI.<|user|>{user_message}<|assistant|>"

        # Generate response; stop tokens keep the model from speaking past
        # its own turn.
        output = llm(
            prompt,
            max_tokens=512,
            stop=["<|user|>", "<|system|>", "</s>"],
            temperature=0.7
        )

        reply = output["choices"][0]["text"].strip()
        return JSONResponse({"response": reply})

    except Exception as e:
        # Broad catch is deliberate at this boundary: llama_cpp can raise a
        # variety of runtime errors and the API must always answer.
        print(f"❌ Error during inference: {e}")
        return JSONResponse({"response": "My brain encountered an error processing that."}, status_code=500)
 
 
 
# ==========================================
# 4. Health Check
# ==========================================
@app.get("/health")
async def health():
    """Liveness probe: report that the service and model are up."""
    status_report = {
        "status": "ready",
        "model": "20B-Q3_K_M",
        "ram_bypass": True,
    }
    return status_report