CooLLaMACEO committed on
Commit
f043989
·
verified ·
1 Parent(s): 3294758

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -8,10 +8,10 @@ import uvicorn
8
 
9
  app = FastAPI()
10
 
11
- # IMPORTANT: This allows your GitHub frontend to talk to this API
12
  app.add_middleware(
13
  CORSMiddleware,
14
- allow_origins=["*"], # Change this to your specific GitHub URL for better security later
15
  allow_credentials=True,
16
  allow_methods=["*"],
17
  allow_headers=["*"],
@@ -20,13 +20,14 @@ app.add_middleware(
20
  security = HTTPBearer()
21
  MY_API_KEY = "my-secret-key-456"
22
 
23
- # Load MPT-7B Q2
24
- # Optimized for 7B parameters on limited RAM
25
  llm = Llama(
26
  model_path="./mpt-7b-q2.gguf",
27
  n_ctx=2048,
28
- n_threads=4,
29
  n_batch=512,
 
30
  verbose=False
31
  )
32
 
 
8
 
9
  app = FastAPI()
10
 
11
+ # Enable CORS for your GitHub-hosted frontend
12
  app.add_middleware(
13
  CORSMiddleware,
14
+ allow_origins=["*"],
15
  allow_credentials=True,
16
  allow_methods=["*"],
17
  allow_headers=["*"],
 
20
  security = HTTPBearer()
21
  MY_API_KEY = "my-secret-key-456"
22
 
23
+ # Load MPT-7B Q2 - Optimized for CPU only
24
+ # This assumes the Dockerfile renamed the file to 'mpt-7b-q2.gguf'
25
  llm = Llama(
26
  model_path="./mpt-7b-q2.gguf",
27
  n_ctx=2048,
28
+ n_threads=4, # Optimized for HF Free Tier (2-4 vCPU)
29
  n_batch=512,
30
+ n_gpu_layers=0, # Force CPU-only to match the base image
31
  verbose=False
32
  )
33