CooLLaMACEO committed on
Commit
f043989
·
verified ·
1 Parent(s): 3294758

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -8,10 +8,10 @@ import uvicorn
8
 
9
  app = FastAPI()
10
 
11
- # IMPORTANT: This allows your GitHub frontend to talk to this API
12
  app.add_middleware(
13
  CORSMiddleware,
14
- allow_origins=["*"], # Change this to your specific GitHub URL for better security later
15
  allow_credentials=True,
16
  allow_methods=["*"],
17
  allow_headers=["*"],
@@ -20,13 +20,14 @@ app.add_middleware(
20
  security = HTTPBearer()
21
  MY_API_KEY = "my-secret-key-456"
22
 
23
- # Load MPT-7B Q2
24
- # Optimized for 7B parameters on limited RAM
25
  llm = Llama(
26
  model_path="./mpt-7b-q2.gguf",
27
  n_ctx=2048,
28
- n_threads=4,
29
  n_batch=512,
 
30
  verbose=False
31
  )
32
 
 
8
 
9
  app = FastAPI()
10
 
11
+ # Enable CORS for your GitHub-hosted frontend
12
  app.add_middleware(
13
  CORSMiddleware,
14
+ allow_origins=["*"],
15
  allow_credentials=True,
16
  allow_methods=["*"],
17
  allow_headers=["*"],
 
20
  security = HTTPBearer()
21
  MY_API_KEY = "my-secret-key-456"
22
 
23
+ # Load MPT-7B Q2 - Optimized for CPU only
24
+ # This assumes the Dockerfile renamed the file to 'mpt-7b-q2.gguf'
25
  llm = Llama(
26
  model_path="./mpt-7b-q2.gguf",
27
  n_ctx=2048,
28
+ n_threads=4, # Optimized for HF Free Tier (2-4 vCPU)
29
  n_batch=512,
30
+ n_gpu_layers=0, # Force CPU-only to match the base image
31
  verbose=False
32
  )
33