# Uploaded by devray11
# Commit: "Update main.py" (4d7f8c5, verified)
import os
import threading

from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from pydantic import BaseModel
# ASGI application instance; the routes below are registered on it.
app = FastAPI()

# CORS: every origin, method and header is accepted (wide open for now).
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)

# Shared model handle: assigned by load_model(), left as None if loading fails.
llm = None
def load_model():
    """Download the GGUF weights and bind the loaded model to the global ``llm``.

    Any failure (download or initialization) is reported on stdout and leaves
    ``llm`` set to ``None``; nothing is raised to the caller.
    """
    global llm
    try:
        print("πŸ“₯ Downloading model from Hugging Face...")
        gguf_file = hf_hub_download(
            repo_id="devray11/Aevis-Medical-SLM",
            filename="Aevis.Q4_K_M.gguf",
        )

        print("βš™οΈ Initializing model...")
        runtime_options = {
            "n_ctx": 128,  # Reduced for low RAM
            "n_threads": 2,  # HF free CPU = 2 cores
            "n_batch": 16,
            "use_mmap": True,
            "use_mlock": False,
        }
        llm = Llama(model_path=gguf_file, **runtime_options)
        print("βœ… Model Loaded Successfully")
    except Exception as err:
        # Best-effort startup: keep the API up and let /generate report the
        # missing model instead of crashing the process.
        print(f"❌ Model Load Error: {err}")
        llm = None
# Load the model once, at import time. load_model() swallows its own errors,
# so on failure the module still imports and `llm` simply stays None.
load_model()
# Request body accepted by POST /generate.
class Query(BaseModel):
    # User text; generate() inserts it into the "### Instruction:" template.
    prompt: str
# One model instance is shared by every request. Inference calls are
# serialized with this lock because concurrent use of a single llama.cpp
# context is not assumed to be safe. The lock is taken inside the worker
# thread, so a request waiting its turn never blocks the event loop.
_inference_lock = threading.Lock()


def _run_inference(prompt: str) -> str:
    """Run one blocking completion for *prompt* and return the stripped text."""
    with _inference_lock:
        output = llm(
            f"### Instruction:\n{prompt}\n\n### Response:\n",
            max_tokens=64,  # Reduced for speed
            stop=["###"],
            echo=False,
        )
    return output["choices"][0]["text"].strip()


@app.post("/generate")
async def generate(query: Query):
    """Generate a completion for ``query.prompt``.

    Returns ``{"response": <text>}`` on success. If the model was never
    loaded, or inference raises, returns ``{"error": <message>}`` instead
    (same response shapes as before; no exception reaches the client).

    Inference is CPU-bound and blocking, so it runs in the threadpool rather
    than directly in this coroutine; calling it inline would stall the event
    loop, and with it every other endpoint, for the whole generation.
    """
    if llm is None:
        return {"error": "Model not initialized"}
    try:
        text = await run_in_threadpool(_run_inference, query.prompt)
    except Exception as e:
        return {"error": str(e)}
    return {"response": text}
@app.get("/")
def health():
    # Liveness probe: a fixed payload that does not depend on the model state.
    status_payload = {"status": "Aevis API is running πŸš€"}
    return status_payload