Spaces:

devray11
/

Aevis-Medical-API

Sleeping

Update main.py

4d7f8c5 verified 2 months ago

1.74 kB

	import os
	import threading

	from fastapi import FastAPI
	from fastapi.middleware.cors import CORSMiddleware
	from huggingface_hub import hf_hub_download
	from llama_cpp import Llama
	from pydantic import BaseModel

	# ASGI application object; the route decorators below register on it.
	app = FastAPI()

	# CORS (allow all for now): every origin, HTTP method and request header
	# is accepted by the middleware.
	app.add_middleware(
	    CORSMiddleware,
	    allow_headers=["*"],
	    allow_methods=["*"],
	    allow_origins=["*"],
	)

	# Global model handle. It is None until load_model() succeeds, and
	# load_model() resets it to None when loading fails.
	llm = None

	def load_model():
	    """Download the GGUF weights and bind a ready Llama instance to ``llm``.

	    The file is fetched with ``hf_hub_download`` and then opened with a small
	    context window, two threads and a small batch size. Any exception raised
	    along the way is printed and leaves the module-level ``llm`` set to
	    ``None``; nothing is re-raised and nothing is returned.
	    """
	    global llm
	    try:
	        print("📥 Downloading model from Hugging Face...")

	        gguf_path = hf_hub_download(
	            filename="Aevis.Q4_K_M.gguf",
	            repo_id="devray11/Aevis-Medical-SLM",
	        )

	        print("⚙️ Initializing model...")

	        runtime_options = {
	            "n_ctx": 128,  # Reduced for low RAM
	            "n_threads": 2,  # HF free CPU = 2 cores
	            "n_batch": 16,
	            "use_mmap": True,
	            "use_mlock": False,
	        }
	        llm = Llama(model_path=gguf_path, **runtime_options)

	        print("✅ Model Loaded Successfully")

	    except Exception as load_error:
	        # Best-effort startup: report the failure and leave the handle unset.
	        print(f"❌ Model Load Error: {load_error}")
	        llm = None


	# Load model at startup: this call runs when the module is imported.
	# load_model() catches its own exceptions, so a failed load leaves `llm`
	# as None (and /generate then reports "Model not initialized") instead of
	# aborting the import.
	load_model()


	# Request body accepted by POST /generate.
	class Query(BaseModel):
	    # User text; generate() inserts it into the instruction/response template.
	    prompt: str


	# Inference is a blocking, CPU-bound call on one shared Llama instance.
	# The endpoint below runs in FastAPI's worker threadpool, so this lock keeps
	# the model in use by one request at a time (llama-cpp-python does not
	# document a Llama object as safe for concurrent calls).
	_llm_lock = threading.Lock()


	@app.post("/generate")
	def generate(query: Query):
	    """Generate a short completion for ``query.prompt``.

	    This is a plain ``def`` rather than ``async def`` on purpose: FastAPI
	    runs synchronous path operations in a threadpool, so a long inference no
	    longer blocks the event loop and other routes (such as the ``/`` health
	    check) stay responsive while the model is busy.

	    Args:
	        query: Request body carrying the user's prompt text.

	    Returns:
	        ``{"response": <stripped completion text>}`` on success, or
	        ``{"error": <message>}`` when the model is not loaded or inference
	        raises. Both shapes are the same as before this change.
	    """
	    # Read-only access to the module-level handle; no ``global`` needed.
	    if llm is None:
	        return {"error": "Model not initialized"}

	    prompt_text = f"### Instruction:\n{query.prompt}\n\n### Response:\n"

	    try:
	        with _llm_lock:
	            output = llm(
	                prompt_text,
	                max_tokens=64,  # Reduced for speed
	                stop=["###"],  # Stop before the model starts a new section
	                echo=False,
	            )

	        return {"response": output["choices"][0]["text"].strip()}

	    except Exception as e:
	        # Keep the existing contract: failures are reported in the JSON body.
	        return {"error": str(e)}


	# Root route: returns a fixed status payload and never touches the model.
	@app.get("/")
	def health():
	    status_payload = {"status": "Aevis API is running 🚀"}
	    return status_payload