Spaces:

devray11
/

Aevis-Medical-API

Sleeping

File size: 1,742 Bytes

bcc2a47
a2f6c5c
bcc2a47
 
 
ff047e9
bcc2a47
 
 
ff047e9
a2f6c5c
 
 
 
 
 
 
ff047e9
 
 
 
 
 
 
 
 
 
2bf6e1e
ff047e9
 
 
 
 
 
4d7f8c5
ff047e9
4d7f8c5
ff047e9
 
 
 
 
 
 
 
 
 
 
 
 
 
8da5546
252a572
bcc2a47
 
ff047e9
bcc2a47
252a572
ff047e9
 
 
252a572
ff047e9
 
 
 
4d7f8c5
ff047e9
 
 
 
 
 
 
 
 
 
 
bcc2a47
 
252a572
ff047e9

import asyncio
import os
import threading

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from pydantic import BaseModel

# The ASGI application; the route decorators further down register on it.
app = FastAPI()

# CORS (allow all for now): any origin, HTTP method and request header is
# accepted. Kept in one mapping so the policy is easy to spot and tighten.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)

# Module-wide model handle. It is None until load_model() succeeds, and the
# /generate endpoint answers with an error while it is still None.
llm = None

def load_model(
    *,
    repo_id: str = "devray11/Aevis-Medical-SLM",
    filename: str = "Aevis.Q4_K_M.gguf",
    n_ctx: int = 128,
    n_threads: int = 2,
    n_batch: int = 16,
) -> None:
    """Download a GGUF model from Hugging Face and bind it to the global ``llm``.

    All arguments are keyword-only and default to the values this service
    was originally hard-coded with, so ``load_model()`` behaves as before.

    Args:
        repo_id: Hugging Face repository that hosts the model file.
        filename: Name of the GGUF file inside ``repo_id``.
        n_ctx: Context window passed to ``Llama`` (kept small for low RAM).
        n_threads: Inference threads (the HF free CPU tier has 2 cores).
        n_batch: Batch size passed to ``Llama``.

    Returns:
        None. The outcome is reflected in the module-level ``llm``: a ready
        ``Llama`` instance on success, ``None`` on any failure.

    This is deliberately best-effort: every exception is caught and printed
    so that the web app can still start without a model.
    """
    global llm
    try:
        print("📥 Downloading model from Hugging Face...")

        model_path = hf_hub_download(repo_id=repo_id, filename=filename)

        print("⚙️ Initializing model...")

        # NOTE(review): n_ctx presumably has to hold the prompt plus the
        # generated tokens; with the 64-token completions requested by
        # /generate the default leaves little room for the prompt — confirm.
        llm = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_threads=n_threads,
            n_batch=n_batch,
            use_mmap=True,
            use_mlock=False,
        )

        print("✅ Model Loaded Successfully")

    except Exception as e:
        # Startup boundary: report the problem and leave the service running
        # in its "model not initialized" state instead of crashing.
        print(f"❌ Model Load Error: {e}")
        llm = None


# Load the model eagerly, as soon as this module is imported.
# load_model() catches its own exceptions, so a failed download or
# initialization does not abort the import; it only leaves `llm` as None.
load_model()


class Query(BaseModel):
    """Request body for POST /generate."""

    # Instruction text; inserted verbatim into the prompt template.
    prompt: str


# A single Llama instance is shared by all requests. Inference now runs in
# worker threads, so calls are serialized explicitly; previously they were
# serialized only because the handler blocked the event loop.
_inference_lock = threading.Lock()


def _run_completion(model, prompt: str) -> str:
    """Run one blocking completion on ``model`` and return the stripped text."""
    with _inference_lock:
        output = model(
            f"### Instruction:\n{prompt}\n\n### Response:\n",
            max_tokens=64,     # Reduced for speed
            stop=["###"],
            echo=False,
        )
    return output["choices"][0]["text"].strip()


@app.post("/generate")
async def generate(query: Query):
    """Generate a completion for the submitted prompt.

    Returns ``{"response": <text>}`` on success, or ``{"error": <message>}``
    when the model is not loaded or inference raises.
    """
    # Snapshot the global so the None check and the call see the same object.
    model = llm
    if model is None:
        return {"error": "Model not initialized"}

    try:
        # Inference is CPU-bound and blocking. Running it directly in this
        # coroutine would freeze the event loop (and every other request)
        # for the whole generation, so it is handed to a worker thread.
        loop = asyncio.get_running_loop()
        text = await loop.run_in_executor(
            None, _run_completion, model, query.prompt
        )
    except Exception as e:
        # NOTE(review): errors are still reported in the body with the
        # default status code, to keep the existing response contract.
        return {"error": str(e)}

    return {"response": text}


@app.get("/")
def health():
    """Health check: report that the API process is up."""
    status_payload = {"status": "Aevis API is running 🚀"}
    return status_payload