Spaces:

devray11
/

Aevis-Medical-API

Sleeping

App Files Files Community

devray11 committed on Mar 24

Commit

ff047e9

verified ·

1 Parent(s): ba01a50

Update main.py

Browse files

Files changed (1) hide show

main.py +57 -23

main.py CHANGED Viewed

@@ -3,9 +3,11 @@ from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 app = FastAPI()
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -13,36 +15,68 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# Download Aevis-Medical-SLM
-try:
-    print("📥 Downloading model...")
-    model_path = hf_hub_download(
-        repo_id="devray11/Aevis-Medical-SLM",
-        filename="DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf"
-    )
-    # Initialize Model (Optimized for 2-core CPU)
-    llm = Llama(model_path=model_path, n_ctx=1024, n_threads=2)
-    print("✅ Model Loaded Successfully")
-except Exception as e:
-    print(f"❌ Error: {e}")
-    llm = None
 class Query(BaseModel):
     prompt: str
 @app.post("/generate")
 async def generate(query: Query):
-    if not llm:
         return {"error": "Model not initialized"}
-    output = llm(
-        f"### Instruction:\n{query.prompt}\n\n### Response:\n",
-        max_tokens=256,
-        stop=["###"],
-        echo=False
-    )
-    return {"response": output["choices"][0]["text"]}
 @app.get("/")
 def health():
-    return {"status": "Aevis API is running"}

 from pydantic import BaseModel
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
+import os
 app = FastAPI()
+# CORS (allow all for now)
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
# Module-wide handle to the loaded model; stays None until loading succeeds.
llm = None


def load_model():
    """Download the GGUF weights and build the module-level ``llm`` instance.

    On success the global ``llm`` is rebound to the new ``Llama`` object.
    Any exception raised while downloading or initializing is printed and
    ``llm`` is reset to ``None``; nothing is re-raised.
    """
    global llm
    try:
        print("📥 Downloading model from Hugging Face...")
        weights_file = hf_hub_download(
            repo_id="devray11/Aevis-Medical-SLM",
            filename="DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf",
        )

        print("⚙️ Initializing model...")
        llama_options = {
            "n_ctx": 512,        # kept small for low-RAM hosts
            "n_threads": 2,      # HF free CPU tier = 2 cores
            "n_batch": 128,
            "use_mmap": True,
            "use_mlock": False,
        }
        llm = Llama(model_path=weights_file, **llama_options)
        print("✅ Model Loaded Successfully")
    except Exception as load_error:
        print(f"❌ Model Load Error: {load_error}")
        llm = None


# Eagerly load the model when the module is imported (i.e. at server startup).
load_model()
 class Query(BaseModel):
     prompt: str
 @app.post("/generate")
 async def generate(query: Query):
+    global llm
+    if llm is None:
         return {"error": "Model not initialized"}
+    try:
+        output = llm(
+            f"### Instruction:\n{query.prompt}\n\n### Response:\n",
+            max_tokens=128,     # Reduced for speed
+            stop=["###"],
+            echo=False
+        )
+        return {
+            "response": output["choices"][0]["text"].strip()
+        }
+    except Exception as e:
+        return {"error": str(e)}
 @app.get("/")
 def health():
+    return {"status": "Aevis API is running 🚀"}