Spaces:

devray11
/

Aevis-Medical-API

Sleeping

App Files Community

devray11 committed on Mar 24

Commit

8da5546

verified ·

1 Parent(s): 010d1c5

Update main.py

Browse files

Files changed (1) hide show

main.py +37 -25

main.py CHANGED Viewed

@@ -3,10 +3,11 @@ from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 app = FastAPI()
-# Enable CORS so your Lovable frontend can connect
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -14,39 +15,50 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# Download your Aevis Model
-print("Checking for Model...")
-model_path = hf_hub_download(
-    repo_id="devray11/Aevis-Medical-SLM",
-    filename="DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf"
-)
-# Load the model (Configured for Free Tier CPU/RAM)
 llm = Llama(
-    model_path=model_path,
-    n_ctx=1024,      # Context window
-    n_threads=2,     # Number of CPU cores to use
-    n_batch=512      # Processing batch size
-)
-class Query(BaseModel):
     prompt: str
 @app.post("/generate")
-async def generate(query: Query):
-    # Prompting structure for your fine-tuned model
-    fmt_prompt = f"### Instruction:\n{query.prompt}\n\n### Response:\n"
-    # Run Inference
-    output = llm(
-        fmt_prompt,
-        max_tokens=450,
-        stop=["###"],
         echo=False
     )
-    return {"response": output["choices"][0]["text"]}
 @app.get("/")
-def health():
-    return {"status": "Aevis API is Live and Ready"}

 from pydantic import BaseModel
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
+import os
 app = FastAPI()
+# Enable CORS for your React/Lovable frontend
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
+# Configuration
+REPO_ID = "devray11/Aevis-Medical-SLM"
+MODEL_FILENAME = "DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf"
+print(f"🚀 Initializing Aevis Medical SLM...")
+# Download model from Hugging Face Hub
+try:
+    model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
+    print(f"✅ Model downloaded to: {model_path}")
+except Exception as e:
+    print(f"❌ Error downloading model: {e}")
+    model_path = None
+# Initialize Model with optimized CPU settings
+# n_ctx=1024 (Saves RAM), n_threads=2 (Matches HF Free Tier)
 llm = Llama(
+    model_path=model_path,
+    n_ctx=1024,
+    n_threads=2,
+    n_batch=512,
+    verbose=True
+) if model_path else None
+class ChatRequest(BaseModel):
     prompt: str
 @app.post("/generate")
+async def generate(request: ChatRequest):
+    if not llm:
+        return {"error": "Model not loaded properly"}
+    # Standard instruction format for fine-tuned medical models
+    prompt = f"### Instruction:\n{request.prompt}\n\n### Response:\n"
+    response = llm(
+        prompt,
+        max_tokens=512,
+        stop=["###", "</s>"],
         echo=False
     )
+    return {"response": response["choices"][0]["text"].strip()}
 @app.get("/")
+def home():
+    return {"message": "Aevis Medical SLM API is Online", "model": REPO_ID}