Spaces:

devray11
/

Aevis-Medical-API

Sleeping

devray11 committed on Mar 24

Commit

010d1c5

verified ·

1 Parent(s): e91e62c

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -6,7 +6,7 @@ from huggingface_hub import hf_hub_download
 app = FastAPI()
-# Enable CORS so your website can call this API
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -14,27 +14,33 @@ app.add_middleware(
     allow_headers=["*"],
 )
-# Download the model from your repo
-print("Fetching Aevis Model...")
 model_path = hf_hub_download(
     repo_id="devray11/Aevis-Medical-SLM",
     filename="DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf"
 )
-# Load model (Optimized for 2GB RAM / CPU)
-llm = Llama(model_path=model_path, n_ctx=1024, n_threads=2)
 class Query(BaseModel):
     prompt: str
 @app.post("/generate")
 async def generate(query: Query):
-    # Prompt format based on your training
     fmt_prompt = f"### Instruction:\n{query.prompt}\n\n### Response:\n"
     output = llm(
         fmt_prompt,
-        max_tokens=400,
         stop=["###"],
         echo=False
     )
@@ -43,4 +49,4 @@ async def generate(query: Query):
 @app.get("/")
 def health():
-    return {"status": "Aevis API is Online and Healthy"}

 app = FastAPI()
+# Enable CORS so your Lovable frontend can connect
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
     allow_headers=["*"],
 )
+# Download your Aevis Model
+print("Checking for Model...")
 model_path = hf_hub_download(
     repo_id="devray11/Aevis-Medical-SLM",
     filename="DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf"
 )
+# Load the model (Configured for Free Tier CPU/RAM)
+llm = Llama(
+    model_path=model_path,
+    n_ctx=1024,      # Context window
+    n_threads=2,     # Number of CPU cores to use
+    n_batch=512      # Processing batch size
+)
 class Query(BaseModel):
     prompt: str
 @app.post("/generate")
 async def generate(query: Query):
+    # Prompting structure for your fine-tuned model
     fmt_prompt = f"### Instruction:\n{query.prompt}\n\n### Response:\n"
+    # Run Inference
     output = llm(
         fmt_prompt,
+        max_tokens=450,
         stop=["###"],
         echo=False
     )
 @app.get("/")
 def health():
+    return {"status": "Aevis API is Live and Ready"}