devray11 committed on
Commit
8da5546
·
verified ·
1 Parent(s): 010d1c5

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +37 -25
main.py CHANGED
@@ -3,10 +3,11 @@ from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
 
6
 
7
  app = FastAPI()
8
 
9
- # Enable CORS so your Lovable frontend can connect
10
  app.add_middleware(
11
  CORSMiddleware,
12
  allow_origins=["*"],
@@ -14,39 +15,50 @@ app.add_middleware(
14
  allow_headers=["*"],
15
  )
16
 
17
- # Download your Aevis Model
18
- print("Checking for Model...")
19
- model_path = hf_hub_download(
20
- repo_id="devray11/Aevis-Medical-SLM",
21
- filename="DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf"
22
- )
23
 
24
- # Load the model (Configured for Free Tier CPU/RAM)
 
 
 
 
 
 
 
 
 
25
  llm = Llama(
26
- model_path=model_path,
27
- n_ctx=1024, # Context window
28
- n_threads=2, # Number of CPU cores to use
29
- n_batch=512 # Processing batch size
30
- )
 
31
 
32
- class Query(BaseModel):
33
  prompt: str
34
 
35
  @app.post("/generate")
36
- async def generate(query: Query):
37
- # Prompting structure for your fine-tuned model
38
- fmt_prompt = f"### Instruction:\n{query.prompt}\n\n### Response:\n"
 
 
 
39
 
40
- # Run Inference
41
- output = llm(
42
- fmt_prompt,
43
- max_tokens=450,
44
- stop=["###"],
45
  echo=False
46
  )
47
 
48
- return {"response": output["choices"][0]["text"]}
49
 
50
  @app.get("/")
51
- def health():
52
- return {"status": "Aevis API is Live and Ready"}
 
3
  from pydantic import BaseModel
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
+ import os
7
 
8
  app = FastAPI()
9
 
10
+ # Enable CORS for your React/Lovable frontend
11
  app.add_middleware(
12
  CORSMiddleware,
13
  allow_origins=["*"],
 
15
  allow_headers=["*"],
16
  )
17
 
18
+ # Configuration
19
+ REPO_ID = "devray11/Aevis-Medical-SLM"
20
+ MODEL_FILENAME = "DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf"
21
+
22
+ print(f"🚀 Initializing Aevis Medical SLM...")
 
23
 
24
+ # Download model from Hugging Face Hub
25
+ try:
26
+ model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
27
+ print(f"✅ Model downloaded to: {model_path}")
28
+ except Exception as e:
29
+ print(f"❌ Error downloading model: {e}")
30
+ model_path = None
31
+
32
+ # Initialize Model with optimized CPU settings
33
+ # n_ctx=1024 (Saves RAM), n_threads=2 (Matches HF Free Tier)
34
  llm = Llama(
35
+ model_path=model_path,
36
+ n_ctx=1024,
37
+ n_threads=2,
38
+ n_batch=512,
39
+ verbose=True
40
+ ) if model_path else None
41
 
42
+ class ChatRequest(BaseModel):
43
  prompt: str
44
 
45
  @app.post("/generate")
46
+ async def generate(request: ChatRequest):
47
+ if not llm:
48
+ return {"error": "Model not loaded properly"}
49
+
50
+ # Standard instruction format for fine-tuned medical models
51
+ prompt = f"### Instruction:\n{request.prompt}\n\n### Response:\n"
52
 
53
+ response = llm(
54
+ prompt,
55
+ max_tokens=512,
56
+ stop=["###", "</s>"],
 
57
  echo=False
58
  )
59
 
60
+ return {"response": response["choices"][0]["text"].strip()}
61
 
62
  @app.get("/")
63
+ def home():
64
+ return {"message": "Aevis Medical SLM API is Online", "model": REPO_ID}