devray11 committed on
Commit
252a572
·
verified ·
1 Parent(s): 73594c1

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +21 -37
main.py CHANGED
@@ -3,11 +3,9 @@ from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
- import os
7
 
8
  app = FastAPI()
9
 
10
- # Enable CORS for your React/Lovable frontend
11
  app.add_middleware(
12
  CORSMiddleware,
13
  allow_origins=["*"],
@@ -15,50 +13,36 @@ app.add_middleware(
15
  allow_headers=["*"],
16
  )
17
 
18
- # Configuration
19
- REPO_ID = "devray11/Aevis-Medical-SLM"
20
- MODEL_FILENAME = "DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf"
21
-
22
- print(f"🚀 Initializing Aevis Medical SLM...")
23
-
24
- # Download model from Hugging Face Hub
25
  try:
26
- model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILENAME)
27
- print(f"✅ Model downloaded to: {model_path}")
 
 
 
 
 
 
28
  except Exception as e:
29
- print(f"❌ Error downloading model: {e}")
30
- model_path = None
31
 
32
- # Initialize Model with optimized CPU settings
33
- # n_ctx=1024 (Saves RAM), n_threads=2 (Matches HF Free Tier)
34
- llm = Llama(
35
- model_path=model_path,
36
- n_ctx=1024,
37
- n_threads=2,
38
- n_batch=512,
39
- verbose=True
40
- ) if model_path else None
41
-
42
- class ChatRequest(BaseModel):
43
  prompt: str
44
 
45
  @app.post("/generate")
46
- async def generate(request: ChatRequest):
47
  if not llm:
48
- return {"error": "Model not loaded properly"}
49
-
50
- # Standard instruction format for fine-tuned medical models
51
- prompt = f"### Instruction:\n{request.prompt}\n\n### Response:\n"
52
 
53
- response = llm(
54
- prompt,
55
- max_tokens=512,
56
- stop=["###", "</s>"],
57
  echo=False
58
  )
59
-
60
- return {"response": response["choices"][0]["text"].strip()}
61
 
62
  @app.get("/")
63
- def home():
64
- return {"message": "Aevis Medical SLM API is Online", "model": REPO_ID}
 
3
  from pydantic import BaseModel
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
 
6
 
7
  app = FastAPI()
8
 
 
9
  app.add_middleware(
10
  CORSMiddleware,
11
  allow_origins=["*"],
 
13
  allow_headers=["*"],
14
  )
15
 
16
+ # Download Aevis-Medical-SLM
 
 
 
 
 
 
17
  try:
18
+ print("📥 Downloading model...")
19
+ model_path = hf_hub_download(
20
+ repo_id="devray11/Aevis-Medical-SLM",
21
+ filename="DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf"
22
+ )
23
+ # Initialize Model (Optimized for 2-core CPU)
24
+ llm = Llama(model_path=model_path, n_ctx=1024, n_threads=2)
25
+ print("✅ Model Loaded Successfully")
26
  except Exception as e:
27
+ print(f"❌ Error: {e}")
28
+ llm = None
29
 
30
+ class Query(BaseModel):
 
 
 
 
 
 
 
 
 
 
31
  prompt: str
32
 
33
  @app.post("/generate")
34
+ async def generate(query: Query):
35
  if not llm:
36
+ return {"error": "Model not initialized"}
 
 
 
37
 
38
+ output = llm(
39
+ f"### Instruction:\n{query.prompt}\n\n### Response:\n",
40
+ max_tokens=256,
41
+ stop=["###"],
42
  echo=False
43
  )
44
+ return {"response": output["choices"][0]["text"]}
 
45
 
46
  @app.get("/")
47
+ def health():
48
+ return {"status": "Aevis API is running"}