devray11 committed on
Commit
ff047e9
·
verified ·
1 Parent(s): ba01a50

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +57 -23
main.py CHANGED
@@ -3,9 +3,11 @@ from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
 
6
 
7
  app = FastAPI()
8
 
 
9
  app.add_middleware(
10
  CORSMiddleware,
11
  allow_origins=["*"],
@@ -13,36 +15,68 @@ app.add_middleware(
13
  allow_headers=["*"],
14
  )
15
 
16
- # Download Aevis-Medical-SLM
17
- try:
18
- print("📥 Downloading model...")
19
- model_path = hf_hub_download(
20
- repo_id="devray11/Aevis-Medical-SLM",
21
- filename="DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf"
22
- )
23
- # Initialize Model (Optimized for 2-core CPU)
24
- llm = Llama(model_path=model_path, n_ctx=1024, n_threads=2)
25
- print("✅ Model Loaded Successfully")
26
- except Exception as e:
27
- print(f"❌ Error: {e}")
28
- llm = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  class Query(BaseModel):
31
  prompt: str
32
 
 
33
  @app.post("/generate")
34
  async def generate(query: Query):
35
- if not llm:
 
 
36
  return {"error": "Model not initialized"}
37
-
38
- output = llm(
39
- f"### Instruction:\n{query.prompt}\n\n### Response:\n",
40
- max_tokens=256,
41
- stop=["###"],
42
- echo=False
43
- )
44
- return {"response": output["choices"][0]["text"]}
 
 
 
 
 
 
 
 
45
 
46
  @app.get("/")
47
  def health():
48
- return {"status": "Aevis API is running"}
 
3
  from pydantic import BaseModel
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
+ import os
7
 
8
  app = FastAPI()
9
 
10
+ # CORS (allow all for now)
11
  app.add_middleware(
12
  CORSMiddleware,
13
  allow_origins=["*"],
 
15
  allow_headers=["*"],
16
  )
17
 
# Global model handle shared by the request handlers.
# It stays None until load_model() succeeds.
llm = None


def load_model(
    *,
    repo_id: str = "devray11/Aevis-Medical-SLM",
    filename: str = "DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf",
    n_ctx: int = 512,
    n_threads: int = 2,
    n_batch: int = 128,
) -> bool:
    """Download the GGUF weights and initialize the global ``llm``.

    All arguments are keyword-only and default to the values this service
    has always used, so a bare ``load_model()`` behaves as before.

    Args:
        repo_id: Hugging Face repository that hosts the model file.
        filename: Name of the GGUF file inside ``repo_id``.
        n_ctx: Context size passed to ``Llama`` (kept small for low RAM).
        n_threads: Threads passed to ``Llama`` (HF free CPU = 2 cores).
        n_batch: Batch size passed to ``Llama``.

    Returns:
        True when the model was loaded, False when any step failed. On
        failure the error is printed and ``llm`` is reset to None, so the
        app still starts and ``/generate`` reports the missing model.
    """
    global llm
    try:
        print("📥 Downloading model from Hugging Face...")
        model_path = hf_hub_download(repo_id=repo_id, filename=filename)

        print("⚙️ Initializing model...")
        llm = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_threads=n_threads,
            n_batch=n_batch,
            use_mmap=True,
            use_mlock=False,
        )

        print("✅ Model Loaded Successfully")
    except Exception as e:
        # Deliberate best-effort: a failed load must not stop the API from
        # starting, so report the problem and leave the handle empty.
        print(f"❌ Model Load Error: {e}")
        llm = None

    return llm is not None


# Load model at startup
load_model()
51
+
52
 
class Query(BaseModel):
    """Request body accepted by the ``/generate`` endpoint."""

    # Raw user text; it is inserted into the instruction template as-is.
    prompt: str
55
 
56
+
# Imported here because they are needed only by the inference helpers below.
import asyncio
import threading

# One model instance is shared by every request. Calls are serialized so two
# requests never drive the same model object at the same time.
_llm_lock = threading.Lock()


def _run_completion(model, prompt: str) -> str:
    """Run one blocking completion on ``model`` and return the stripped text."""
    with _llm_lock:
        output = model(
            f"### Instruction:\n{prompt}\n\n### Response:\n",
            max_tokens=128,  # Reduced for speed
            stop=["###"],
            echo=False,
        )
    return output["choices"][0]["text"].strip()


@app.post("/generate")
async def generate(query: Query):
    """Generate a completion for ``query.prompt``.

    Returns:
        ``{"response": <text>}`` on success, or ``{"error": <message>}`` when
        the model is not loaded, the prompt is blank, or inference raises.
        The payload shapes match what existing clients already receive.
    """
    # Snapshot the handle so a concurrent reload cannot swap it to None
    # between the check and the call.
    model = llm
    if model is None:
        return {"error": "Model not initialized"}

    if not query.prompt.strip():
        return {"error": "Prompt must not be empty"}

    try:
        # Inference is CPU-bound and blocking. Running it on a worker thread
        # keeps the event loop free, so "/" still answers during generation.
        loop = asyncio.get_running_loop()
        text = await loop.run_in_executor(
            None, _run_completion, model, query.prompt
        )
    except Exception as e:
        return {"error": str(e)}

    return {"response": text}
78
+
79
 
@app.get("/")
def health():
    """Liveness probe: report that the API process is up."""
    status_payload = {"status": "Aevis API is running 🚀"}
    return status_payload