devray11 committed on
Commit
010d1c5
·
verified ·
1 Parent(s): e91e62c

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +14 -8
main.py CHANGED
@@ -6,7 +6,7 @@ from huggingface_hub import hf_hub_download
6
 
7
  app = FastAPI()
8
 
9
- # Enable CORS so your website can call this API
10
  app.add_middleware(
11
  CORSMiddleware,
12
  allow_origins=["*"],
@@ -14,27 +14,33 @@ app.add_middleware(
14
  allow_headers=["*"],
15
  )
16
 
17
- # Download the model from your repo
18
- print("Fetching Aevis Model...")
19
  model_path = hf_hub_download(
20
  repo_id="devray11/Aevis-Medical-SLM",
21
  filename="DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf"
22
  )
23
 
24
- # Load model (Optimized for 2GB RAM / CPU)
25
- llm = Llama(model_path=model_path, n_ctx=1024, n_threads=2)
 
 
 
 
 
26
 
27
  class Query(BaseModel):
28
  prompt: str
29
 
30
  @app.post("/generate")
31
  async def generate(query: Query):
32
- # Prompt format based on your training
33
  fmt_prompt = f"### Instruction:\n{query.prompt}\n\n### Response:\n"
34
 
 
35
  output = llm(
36
  fmt_prompt,
37
- max_tokens=400,
38
  stop=["###"],
39
  echo=False
40
  )
@@ -43,4 +49,4 @@ async def generate(query: Query):
43
 
44
  @app.get("/")
45
  def health():
46
- return {"status": "Aevis API is Online and Healthy"}
 
6
 
7
  app = FastAPI()
8
 
9
+ # Enable CORS so your Lovable frontend can connect
10
  app.add_middleware(
11
  CORSMiddleware,
12
  allow_origins=["*"],
 
14
  allow_headers=["*"],
15
  )
16
 
17
+ # Download your Aevis Model
18
+ print("Checking for Model...")
19
  model_path = hf_hub_download(
20
  repo_id="devray11/Aevis-Medical-SLM",
21
  filename="DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf"
22
  )
23
 
24
+ # Load the model (Configured for Free Tier CPU/RAM)
25
+ llm = Llama(
26
+ model_path=model_path,
27
+ n_ctx=1024, # Context window
28
+ n_threads=2, # Number of CPU cores to use
29
+ n_batch=512 # Processing batch size
30
+ )
31
 
32
  class Query(BaseModel):
33
  prompt: str
34
 
35
  @app.post("/generate")
36
  async def generate(query: Query):
37
+ # Prompting structure for your fine-tuned model
38
  fmt_prompt = f"### Instruction:\n{query.prompt}\n\n### Response:\n"
39
 
40
+ # Run Inference
41
  output = llm(
42
  fmt_prompt,
43
+ max_tokens=450,
44
  stop=["###"],
45
  echo=False
46
  )
 
49
 
50
  @app.get("/")
51
  def health():
52
+ return {"status": "Aevis API is Live and Ready"}