devray11 committed on
Commit
a2f6c5c
·
verified ·
1 Parent(s): 5671570

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +23 -6
main.py CHANGED
@@ -1,29 +1,46 @@
1
  from fastapi import FastAPI
 
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
 
6
  app = FastAPI()
7
 
8
- # Downloads your specific model
 
 
 
 
 
 
 
 
 
9
  model_path = hf_hub_download(
10
  repo_id="devray11/Aevis-Medical-SLM",
11
  filename="DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf"
12
  )
13
 
14
- # Initializes the AI engine
15
- llm = Llama(model_path=model_path, n_ctx=2048)
16
 
17
  class Query(BaseModel):
18
  prompt: str
19
 
20
  @app.post("/generate")
21
  async def generate(query: Query):
22
- # Formats the prompt exactly like your training data
23
  fmt_prompt = f"### Instruction:\n{query.prompt}\n\n### Response:\n"
24
- output = llm(fmt_prompt, max_tokens=512, stop=["###"])
 
 
 
 
 
 
 
25
  return {"response": output["choices"][0]["text"]}
26
 
27
  @app.get("/")
28
  def health():
29
- return {"status": "Aevis API is Online"}
 
1
  from fastapi import FastAPI
2
+ from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
 
7
  app = FastAPI()
8
 
9
+ # Enable CORS so your website can call this API
10
+ app.add_middleware(
11
+ CORSMiddleware,
12
+ allow_origins=["*"],
13
+ allow_methods=["*"],
14
+ allow_headers=["*"],
15
+ )
16
+
17
+ # Download the model from your repo
18
+ print("Fetching Aevis Model...")
19
  model_path = hf_hub_download(
20
  repo_id="devray11/Aevis-Medical-SLM",
21
  filename="DeepSeek-R1-Distill-Llama-8B.Q4_K_M.gguf"
22
  )
23
 
24
+ # Load model (tuned for a small CPU host: 1024-token context, 2 threads)
25
+ llm = Llama(model_path=model_path, n_ctx=1024, n_threads=2)
26
 
27
  class Query(BaseModel):
28
  prompt: str
29
 
30
  @app.post("/generate")
31
  async def generate(query: Query):
32
+ # Prompt format based on your training
33
  fmt_prompt = f"### Instruction:\n{query.prompt}\n\n### Response:\n"
34
+
35
+ output = llm(
36
+ fmt_prompt,
37
+ max_tokens=400,
38
+ stop=["###"],
39
+ echo=False
40
+ )
41
+
42
  return {"response": output["choices"][0]["text"]}
43
 
44
  @app.get("/")
45
  def health():
46
+ return {"status": "Aevis API is Online and Healthy"}