SharmaGroups07 committed on
Commit
cd2d9ab
·
verified ·
1 Parent(s): 0977a91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -6
app.py CHANGED
@@ -8,25 +8,35 @@ app = FastAPI()
8
  MODEL_REPO = "microsoft/Phi-3-mini-4k-instruct-gguf"
9
  MODEL_FILE = "Phi-3-mini-4k-instruct-q4.gguf"
10
 
 
 
11
  model_path = hf_hub_download(
12
  repo_id=MODEL_REPO,
13
  filename=MODEL_FILE
14
  )
15
 
 
 
16
  llm = Llama(
17
  model_path=model_path,
18
  n_ctx=2048,
19
  n_threads=2
20
  )
21
 
 
 
22
  class Prompt(BaseModel):
23
  prompt: str
24
 
25
- @app.post("/generate")
26
- def generate(req: Prompt):
27
- output = llm(req.prompt, max_tokens=200)
28
- return {"response": output["choices"][0]["text"]}
29
-
30
  @app.get("/")
31
  def root():
32
- return {"status": "AI running"}
 
 
 
 
 
 
 
 
 
 
8
  MODEL_REPO = "microsoft/Phi-3-mini-4k-instruct-gguf"
9
  MODEL_FILE = "Phi-3-mini-4k-instruct-q4.gguf"
10
 
11
+ print("Downloading model...")
12
+
13
  model_path = hf_hub_download(
14
  repo_id=MODEL_REPO,
15
  filename=MODEL_FILE
16
  )
17
 
18
+ print("Loading model...")
19
+
20
  llm = Llama(
21
  model_path=model_path,
22
  n_ctx=2048,
23
  n_threads=2
24
  )
25
 
26
+ print("Model loaded successfully!")
27
+
28
  class Prompt(BaseModel):
29
  prompt: str
30
 
 
 
 
 
 
31
  @app.get("/")
32
  def root():
33
+ return {"status": "AI Engine Running 🚀"}
34
+
35
+ @app.post("/generate")
36
+ def generate(data: Prompt):
37
+ output = llm(
38
+ data.prompt,
39
+ max_tokens=200,
40
+ stop=["</s>"]
41
+ )
42
+ return {"response": output["choices"][0]["text"]}