SharmaGroups07 committed on
Commit
73a88d0
·
verified ·
1 Parent(s): 8e15e85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -15
app.py CHANGED
@@ -1,42 +1,36 @@
1
  from fastapi import FastAPI
 
2
  from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
- import uvicorn
5
 
6
  app = FastAPI()
7
 
8
  MODEL_REPO = "microsoft/Phi-3-mini-4k-instruct-gguf"
9
  MODEL_FILE = "Phi-3-mini-4k-instruct-q4.gguf"
10
 
11
- print("Downloading model...")
12
  model_path = hf_hub_download(
13
  repo_id=MODEL_REPO,
14
  filename=MODEL_FILE
15
  )
16
 
17
- print("Loading model...")
18
  llm = Llama(
19
  model_path=model_path,
20
  n_ctx=2048,
21
  n_threads=2
22
  )
23
 
24
- print("Model loaded successfully!")
 
25
 
26
  @app.get("/")
27
  def root():
28
  return {"status": "AI engine running"}
29
 
30
- @app.get("/generate")
31
- def generate(prompt: str):
32
  output = llm(
33
- prompt,
34
- max_tokens=200,
35
- temperature=0.7
36
  )
37
- return {"response": output}
38
-
39
-
40
-
41
- if __name__ == "__main__":
42
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
 
5
 
6
# FastAPI application serving a quantized Phi-3-mini model via llama.cpp.
app = FastAPI()

# Hugging Face Hub coordinates of the GGUF model artifact.
MODEL_REPO = "microsoft/Phi-3-mini-4k-instruct-gguf"
MODEL_FILE = "Phi-3-mini-4k-instruct-q4.gguf"

# Fetch the model file (huggingface_hub caches it locally after the first run).
model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)

# Load the model once at import time so every request reuses the same
# in-memory instance. n_ctx/n_threads are sized for a small CPU host —
# presumably a 2-vCPU Space; confirm against the deployment target.
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=2)
21
 
22
class ChatRequest(BaseModel):
    """Request body for POST /chat."""

    # Raw user message; the /chat handler wraps it in Phi-3 chat-template
    # markers (<|user|>...<|assistant|>) before passing it to the model.
    message: str
24
 
25
@app.get("/")
def root():
    """Health-check endpoint: report that the service is up."""
    status_payload = {"status": "AI engine running"}
    return status_payload
28
 
29
@app.post("/chat")
def chat(req: ChatRequest):
    """Run one chat completion and return the model's reply text.

    The user message is wrapped in Phi-3's chat-template markers; generation
    stops at the model's <|end|> token or after 300 new tokens.
    """
    prompt = f"<|user|>{req.message}<|assistant|>"
    completion = llm(
        prompt,
        max_tokens=300,
        stop=["<|end|>"],
    )
    # llama-cpp-python returns an OpenAI-style completion dict; the
    # generated text lives under choices[0]["text"].
    return {"reply": completion["choices"][0]["text"]}