SharmaGroups07 committed on
Commit
4f9c2f2
·
verified ·
1 Parent(s): b8ede8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -17
app.py CHANGED
@@ -1,42 +1,48 @@
1
  from fastapi import FastAPI
 
2
  from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
- import uvicorn
5
 
6
  app = FastAPI()
7
 
 
 
8
  MODEL_REPO = "microsoft/Phi-3-mini-4k-instruct-gguf"
9
  MODEL_FILE = "Phi-3-mini-4k-instruct-q4.gguf"
10
 
11
- print("Downloading model...")
12
  model_path = hf_hub_download(
13
  repo_id=MODEL_REPO,
14
  filename=MODEL_FILE
15
  )
16
 
17
- print("Loading model...")
18
  llm = Llama(
19
  model_path=model_path,
20
  n_ctx=2048,
21
  n_threads=2
22
  )
23
 
24
- print("Model loaded successfully!")
25
 
26
- @app.get("/")
27
- def root():
28
- return {"status": "AI engine running"}
29
-
30
- @app.get("/generate")
31
- def generate(prompt: str):
32
- output = llm(
33
- prompt,
34
- max_tokens=200,
35
- temperature=0.7
36
  )
37
- return {"response": output}
38
 
 
 
 
 
 
39
 
 
40
 
41
- if __name__ == "__main__":
42
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
1
  from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
 
5
 
6
app = FastAPI()

# ---------- LOAD MODEL ----------

# Hugging Face repo and quantized GGUF weights file served by this app.
MODEL_REPO = "microsoft/Phi-3-mini-4k-instruct-gguf"
MODEL_FILE = "Phi-3-mini-4k-instruct-q4.gguf"

# Download the weights (huggingface_hub caches after the first run) and
# return the local filesystem path to the .gguf file.
model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE
)

# Load the model once at import/startup time; every request reuses this
# single instance.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,   # context window in tokens
    n_threads=2   # NOTE(review): presumably sized for a small CPU host — confirm
)
23
 
24
+ # ---------- REQUEST FORMAT ----------
25
 
26
class ChatRequest(BaseModel):
    """JSON request body for POST /chat."""
    # Raw user message; interpolated verbatim into the model prompt.
    message: str
+
29
+ # ---------- CHAT ENDPOINT ----------
30
+
31
@app.post("/chat")
def chat(req: ChatRequest):
    """Generate one assistant reply for the user's message.

    Wraps the message in a minimal "User:/Assistant:" transcript, runs a
    single completion on the module-level `llm`, and returns the trimmed
    completion text as ``{"reply": ...}``.
    """
    response = llm(
        f"User: {req.message}\nAssistant:",
        max_tokens=300,
        # Fix: without a stop sequence the model keeps writing past its
        # turn and fabricates further "User:" lines until max_tokens is
        # exhausted; cut generation at the next simulated user turn.
        stop=["User:"]
    )

    # llama_cpp returns an OpenAI-style completion dict.
    reply = response["choices"][0]["text"]

    return {
        "reply": reply.strip()
    }
43
 
44
+ # ---------- HEALTH CHECK ----------
45
 
46
@app.get("/")
def root():
    """Liveness probe: report that the API process is up."""
    status_payload = {"status": "AI Engine Running"}
    return status_payload