SharmaGroups07 committed on
Commit
cf95752
·
verified ·
1 Parent(s): ef3a046

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -2,6 +2,7 @@ from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
 
5
 
6
  app = FastAPI()
7
 
@@ -15,8 +16,9 @@ model_path = hf_hub_download(
15
 
16
  llm = Llama(
17
  model_path=model_path,
18
- n_ctx=2048,
19
- n_threads=2
 
20
  )
21
 
22
  class ChatRequest(BaseModel):
@@ -28,8 +30,13 @@ def root():
28
 
29
  @app.post("/chat")
30
  def chat(req: ChatRequest):
 
 
 
 
 
31
  output = llm(
32
- f"<|user|>{req.message}<|assistant|>",
33
  max_tokens=512,
34
  temperature=0.7,
35
  top_p=0.9,
@@ -39,8 +46,6 @@ def chat(req: ChatRequest):
39
 
40
  return {"reply": output["choices"][0]["text"]}
41
 
42
-
43
- # ⭐ THIS PART WAS MISSING
44
  if __name__ == "__main__":
45
  import uvicorn
46
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
+ import multiprocessing
6
 
7
  app = FastAPI()
8
 
 
16
 
17
  llm = Llama(
18
  model_path=model_path,
19
+ n_ctx=4096,
20
+ n_threads=multiprocessing.cpu_count(),
21
+ n_gpu_layers=0
22
  )
23
 
24
  class ChatRequest(BaseModel):
 
30
 
31
  @app.post("/chat")
32
  def chat(req: ChatRequest):
33
+
34
+ system_prompt = "<|system|>You are a professional AI assistant. Answer clearly, structured, and concisely using markdown formatting.<|end|>"
35
+
36
+ prompt = system_prompt + f"<|user|>{req.message}<|assistant|>"
37
+
38
  output = llm(
39
+ prompt,
40
  max_tokens=512,
41
  temperature=0.7,
42
  top_p=0.9,
 
46
 
47
  return {"reply": output["choices"][0]["text"]}
48
 
 
 
49
  if __name__ == "__main__":
50
  import uvicorn
51
  uvicorn.run(app, host="0.0.0.0", port=7860)