SharmaGroups07 committed on
Commit
cf95752
·
verified ·
1 Parent(s): ef3a046

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -2,6 +2,7 @@ from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
 
5
 
6
  app = FastAPI()
7
 
@@ -15,8 +16,9 @@ model_path = hf_hub_download(
15
 
16
  llm = Llama(
17
  model_path=model_path,
18
- n_ctx=2048,
19
- n_threads=2
 
20
  )
21
 
22
  class ChatRequest(BaseModel):
@@ -28,8 +30,13 @@ def root():
28
 
29
  @app.post("/chat")
30
  def chat(req: ChatRequest):
 
 
 
 
 
31
  output = llm(
32
- f"<|user|>{req.message}<|assistant|>",
33
  max_tokens=512,
34
  temperature=0.7,
35
  top_p=0.9,
@@ -39,8 +46,6 @@ def chat(req: ChatRequest):
39
 
40
  return {"reply": output["choices"][0]["text"]}
41
 
42
-
43
- # ⭐ THIS PART WAS MISSING
44
  if __name__ == "__main__":
45
  import uvicorn
46
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
+ import multiprocessing
6
 
7
  app = FastAPI()
8
 
 
16
 
17
  llm = Llama(
18
  model_path=model_path,
19
+ n_ctx=4096,
20
+ n_threads=multiprocessing.cpu_count(),
21
+ n_gpu_layers=0
22
  )
23
 
24
  class ChatRequest(BaseModel):
 
30
 
31
  @app.post("/chat")
32
  def chat(req: ChatRequest):
33
+
34
+ system_prompt = "<|system|>You are a professional AI assistant. Answer clearly, structured, and concisely using markdown formatting.<|end|>"
35
+
36
+ prompt = system_prompt + f"<|user|>{req.message}<|assistant|>"
37
+
38
  output = llm(
39
+ prompt,
40
  max_tokens=512,
41
  temperature=0.7,
42
  top_p=0.9,
 
46
 
47
  return {"reply": output["choices"][0]["text"]}
48
 
 
 
49
  if __name__ == "__main__":
50
  import uvicorn
51
  uvicorn.run(app, host="0.0.0.0", port=7860)