Spaces:

SharmaGroups07
/

ai-engine

Running

SharmaGroups07 committed on Feb 18

Commit

73a88d0

verified ·

1 Parent(s): 8e15e85

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,42 +1,36 @@
 from fastapi import FastAPI
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
-import uvicorn
 app = FastAPI()
 MODEL_REPO = "microsoft/Phi-3-mini-4k-instruct-gguf"
 MODEL_FILE = "Phi-3-mini-4k-instruct-q4.gguf"
-print("Downloading model...")
 model_path = hf_hub_download(
     repo_id=MODEL_REPO,
     filename=MODEL_FILE
 )
-print("Loading model...")
 llm = Llama(
     model_path=model_path,
     n_ctx=2048,
     n_threads=2
 )
-print("Model loaded successfully!")
 @app.get("/")
 def root():
     return {"status": "AI engine running"}
-@app.get("/generate")
-def generate(prompt: str):
     output = llm(
-        prompt,
-        max_tokens=200,
-        temperature=0.7
     )
-    return {"response": output}
-if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 from fastapi import FastAPI
+from pydantic import BaseModel
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 app = FastAPI()
 MODEL_REPO = "microsoft/Phi-3-mini-4k-instruct-gguf"
 MODEL_FILE = "Phi-3-mini-4k-instruct-q4.gguf"
 model_path = hf_hub_download(
     repo_id=MODEL_REPO,
     filename=MODEL_FILE
 )
 llm = Llama(
     model_path=model_path,
     n_ctx=2048,
     n_threads=2
 )
+class ChatRequest(BaseModel):
+    message: str
 @app.get("/")
 def root():
     return {"status": "AI engine running"}
+@app.post("/chat")
+def chat(req: ChatRequest):
     output = llm(
+        f"<|user|>{req.message}<|assistant|>",
+        max_tokens=300,
+        stop=["<|end|>"]
     )
+    return {"reply": output["choices"][0]["text"]}