Update app.py
app.py CHANGED
@@ -1,42 +1,48 @@
from fastapi import FastAPI
+from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
-import uvicorn

app = FastAPI()

+# ---------- LOAD MODEL ----------
+
MODEL_REPO = "microsoft/Phi-3-mini-4k-instruct-gguf"
MODEL_FILE = "Phi-3-mini-4k-instruct-q4.gguf"

-print("Downloading model...")
model_path = hf_hub_download(
    repo_id=MODEL_REPO,
    filename=MODEL_FILE
)

-print("Loading model...")
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=2
)

-# (the rest of the old endpoint was removed; its body is not captured in this view)
-)
-    return {"response": output}
+# ---------- REQUEST FORMAT ----------
+
+class ChatRequest(BaseModel):
+    message: str
+
+# ---------- CHAT ENDPOINT ----------
+
+@app.post("/chat")
+def chat(req: ChatRequest):
+    response = llm(
+        f"User: {req.message}\nAssistant:",
+        max_tokens=300
+    )
+
+    reply = response["choices"][0]["text"]
+
+    return {
+        "reply": reply.strip()
+    }
+
+# ---------- HEALTH CHECK ----------
+
+@app.get("/")
+def root():
+    return {"status": "AI Engine Running"}