mrmadblack committed on
Commit
ee6ca60
·
verified ·
1 Parent(s): cdd856a

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +18 -2
server.py CHANGED
@@ -1,6 +1,8 @@
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  import subprocess
 
 
4
 
5
  app = FastAPI()
6
 
@@ -14,6 +16,16 @@ class ChatRequest(BaseModel):
14
  model:str
15
  prompt:str
16
 
 
 
 
 
 
 
 
 
 
 
17
  @app.get("/")
18
  def root():
19
  return {"status":"running"}
@@ -25,7 +37,7 @@ def chat(req:ChatRequest):
25
 
26
  result=subprocess.run(
27
  [
28
- "./llama.cpp/main",
29
  "-m",model_path,
30
  "-p",req.prompt,
31
  "-n","200"
@@ -34,4 +46,8 @@ def chat(req:ChatRequest):
34
  text=True
35
  )
36
 
37
- return {"response":result.stdout}
 
 
 
 
 
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  import subprocess
4
+ import os
5
+ import uvicorn
6
 
7
  app = FastAPI()
8
 
 
16
  model:str
17
  prompt:str
18
 
19
+ # create model folder
20
+ os.makedirs("models", exist_ok=True)
21
+
22
+ # download TinyLlama if missing
23
+ if not os.path.exists("models/tinyllama.gguf"):
24
+ os.system(
25
+ "curl -L -o models/tinyllama.gguf "
26
+ "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat.Q4_K_M.gguf"
27
+ )
28
+
29
  @app.get("/")
30
  def root():
31
  return {"status":"running"}
 
37
 
38
  result=subprocess.run(
39
  [
40
+ "./llama.cpp/build/bin/llama-cli",
41
  "-m",model_path,
42
  "-p",req.prompt,
43
  "-n","200"
 
46
  text=True
47
  )
48
 
49
+ return {"response":result.stdout}
50
+
51
+
52
+ if __name__ == "__main__":
53
+ uvicorn.run(app, host="0.0.0.0", port=7860)