Spaces:
Running
Running
Update server.py
Browse files
server.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
from pydantic import BaseModel
|
| 3 |
import subprocess
|
|
|
|
|
|
|
| 4 |
|
| 5 |
app = FastAPI()
|
| 6 |
|
|
@@ -14,6 +16,16 @@ class ChatRequest(BaseModel):
|
|
| 14 |
model:str
|
| 15 |
prompt:str
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
@app.get("/")
|
| 18 |
def root():
|
| 19 |
return {"status":"running"}
|
|
@@ -25,7 +37,7 @@ def chat(req:ChatRequest):
|
|
| 25 |
|
| 26 |
result=subprocess.run(
|
| 27 |
[
|
| 28 |
-
"./llama.cpp/
|
| 29 |
"-m",model_path,
|
| 30 |
"-p",req.prompt,
|
| 31 |
"-n","200"
|
|
@@ -34,4 +46,8 @@ def chat(req:ChatRequest):
|
|
| 34 |
text=True
|
| 35 |
)
|
| 36 |
|
| 37 |
-
return {"response":result.stdout}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from fastapi import FastAPI
|
| 2 |
from pydantic import BaseModel
|
| 3 |
import subprocess
|
| 4 |
+
import os
|
| 5 |
+
import uvicorn
|
| 6 |
|
| 7 |
app = FastAPI()
|
| 8 |
|
|
|
|
| 16 |
model:str
|
| 17 |
prompt:str
|
| 18 |
|
| 19 |
# --- model bootstrap (runs once, at import time) ---------------------------
# Spaces containers start with an empty filesystem, so the GGUF weights are
# fetched on first boot and reused on later requests.

# create model folder
os.makedirs("models", exist_ok=True)

_MODEL_FILE = "models/tinyllama.gguf"

# download TinyLlama if missing
if not os.path.exists(_MODEL_FILE):
    status = os.system(
        # --fail makes curl exit non-zero on HTTP errors instead of saving
        # the HTML error page as if it were the model file.
        "curl -L --fail -o models/tinyllama.gguf "
        "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat.Q4_K_M.gguf"
    )
    # A failed or interrupted download must not leave a partial file behind;
    # otherwise the exists() check above would skip re-downloading forever.
    if status != 0 and os.path.exists(_MODEL_FILE):
        os.remove(_MODEL_FILE)
|
| 28 |
+
|
| 29 |
@app.get("/")
|
| 30 |
def root():
|
| 31 |
return {"status":"running"}
|
|
|
|
| 37 |
|
| 38 |
result=subprocess.run(
|
| 39 |
[
|
| 40 |
+
"./llama.cpp/build/bin/llama-cli",
|
| 41 |
"-m",model_path,
|
| 42 |
"-p",req.prompt,
|
| 43 |
"-n","200"
|
|
|
|
| 46 |
text=True
|
| 47 |
)
|
| 48 |
|
| 49 |
+
return {"response":result.stdout}
|
| 50 |
+
|
| 51 |
+
|
| 52 |
# Script entry point: serve on all interfaces at port 7860 (the port the
# Spaces runtime routes external traffic to).
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
|