mrmadblack committed on
Commit
ee6ca60
·
verified ·
1 Parent(s): cdd856a

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +18 -2
server.py CHANGED
@@ -1,6 +1,8 @@
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  import subprocess
 
 
4
 
5
  app = FastAPI()
6
 
@@ -14,6 +16,16 @@ class ChatRequest(BaseModel):
14
  model:str
15
  prompt:str
16
 
 
 
 
 
 
 
 
 
 
 
17
  @app.get("/")
18
  def root():
19
  return {"status":"running"}
@@ -25,7 +37,7 @@ def chat(req:ChatRequest):
25
 
26
  result=subprocess.run(
27
  [
28
- "./llama.cpp/main",
29
  "-m",model_path,
30
  "-p",req.prompt,
31
  "-n","200"
@@ -34,4 +46,8 @@ def chat(req:ChatRequest):
34
  text=True
35
  )
36
 
37
- return {"response":result.stdout}
 
 
 
 
 
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  import subprocess
4
+ import os
5
+ import uvicorn
6
 
7
  app = FastAPI()
8
 
 
16
  model:str
17
  prompt:str
18
 
19
+ # create model folder
20
+ os.makedirs("models", exist_ok=True)
21
+
22
+ # download TinyLlama if missing
23
+ if not os.path.exists("models/tinyllama.gguf"):
24
+ os.system(
25
+ "curl -L -o models/tinyllama.gguf "
26
+ "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat.Q4_K_M.gguf"
27
+ )
28
+
29
  @app.get("/")
30
  def root():
31
  return {"status":"running"}
 
37
 
38
  result=subprocess.run(
39
  [
40
+ "./llama.cpp/build/bin/llama-cli",
41
  "-m",model_path,
42
  "-p",req.prompt,
43
  "-n","200"
 
46
  text=True
47
  )
48
 
49
+ return {"response":result.stdout}
50
+
51
+
52
+ if __name__ == "__main__":
53
+ uvicorn.run(app, host="0.0.0.0", port=7860)