Allture committed on
Commit
37eaff3
·
verified ·
1 Parent(s): 73cd179

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -12
app.py CHANGED
@@ -1,24 +1,50 @@
1
- import os, subprocess, gradio as gr
 
 
 
 
2
 
3
- MODEL = "Dolphin-X1-8B.Q4_K_M.gguf"
4
- URL = "https://huggingface.co/dphn/Dolphin-X1-8B-GGUF/resolve/main/" + MODEL
 
 
 
 
5
 
6
  def setup():
7
- if not os.path.exists(MODEL):
8
- subprocess.run(["wget", "-q", URL])
 
 
 
9
 
10
- if not os.path.exists("llama"):
11
- subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp"])
12
- subprocess.run(["cmake", "-S", "llama.cpp", "-B", "llama"])
13
- subprocess.run(["cmake", "--build", "llama", "--config", "Release"])
14
 
15
  setup()
16
 
17
- def chat(prompt):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  p = subprocess.Popen(
19
- ["./llama/bin/main", "-m", MODEL, "-p", prompt, "-n", "200"],
20
  stdout=subprocess.PIPE
21
  )
22
  return p.stdout.read().decode(errors="ignore")
23
 
24
- gr.Interface(chat, gr.Textbox(label="Ask Dolphin X1"), gr.Textbox()).launch()
 
1
+ import os, subprocess, time
2
+ from typing import Optional
3
+ import gradio as gr
4
+ from fastapi import FastAPI, Header, HTTPException
5
+ from pydantic import BaseModel
6
 
7
+ MODEL_FILE = "Dolphin-X1-8B.Q4_K_M.gguf"
8
+ MODEL_URL = "https://huggingface.co/dphn/Dolphin-X1-8B-GGUF/resolve/main/Dolphin-X1-8B.Q4_K_M.gguf"
9
+ SPACE_API_KEY = os.getenv("SPACE_API_KEY")
10
+
11
+ BUILD_DIR = "llama"
12
+ THREADS = "4"
13
 
14
def setup():
    """Fetch the model file and build llama.cpp when they are missing.

    Steps, each skipped when its target path already exists:
      1. Download MODEL_URL to MODEL_FILE with wget.
      2. Shallow-clone the llama.cpp repository into ``llama.cpp``.
      3. Configure and build it with CMake into BUILD_DIR.

    Raises:
        subprocess.CalledProcessError: if any of the external commands
            exits with a non-zero status.
    """
    if not os.path.exists(MODEL_FILE):
        # Download to a temporary name first: ``wget -O`` creates the output
        # file even when the transfer fails, and a truncated MODEL_FILE would
        # make the existence check above skip the download on the next start.
        partial = MODEL_FILE + ".part"
        subprocess.run(["wget", "-q", MODEL_URL, "-O", partial], check=True)
        os.replace(partial, MODEL_FILE)

    if not os.path.exists("llama.cpp"):
        subprocess.run(
            ["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp"],
            check=True,
        )

    if not os.path.exists(BUILD_DIR):
        # check=True surfaces configure/build failures immediately instead of
        # letting the app start without a usable binary.
        # NOTE(review): BUILD_DIR is created by the configure step, so a build
        # that fails midway is not retried on the next start — confirm whether
        # the directory should be removed on failure.
        subprocess.run(["cmake", "-S", "llama.cpp", "-B", BUILD_DIR], check=True)
        subprocess.run(
            ["cmake", "--build", BUILD_DIR, "--config", "Release", "-j", THREADS],
            check=True,
        )
 
24
 
25
  setup()
26
 
27
+ app = FastAPI()
28
+
29
# Request body accepted by POST /api/chat.
class Query(BaseModel):
    # Prompt text that is passed to the llama.cpp binary via "-p".
    q: str
31
+
32
@app.post("/api/chat")
def chat(q: Query, x_api_key: Optional[str] = Header(None)):
    """Run the model on ``q.q`` and return its output as ``{"reply": ...}``.

    Args:
        q: Request body carrying the prompt in its ``q`` field.
        x_api_key: Value of the ``X-API-Key`` request header, if sent.

    Returns:
        A dict with a single ``"reply"`` key holding the decoded stdout of
        the llama.cpp process.

    Raises:
        HTTPException: 401 when no API key is configured, the header is
            missing, or the header does not match SPACE_API_KEY.
    """
    import hmac

    # Fail closed: with SPACE_API_KEY unset, a plain ``!=`` comparison lets a
    # request without the header through (None != None is False).
    # compare_digest keeps the comparison constant-time.
    authorized = (
        bool(SPACE_API_KEY)
        and x_api_key is not None
        and hmac.compare_digest(x_api_key.encode(), SPACE_API_KEY.encode())
    )
    if not authorized:
        raise HTTPException(401, "Unauthorized")

    # subprocess.run waits for the child to exit, so it is reaped instead of
    # lingering as a zombie after its stdout has been read.
    # NOTE(review): confirm the cloned llama.cpp revision still produces
    # ``bin/main``; newer revisions may name the CLI binary differently.
    result = subprocess.run(
        [f"./{BUILD_DIR}/bin/main", "-m", MODEL_FILE, "-p", q.q, "-n", "200", "-t", THREADS],
        stdout=subprocess.PIPE,
    )
    return {"reply": result.stdout.decode(errors="ignore")}
42
+
43
def ui_chat(q):
    """Run the model on prompt ``q`` and return its output as text.

    Args:
        q: Prompt string from the Gradio textbox.

    Returns:
        The decoded stdout of the llama.cpp process; undecodable bytes are
        dropped.
    """
    # subprocess.run waits for the child to exit, so it is reaped instead of
    # lingering as a zombie after its stdout has been read.
    # NOTE(review): confirm the cloned llama.cpp revision still produces
    # ``bin/main``; newer revisions may name the CLI binary differently.
    result = subprocess.run(
        [f"./{BUILD_DIR}/bin/main", "-m", MODEL_FILE, "-p", q, "-n", "200", "-t", THREADS],
        stdout=subprocess.PIPE,
    )
    return result.stdout.decode(errors="ignore")
49
 
50
+ gr.mount_gradio_app(app, gr.Interface(ui_chat, gr.Textbox(), gr.Textbox()), path="/")