dolphin / app.py
Allture's picture
Update app.py
37eaff3 verified
import hmac
import os
import shutil
import subprocess
import time
from typing import Optional

import gradio as gr
from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel
# Local file name of the GGUF model and the URL it is downloaded from.
MODEL_FILE: str = "Dolphin-X1-8B.Q4_K_M.gguf"
MODEL_URL: str = "https://huggingface.co/dphn/Dolphin-X1-8B-GGUF/resolve/main/Dolphin-X1-8B.Q4_K_M.gguf"

# Shared secret compared against the X-API-Key request header; None when the
# environment variable is not set.
SPACE_API_KEY: Optional[str] = os.environ.get("SPACE_API_KEY")

# Directory cmake builds llama.cpp into.
BUILD_DIR: str = "llama"

# Thread count, kept as a string because it is only ever passed as a
# command-line argument (cmake `-j` and the llama.cpp `-t` flag).
THREADS: str = "4"
def setup():
    """Download the model and build llama.cpp if they are not already present.

    Each step is skipped when its output already exists on disk:
    the model file, the ``llama.cpp`` checkout, and the cmake build directory.

    Every external command is run with ``check=True`` so that a failed
    download, clone or build stops start-up with a clear error instead of
    leaving the app running without a usable model or binary.

    Raises:
        subprocess.CalledProcessError: if wget, git or cmake exits non-zero.
        OSError: if one of those tools cannot be executed at all.
    """
    if not os.path.exists(MODEL_FILE):
        # `wget -O` creates the output file even when the transfer fails, so
        # download under a scratch name and publish it only once complete.
        # Otherwise a truncated model would count as "present" next start.
        partial = MODEL_FILE + ".part"
        try:
            subprocess.run(["wget", "-q", MODEL_URL, "-O", partial], check=True)
            os.replace(partial, MODEL_FILE)
        finally:
            # Only still exists if the download or the rename failed.
            if os.path.exists(partial):
                os.remove(partial)

    if not os.path.exists("llama.cpp"):
        subprocess.run(
            ["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp"],
            check=True,
        )

    if not os.path.exists(BUILD_DIR):
        built = False
        try:
            subprocess.run(["cmake", "-S", "llama.cpp", "-B", BUILD_DIR], check=True)
            subprocess.run(
                ["cmake", "--build", BUILD_DIR, "--config", "Release", "-j", THREADS],
                check=True,
            )
            built = True
        finally:
            if not built:
                # The existence of BUILD_DIR is what marks the build as done,
                # so drop a half-finished tree to force a retry next start.
                shutil.rmtree(BUILD_DIR, ignore_errors=True)
# Runs at import time and blocks until setup() returns, so the download and
# build steps have been attempted before the application object is created.
setup()
# FastAPI application on which the routes below are registered.
app = FastAPI()
class Query(BaseModel):
    """Request body accepted by the POST /api/chat endpoint."""

    # Prompt text; forwarded to the llama.cpp binary as its `-p` argument.
    q: str
# Names the llama.cpp command-line binary may have, tried in order.
# Upstream renamed `main` to `llama-cli`, so a fresh checkout no longer
# produces `bin/main`; the legacy name is kept last for old checkouts.
# NOTE(review): recent checkouts appear to ship the one-shot completion tool
# as `llama-completion` -- confirm against the llama.cpp revision being built.
_LLAMA_BINARIES = ("llama-completion", "llama-cli", "main")


def _generate(prompt: str) -> str:
    """Run the llama.cpp CLI once for *prompt* and return its stdout as text.

    Args:
        prompt: Text passed to the binary through its ``-p`` argument.

    Returns:
        The process's standard output, decoded with undecodable bytes dropped.
        The exit status is not inspected, so a failed run yields whatever was
        printed before it stopped (possibly an empty string).

    Raises:
        FileNotFoundError: if none of the known binary names exists under
            ``BUILD_DIR/bin``.
    """
    bin_dir = os.path.join(".", BUILD_DIR, "bin")
    for name in _LLAMA_BINARIES:
        binary = os.path.join(bin_dir, name)
        if os.path.isfile(binary):
            break
    else:
        raise FileNotFoundError(
            f"no llama.cpp binary ({', '.join(_LLAMA_BINARIES)}) found in {bin_dir}"
        )

    # subprocess.run waits for the child and closes the pipe, so finished
    # processes are reaped instead of accumulating as zombies.
    # stdin is /dev/null so a CLI that would prompt for more input sees EOF
    # and exits rather than hanging the request.
    completed = subprocess.run(
        [binary, "-m", MODEL_FILE, "-p", prompt, "-n", "200", "-t", THREADS],
        stdin=subprocess.DEVNULL,
        stdout=subprocess.PIPE,
    )
    return completed.stdout.decode(errors="ignore")


@app.post("/api/chat")
def chat(q: Query, x_api_key: Optional[str] = Header(None)):
    """Authenticated JSON endpoint: generate a reply for ``q.q``.

    Args:
        q: Request body carrying the prompt.
        x_api_key: Value of the ``X-API-Key`` request header, if sent.

    Returns:
        ``{"reply": <model output>}``.

    Raises:
        HTTPException: 401 when the header is missing or wrong, or when no
            SPACE_API_KEY is configured on the server.
    """
    # Fail closed: with SPACE_API_KEY unset, a plain `!=` check would let a
    # request without the header through because None == None.
    # compare_digest avoids leaking the key through timing; both sides are
    # encoded to bytes because the str form rejects non-ASCII input.
    authorized = (
        bool(SPACE_API_KEY)
        and x_api_key is not None
        and hmac.compare_digest(x_api_key.encode(), SPACE_API_KEY.encode())
    )
    if not authorized:
        raise HTTPException(401, "Unauthorized")
    return {"reply": _generate(q.q)}


def ui_chat(q):
    """Gradio callback: return the model's output for the prompt text *q*."""
    return _generate(q)
# Serve a one-textbox-in, one-textbox-out Gradio interface backed by ui_chat
# at the root path of the FastAPI app.
gr.mount_gradio_app(
    app,
    gr.Interface(fn=ui_chat, inputs=gr.Textbox(), outputs=gr.Textbox()),
    path="/",
)