File size: 1,603 Bytes
37eaff3
 
 
 
 
af84e50
37eaff3
 
 
 
 
 
af84e50
c9f4e23
37eaff3
 
 
 
 
af84e50
37eaff3
 
 
c9f4e23
 
af84e50
37eaff3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af84e50
37eaff3
af84e50
 
c9f4e23
af84e50
37eaff3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os, subprocess, time
from typing import Optional
import gradio as gr
from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel

# Local filename the model weights are stored under; setup() downloads to it and
# the inference binary loads it via its -m flag.
MODEL_FILE = "Dolphin-X1-8B.Q4_K_M.gguf"
# Source URL that setup() fetches when MODEL_FILE does not exist yet.
MODEL_URL  = "https://huggingface.co/dphn/Dolphin-X1-8B-GGUF/resolve/main/Dolphin-X1-8B.Q4_K_M.gguf"
# Secret that chat() compares against its x_api_key header parameter.
# os.getenv returns None when the environment variable is not set.
SPACE_API_KEY = os.getenv("SPACE_API_KEY")

# Directory CMake writes the llama.cpp build tree to; the binary is run from BUILD_DIR/bin.
BUILD_DIR = "llama"
# Kept as a string because it is passed straight through as a command-line
# argument (cmake's -j and the inference binary's -t).
THREADS = "4"

def setup():
    """Fetch the model and build llama.cpp if they are not already present.

    Each step is skipped when its output already exists, so repeated starts
    only redo missing work:

    1. download ``MODEL_URL`` to ``MODEL_FILE`` with ``wget``;
    2. shallow-clone llama.cpp into ``./llama.cpp``;
    3. configure and build it with CMake into ``BUILD_DIR``.

    Raises:
        subprocess.CalledProcessError: If any external command exits with a
            non-zero status.
        OSError: If a required tool (wget, git, cmake) cannot be started.
    """
    import shutil

    if not os.path.exists(MODEL_FILE):
        # Download to a side file and rename on success: `wget -O` creates the
        # target even when the transfer fails, which would otherwise make a
        # truncated file look like a finished download on the next start.
        partial_path = MODEL_FILE + ".part"
        subprocess.run(["wget", "-q", MODEL_URL, "-O", partial_path], check=True)
        os.replace(partial_path, MODEL_FILE)

    if not os.path.exists("llama.cpp"):
        subprocess.run(
            ["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp"],
            check=True,
        )

    if not os.path.exists(BUILD_DIR):
        try:
            subprocess.run(["cmake", "-S", "llama.cpp", "-B", BUILD_DIR], check=True)
            subprocess.run(
                ["cmake", "--build", BUILD_DIR, "--config", "Release", "-j", THREADS],
                check=True,
            )
        except (subprocess.CalledProcessError, OSError):
            # The existence of BUILD_DIR is what marks the build as done, so a
            # half-finished tree must not survive a failure.
            shutil.rmtree(BUILD_DIR, ignore_errors=True)
            raise

# Runs at import time, so the model download and llama.cpp build happen
# before the application object is created.
setup()

# FastAPI application that carries the /api/chat route; the Gradio UI is mounted onto it below.
app = FastAPI()

# Request body accepted by POST /api/chat.
class Query(BaseModel):
    q: str  # prompt text; chat() passes it to the inference binary via -p

@app.post("/api/chat")
def chat(q: Query, x_api_key: Optional[str] = Header(None)):
    """Run one prompt through the llama.cpp binary and return its output.

    Args:
        q: Request body; ``q.q`` is passed to the binary as the ``-p`` prompt.
        x_api_key: API key supplied in the request header, or ``None`` when
            the header is absent.

    Returns:
        A dict ``{"reply": text}`` where ``text`` is the binary's stdout
        decoded as text with undecodable bytes dropped.

    Raises:
        HTTPException: 401 when the header is missing or wrong, or when no
            ``SPACE_API_KEY`` is configured on the server.
    """
    import hmac

    # Fail closed: with SPACE_API_KEY unset, a plain `!=` check would treat a
    # request that sends no header (None vs. None) as authorized.
    if not SPACE_API_KEY or x_api_key is None:
        raise HTTPException(401, "Unauthorized")
    # Constant-time comparison. Both sides are encoded because
    # hmac.compare_digest rejects str values containing non-ASCII characters.
    if not hmac.compare_digest(x_api_key.encode(), SPACE_API_KEY.encode()):
        raise HTTPException(401, "Unauthorized")

    # NOTE(review): upstream llama.cpp renamed its CLI binary from `main` to
    # `llama-cli`; confirm that bin/main exists for the revision setup() clones.
    # subprocess.run waits for the child to exit and closes the pipe, so no
    # un-reaped process or open descriptor is left behind per request.
    result = subprocess.run(
        [f"./{BUILD_DIR}/bin/main", "-m", MODEL_FILE, "-p", q.q, "-n", "200", "-t", THREADS],
        stdout=subprocess.PIPE,
    )
    return {"reply": result.stdout.decode(errors="ignore")}

def ui_chat(q: str) -> str:
    """Run prompt *q* through the llama.cpp binary and return its stdout as text.

    Used as the handler function of the Gradio interface.

    Args:
        q: Prompt text, passed to the binary via ``-p``.

    Returns:
        The binary's stdout decoded as text, with undecodable bytes dropped.
    """
    # NOTE(review): upstream llama.cpp renamed its CLI binary from `main` to
    # `llama-cli`; confirm that bin/main exists for the revision setup() clones.
    # subprocess.run waits for the child to exit and closes the pipe, so no
    # un-reaped process or open descriptor is left behind per call.
    result = subprocess.run(
        [f"./{BUILD_DIR}/bin/main", "-m", MODEL_FILE, "-p", q, "-n", "200", "-t", THREADS],
        stdout=subprocess.PIPE,
    )
    return result.stdout.decode(errors="ignore")

# Mount a Gradio interface (one text input, one text output, handled by
# ui_chat) at the root path of the FastAPI app that also carries /api/chat.
gr.mount_gradio_app(app, gr.Interface(ui_chat, gr.Textbox(), gr.Textbox()), path="/")