import os
import secrets
import shutil
import subprocess
import time
from typing import Optional

import gradio as gr
from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel
| |
|
# GGUF weights: local file name and the URL it is downloaded from.
MODEL_FILE = "Dolphin-X1-8B.Q4_K_M.gguf"
MODEL_URL = f"https://huggingface.co/dphn/Dolphin-X1-8B-GGUF/resolve/main/{MODEL_FILE}"
# Shared secret compared against the x_api_key header of /api/chat;
# None when the environment variable is not set.
SPACE_API_KEY = os.environ.get("SPACE_API_KEY")

# Output directory of the llama.cpp CMake build.
BUILD_DIR = "llama"
# Kept as a string because it is passed verbatim as a command-line argument
# (cmake's -j during the build and the model binary's -t at inference time).
THREADS = "4"
| |
|
def setup():
    """Download the model and build llama.cpp if they are not already present.

    Each step is skipped when its output already exists on disk:
    ``MODEL_FILE`` for the download, ``llama.cpp`` for the clone and
    ``BUILD_DIR`` for the CMake configure + build.

    Raises:
        subprocess.CalledProcessError: if wget, git or cmake exits non-zero.
        FileNotFoundError: if one of those tools is not installed.
    """
    if not os.path.exists(MODEL_FILE):
        # wget -O creates the output file before the transfer finishes, so a
        # failed download would otherwise leave a truncated file that every
        # later start mistakes for the model. Download under a temporary name
        # and rename only on success.
        partial = MODEL_FILE + ".part"
        try:
            subprocess.run(["wget", "-q", MODEL_URL, "-O", partial], check=True)
            os.replace(partial, MODEL_FILE)
        finally:
            if os.path.exists(partial):
                os.remove(partial)

    if not os.path.exists("llama.cpp"):
        subprocess.run(
            ["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp"],
            check=True,
        )

    if not os.path.exists(BUILD_DIR):
        built = False
        try:
            subprocess.run(["cmake", "-S", "llama.cpp", "-B", BUILD_DIR], check=True)
            subprocess.run(
                ["cmake", "--build", BUILD_DIR, "--config", "Release", "-j", THREADS],
                check=True,
            )
            built = True
        finally:
            # A half-finished build directory would make the existence check
            # above skip the build forever; remove it so the next start retries.
            if not built:
                shutil.rmtree(BUILD_DIR, ignore_errors=True)
| |
|
# Runs at import time: make sure the model file and the llama.cpp build exist
# before any request handler can be invoked.
setup()

# Application object that the chat route and the Gradio UI are attached to.
app = FastAPI()
| |
|
class Query(BaseModel):
    """JSON request body accepted by the /api/chat endpoint."""

    # Prompt text handed to the model binary via its -p argument.
    q: str
| |
|
@app.post("/api/chat")
def chat(q: Query, x_api_key: Optional[str] = Header(None)):
    """Run one completion for an authenticated API client.

    Args:
        q: Request body; ``q.q`` is passed to the model binary as the prompt.
        x_api_key: Value of the API-key request header, or None when absent.

    Returns:
        ``{"reply": <text>}`` where the text is whatever the model process
        wrote to stdout, decoded with undecodable bytes dropped.

    Raises:
        HTTPException: 401 when no server key is configured, the header is
            missing, or the key does not match; 500 when the model process
            exits with a non-zero status.
    """
    # Fail closed: if SPACE_API_KEY is unset, a request without the header
    # would otherwise compare None to None and be let through.
    # compare_digest avoids leaking the key through comparison timing; the
    # operands are encoded because it rejects non-ASCII str arguments.
    authorized = (
        bool(SPACE_API_KEY)
        and x_api_key is not None
        and secrets.compare_digest(x_api_key.encode(), SPACE_API_KEY.encode())
    )
    if not authorized:
        raise HTTPException(401, "Unauthorized")

    # NOTE(review): recent llama.cpp builds appear to name this binary
    # `llama-cli` rather than `main` -- confirm the path against the checkout.
    # subprocess.run waits for the child and closes the pipe, unlike a bare
    # Popen whose process is never reaped.
    result = subprocess.run(
        [f"./{BUILD_DIR}/bin/main", "-m", MODEL_FILE, "-p", q.q, "-n", "200", "-t", THREADS],
        stdout=subprocess.PIPE,
    )
    if result.returncode != 0:
        raise HTTPException(500, "Model process failed")
    return {"reply": result.stdout.decode(errors="ignore")}
| |
|
def ui_chat(q: str) -> str:
    """Run one completion for the Gradio UI.

    Args:
        q: Prompt text passed to the model binary via ``-p``.

    Returns:
        Whatever the model process wrote to stdout, decoded with undecodable
        bytes dropped. The exit status is not inspected.
    """
    # NOTE(review): recent llama.cpp builds appear to name this binary
    # `llama-cli` rather than `main` -- confirm the path against the checkout.
    # subprocess.run waits for the child and closes the pipe, unlike a bare
    # Popen whose process is never reaped.
    result = subprocess.run(
        [f"./{BUILD_DIR}/bin/main", "-m", MODEL_FILE, "-p", q, "-n", "200", "-t", THREADS],
        stdout=subprocess.PIPE,
    )
    return result.stdout.decode(errors="ignore")
| |
|
# Attach a single-textbox-in, single-textbox-out Gradio interface for ui_chat
# to the FastAPI app at the root path.
gr.mount_gradio_app(
    app,
    gr.Interface(fn=ui_chat, inputs=gr.Textbox(), outputs=gr.Textbox()),
    path="/",
)