Spaces:
Sleeping
Sleeping
File size: 1,136 Bytes
# Cloud GPU example (Linux + NVIDIA + Docker Compose v2).
#
# On the VPS: clone the repo, put the GGUF model files under spinalcord/models/,
# then run from deploy/:
#   cp docker-compose.cloud.example.yml docker-compose.cloud.yml
#   # edit the model filenames in `command` if needed
#   docker compose -f docker-compose.cloud.yml up -d
#
# UI + API: http://<server-ip>:8080/
#
# If `gpus: all` fails, install nvidia-container-toolkit and use Docker 24+.
# Image: https://github.com/ggml-org/llama.cpp/pkgs/container/llama.cpp
services:
  # Single llama.cpp server container (CUDA image). The same port serves both
  # the HTTP API and the static files mounted at /dashboard.
  llama:
    image: ghcr.io/ggml-org/llama.cpp:server-cuda
    restart: unless-stopped
    # Ask Compose to attach every GPU on the host to this container.
    gpus: all
    ports:
      # Quoted so the host:container mapping is always read as a string.
      - '8080:8080'
    volumes:
      # Relative bind mounts, both read-only: model files and dashboard assets.
      - ../models:/models:ro
      - ../dashboard:/dashboard:ro
    command:
      # Main model plus a smaller draft model (speculative decoding).
      - '--model'
      - '/models/scbrain_1b.gguf'
      - '--model-draft'
      - '/models/scdraft_120m.gguf'
      # Jinja chat templating and the web UI.
      - '--jinja'
      - '--webui'
      # Bounds on how many tokens the draft model proposes per step.
      - '--draft-max'
      - '8'
      - '--draft-min'
      - '2'
      # Context window size in tokens.
      - '-c'
      - '4096'
      # GPU layer offload for the main model; 99 presumably means "all layers".
      - '-ngl'
      - '99'
      # GPU layer offload for the draft model.
      # NOTE(review): 0 appears to keep the draft model on the CPU — confirm
      # this is intentional on a GPU host.
      - '-ngld'
      - '0'
      # Listen on all interfaces inside the container, on the published port.
      - '--host'
      - '0.0.0.0'
      - '--port'
      - '8080'
      # Serve static files from the mounted dashboard directory.
      - '--path'
      - '/dashboard'
|