# Docker Compose service for an SGLang inference server.
# Serves meta-llama/Llama-3.1-8B-Instruct on port 30000 using one NVIDIA GPU.
# Host networking and privileged mode are enabled for RDMA support.
services:
  sglang:
    image: lmsysorg/sglang:latest
    container_name: sglang
    volumes:
      # Share the host HuggingFace cache so model weights persist across restarts.
      - ${HOME}/.cache/huggingface:/root/.cache/huggingface
      # If you use modelscope, you need to mount this directory as well:
      # - ${HOME}/.cache/modelscope:/root/.cache/modelscope
    restart: always
    network_mode: host  # required by RDMA
    privileged: true  # required by RDMA
    # Alternatively, drop host networking and publish only port 30000:
    # ports:
    #   - "30000:30000"
    environment:
      - HF_TOKEN=<secret>
      # If you use modelscope to download the model, set this variable:
      # - SGLANG_USE_MODELSCOPE=true
    entrypoint: python3 -m sglang.launch_server
    # Continuation lines are indented past the key so they fold into one scalar.
    command: --model-path meta-llama/Llama-3.1-8B-Instruct
      --host 0.0.0.0
      --port 30000
    ulimits:
      memlock: -1  # unlimited locked memory, needed for pinned/RDMA buffers
      stack: 67108864
    ipc: host  # share host IPC namespace (shared-memory segments for NCCL/tensor transfer)
    healthcheck:
      # SGLang exposes a /health endpoint on the serving port.
      test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"]
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]