dengcao committed on
Commit
f68da82
·
verified ·
1 Parent(s): a422b39

Update docker-compose.yaml

Browse files
Files changed (1) hide show
  1. docker-compose.yaml +19 -18
docker-compose.yaml CHANGED
@@ -1,18 +1,19 @@
1
- services:
2
- Qwen3-Reranker-8B:
3
- container_name: Qwen3-Reranker-8B
4
- restart: no
5
- image: dengcao/vllm-openai:v0.9.2-dev #采用vllm最新的开发版制作的镜像,经在NVIDIA RTX3060平台主机上测试正常,可放心使用。
6
- ipc: host
7
- volumes:
8
- - ./models:/models
9
- command: ['--model', '/models/Qwen3-Reranker-8B', '--served-model-name', 'Qwen3-Reranker-8B', '--gpu-memory-utilization', '0.90', '--hf_overrides','{"architectures": ["Qwen3ForSequenceClassification"],"classifier_from_token": ["no", "yes"],"is_original_qwen3_reranker": true}']
10
- ports:
11
- - 8012:8000
12
- deploy:
13
- resources:
14
- reservations:
15
- devices:
16
- - driver: nvidia
17
- count: all
18
- capabilities: [gpu]
 
 
1
+ services:
2
+ Qwen3-Reranker-8B:
3
+ container_name: Qwen3-Reranker-8B
4
+ restart: no
5
+ #image: dengcao/vllm-openai:v0.9.2-dev #采用vllm最新的开发版制作的镜像,经在NVIDIA RTX3060平台主机上测试正常,可放心使用。
6
+ image: dengcao/vllm-openai:v0.9.2rc2
7
+ ipc: host
8
+ volumes:
9
+ - ./models:/models
10
+ command: ['--model', '/models/Qwen3-Reranker-8B', '--served-model-name', 'Qwen3-Reranker-8B', '--gpu-memory-utilization', '0.90', '--hf_overrides','{"architectures": ["Qwen3ForSequenceClassification"],"classifier_from_token": ["no", "yes"],"is_original_qwen3_reranker": true}']
11
+ ports:
12
+ - 8012:8000
13
+ deploy:
14
+ resources:
15
+ reservations:
16
+ devices:
17
+ - driver: nvidia
18
+ count: all
19
+ capabilities: [gpu]