# Docker Compose definition for a single service that runs the
# Qwen3-Reranker-8B model from the dengcao/vllm-openai image.
services:
  Qwen3-Reranker-8B:
    container_name: Qwen3-Reranker-8B
    # Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1
    # parsers, while Compose expects the literal string "no".
    restart: "no"
    # Alternative image tags, kept for reference:
    # image: dengcao/vllm-openai:v0.9.2-dev  # Built from the latest vLLM development version; tested OK on a host with an NVIDIA RTX3060, safe to use.
    # image: dengcao/vllm-openai:v0.9.2rc2
    image: dengcao/vllm-openai:v0.9.2
    ipc: host
    volumes:
      # The local ./models directory is mounted at /models, which is where
      # the --model argument below points.
      - ./models:/models
    # Arguments passed to the container, one list item per argument.
    command:
      - '--model'
      - '/models/Qwen3-Reranker-8B'
      - '--served-model-name'
      - 'Qwen3-Reranker-8B'
      - '--gpu-memory-utilization'
      - '0.90'
      # The override is a single JSON string argument; keep it single-quoted
      # so the inner double quotes stay literal.
      - '--hf_overrides'
      - '{"architectures": ["Qwen3ForSequenceClassification"],"classifier_from_token": ["no", "yes"],"is_original_qwen3_reranker": true}'
    ports:
      # host port 8012 -> container port 8000; quoted so the digits-and-colon
      # value is always parsed as a string.
      - "8012:8000"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]