File size: 849 Bytes
f68da82
 
 
 
 
4498658
 
f68da82
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Docker Compose definition for serving the Qwen3-Reranker-8B model through
# the dengcao/vllm-openai image on NVIDIA GPUs.
services:
  Qwen3-Reranker-8B:
    container_name: Qwen3-Reranker-8B
    # Quoted on purpose: a bare `no` is a YAML 1.1 boolean and can be loaded
    # as `false` instead of the restart-policy string "no".
    restart: 'no'
    # Alternative image tags kept for reference:
    # image: dengcao/vllm-openai:v0.9.2-dev  # built from the latest vLLM dev version; tested OK on an NVIDIA RTX 3060 host
    # image: dengcao/vllm-openai:v0.9.2rc2
    image: dengcao/vllm-openai:v0.9.2
    ipc: host
    volumes:
      # Host ./models directory is mounted at /models, which is where the
      # --model argument below points.
      - ./models:/models
    # Arguments passed to the container, one per list item.
    command:
      - '--model'
      - '/models/Qwen3-Reranker-8B'
      - '--served-model-name'
      - 'Qwen3-Reranker-8B'
      - '--gpu-memory-utilization'
      - '0.90'
      - '--hf_overrides'
      # Must stay a single JSON string argument; do not reflow it.
      - '{"architectures": ["Qwen3ForSequenceClassification"],"classifier_from_token": ["no", "yes"],"is_original_qwen3_reranker": true}'
    ports:
      # host:container. Quoted so the mapping is always read as a string
      # (digits-and-colon scalars can hit YAML 1.1 sexagesimal parsing).
      - '8012:8000'
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: ['gpu']