File size: 846 Bytes
bebbdd9
 
 
 
 
286eaa8
 
bebbdd9
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Docker Compose definition for a single vLLM OpenAI-compatible server that
# serves the Qwen3-Reranker-4B model from a local ./models directory.
services:
  Qwen3-Reranker-4B:
    container_name: Qwen3-Reranker-4B
    # Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1
    # loaders, whereas Compose expects the restart-policy string "no".
    restart: "no"
    # Alternative image tags, kept for reference:
    # image: dengcao/vllm-openai:v0.9.2-dev  # built from the latest vLLM dev version; tested OK on an NVIDIA RTX3060 host
    # image: dengcao/vllm-openai:v0.9.2rc2
    image: dengcao/vllm-openai:v0.9.2
    # Use the host's IPC namespace for the container.
    ipc: host
    volumes:
      # Host ./models is mounted at /models; the --model path below points into it.
      - ./models:/models
    # Arguments passed to the image's entrypoint, one list item per argument.
    command:
      - '--model'
      - '/models/Qwen3-Reranker-4B'
      - '--served-model-name'
      - 'Qwen3-Reranker-4B'
      - '--gpu-memory-utilization'
      - '0.90'
      - '--hf_overrides'
      # JSON passed as a single argument; keep it on one line and single-quoted
      # so the inner double quotes reach the server unchanged.
      - '{"architectures": ["Qwen3ForSequenceClassification"],"classifier_from_token": ["no", "yes"],"is_original_qwen3_reranker": true}'
    ports:
      # host:container — quoted so the mapping is always parsed as a string.
      - "8011:8000"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]