File size: 347 Bytes

ee3af03

# Start the vLLM OpenAI API server (python -m vllm.entrypoints.openai.api_server)
# on a local Qwen2.5-Omni-7B checkpoint directory.
#
# Every setting can be overridden from the environment; when a variable is
# unset or empty, the default reproduces the original hard-coded invocation:
#   SERVE_MODEL_DIR     checkpoint directory, passed to BOTH --model and
#                       --tokenizer so the two can never point at different paths
#   SERVE_GPU_IDS       value given to the server as CUDA_VISIBLE_DEVICES
#   SERVE_HOST          address passed to --host
#   SERVE_PORT          port passed to --port
#   SERVE_GPU_MEM_UTIL  value passed to --gpu-memory-utilization
#
# The override names deliberately avoid a VLLM_ prefix so they cannot be
# mistaken for environment variables that vLLM itself reads.
serve_model_dir="${SERVE_MODEL_DIR:-/root/autodl-tmp/output_7B_FULL_cotSFT/v0-20250621-230827/Qwen2.5-Omni-7B}"
serve_gpu_ids="${SERVE_GPU_IDS:-0}"
serve_host="${SERVE_HOST:-127.0.0.1}"
serve_port="${SERVE_PORT:-8000}"
serve_gpu_mem_util="${SERVE_GPU_MEM_UTIL:-0.9}"

# CUDA_VISIBLE_DEVICES is set as a per-command prefix so it applies only to
# the server process and is not left behind in the invoking shell.
CUDA_VISIBLE_DEVICES="${serve_gpu_ids}" python -m vllm.entrypoints.openai.api_server \
    --model "${serve_model_dir}" \
    --tokenizer "${serve_model_dir}" \
    --dtype bfloat16 \
    --host "${serve_host}" \
    --port "${serve_port}" \
    --gpu-memory-utilization "${serve_gpu_mem_util}"