# Start the vLLM OpenAI API server module (vllm.entrypoints.openai.api_server)
# for the Qwen2.5-Omni-7B checkpoint directory below.
#   - CUDA_VISIBLE_DEVICES=0 exposes only GPU index 0 to the process.
#   - The same directory is passed as both --model and --tokenizer.
#   - The server is bound to 127.0.0.1:8000.
#   - --gpu-memory-utilization 0.9: presumably the fraction of GPU memory vLLM
#     may claim -- confirm against the vLLM CLI documentation.
# NOTE: every continued line must end with a backslash as its very last
# character; any trailing text (such as table-cell pipes left over from a
# copy/paste) breaks the continuation and makes the command unparsable.
model_dir=/root/autodl-tmp/output_7B_FULL_cotSFT/v0-20250621-230827/Qwen2.5-Omni-7B

CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
  --model "$model_dir" \
  --tokenizer "$model_dir" \
  --dtype bfloat16 \
  --host 127.0.0.1 \
  --port 8000 \
  --gpu-memory-utilization 0.9