#!/usr/bin/env bash
#
# Start the vLLM OpenAI-compatible API server for the
# "FreedomIntelligence/openPangu-Embedded-7B" model.
#
# Usage: bash <this-script>
#
# Environment exported for the server process:
#   ASCEND_RT_VISIBLE_DEVICES=1  NOTE(review): presumably restricts the
#                                Ascend runtime to device index 1 -- confirm
#                                the index matches the target host.
#   VLLM_USE_V1=1                NOTE(review): presumably opts in to the
#                                vLLM V1 engine -- confirm against the
#                                installed vLLM version.
#
# Server settings passed on the command line:
#   --tensor-parallel-size 1        single-device, no tensor parallelism
#   --host localhost / --port 8000  listen address for the HTTP API
#   --trust-remote-code             allows code shipped with the model repo
#                                   to run; only use with trusted models
#   --dtype bfloat16                dtype requested for the model
#   --gpu-memory-utilization 0.90   memory-utilization value passed to vLLM

export ASCEND_RT_VISIBLE_DEVICES=1
export VLLM_USE_V1=1

# Each continued line must end in a backslash immediately followed by the
# newline; the final argument line must NOT end in a backslash, otherwise the
# command would swallow whatever line comes next.
python -m vllm.entrypoints.openai.api_server \
  --model "FreedomIntelligence/openPangu-Embedded-7B" \
  --tensor-parallel-size 1 \
  --port 8000 \
  --host localhost \
  --trust-remote-code \
  --dtype bfloat16 \
  --gpu-memory-utilization 0.90