# Launch the vLLM OpenAI-compatible API server for the openPangu embedding model.
#
# Environment:
#   ASCEND_RT_VISIBLE_DEVICES=3  presumably restricts the Ascend runtime to
#                                device index 3 -- TODO confirm against the host.
#   VLLM_USE_V1=1                presumably opts into the vLLM V1 engine -- TODO confirm.
#
# Server arguments, exactly as passed below:
#   --model                   "FreedomIntelligence/openPangu-Embedded-7B"
#   --tensor-parallel-size    1
#   --host / --port           localhost / 8001
#   --gpu-memory-utilization  0.4
#   --trust-remote-code       enabled; only use with a model repository you trust
#   --task                    embed
#   --dtype                   bfloat16
#
# NOTE(review): the original last line ended with a continuation backslash, so
# the argument list may continue below this point. The backslash is kept to
# preserve that; remove it if --dtype is in fact the final argument.
export ASCEND_RT_VISIBLE_DEVICES=3
export VLLM_USE_V1=1
python -m vllm.entrypoints.openai.api_server \
  --model "FreedomIntelligence/openPangu-Embedded-7B" \
  --tensor-parallel-size 1 \
  --port 8001 \
  --host localhost \
  --gpu-memory-utilization 0.4 \
  --trust-remote-code \
  --task embed \
  --dtype bfloat16 \