# AutoMR-pangu / generator_server.sh
# haifei
# code and checkpoint
# 1482463
# Start vLLM's OpenAI-compatible API server (vllm.entrypoints.openai.api_server)
# serving the FreedomIntelligence/openPangu-Embedded-7B model on localhost:8000.

# Expose only device 1 to the Ascend runtime; this pairs with
# --tensor-parallel-size 1 below (one visible device, one shard).
export ASCEND_RT_VISIBLE_DEVICES=1
# Select the vLLM V1 engine.
export VLLM_USE_V1=1

# Notes on the options below:
#   --host localhost      binds to the loopback name only.
#   --trust-remote-code   lets code shipped with the model repository be loaded;
#                         only use with a model source you trust.
# The final option deliberately has NO trailing backslash: a dangling line
# continuation would splice whatever line follows (or is later appended to
# this file) into the python command as extra arguments.
python -m vllm.entrypoints.openai.api_server \
  --model "FreedomIntelligence/openPangu-Embedded-7B" \
  --tensor-parallel-size 1 \
  --port 8000 \
  --host localhost \
  --trust-remote-code \
  --dtype bfloat16 \
  --gpu-memory-utilization 0.90