drizzlezyk committed on
Commit
728efb0
·
verified ·
1 Parent(s): 23c5441

Upload examples/start_serving_openpangu_r_72b_2512.sh with huggingface_hub

Browse files
examples/start_serving_openpangu_r_72b_2512.sh ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
#
# Launch an OpenAI-compatible vLLM API server for openPangu-R-72B (2512)
# on Ascend NPUs via start_api_servers.py.
#
# Before running, edit the placeholders below:
#   - PYTHONPATH:    path to your omniinfer checkout
#   - --model-path:  path to the model weights
#
# Environment toggles (default to 1 / enabled):
#   USE_REASONING=1  enable the "pangu" reasoning parser
#   USE_TOOL=1       enable auto tool choice with the "pangu" tool-call parser

# --- Device / vLLM runtime selection ---------------------------------------
export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3        # NPUs used by this instance
export VLLM_USE_V1=1                            # use the vLLM v1 engine
export VLLM_WORKER_MULTIPROC_METHOD=fork
export VLLM_ENABLE_MC2=0
export USING_LCCL_COM=0

# --- Pangu model integration -----------------------------------------------
export OMNI_USE_PANGU=1
export ENABLE_PREFILL_TND=1

export HCCL_OP_EXPANSION_MODE="AIV"
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1          # allow --max-model-len above model default
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True

# --- Logging / determinism / affinity --------------------------------------
export HCCL_RDMA_TIMEOUT=5
export HCCL_DETERMINISTIC=False
export ASCEND_GLOBAL_LOG_LEVEL=3                # 3 = error-level Ascend logs
export CPU_AFFINITY_CONF=2
export VLLM_LOGGING_LEVEL=INFO

# --- HCCL communication tuning ---------------------------------------------
export HCCL_BUFFSIZE=1000
export HCCL_CONNECT_TIMEOUT=1800
export HCCL_EXEC_TIMEOUT=1800
export HCCL_INTRA_ROCE_ENABLE=1
export HCCL_INTRA_PCIE_ENABLE=0
export FORCE_ENABLE_CHUNK_PREFILL=1

# --- Optional features (overridable from the caller's environment) ---------
export USE_REASONING=${USE_REASONING:=1}
export USE_TOOL=${USE_TOOL:=1}

# Build optional argument lists as arrays: an empty array expands to zero
# words and a populated one keeps each flag intact, avoiding the unquoted
# word-splitting hazard of string concatenation.
reasoning_args=()
tool_args=()
if [ "$USE_REASONING" = "1" ]; then
  reasoning_args=(--reasoning-parser pangu)
fi
if [ "$USE_TOOL" = "1" ]; then
  tool_args=(--enable-auto-tool-choice --tool-call-parser pangu)
fi

export PYTHONPATH=/path/to/omniinfer/:${PYTHONPATH:-}

# Drop any stale TorchAir graph cache so graphs are recompiled for this run.
rm -rf -- .torchair_cache/

# Launch in the background so the shell returns; logs go to --log-dir.
python start_api_servers.py \
  --num-servers 1 \
  --model-path /path/to/model/ \
  --master-ip 0.0.0.0 \
  --tp 4 \
  --num-dp 1 \
  --master-port 3512 \
  --served-model-name openpangu_r_72b_2512 \
  --log-dir apiserverlog_pangu72B_hybrid_chunk \
  --extra-args "--max-num-batched-tokens 2048 --enforce-eager --no-enable-prefix-caching --enable-expert-parallel --max-num-seqs 32 --long-prefill-token-threshold 1024" \
  --base-api-port 8000 \
  --gpu-util 0.90 \
  --no-enable-prefix-caching \
  --max-model-len 131072 \
  "${reasoning_args[@]}" \
  "${tool_args[@]}" \
  --additional-config '{"graph_model_compile_config":{"level":1, "use_ge_graph_cached":true, "decode_gear_list": [32]}, "enable_hybrid_graph_mode": false, "expert_parallel_size": 4, "expert_tensor_parallel_size": 1}' &