#!/bin/bash
# slime_plugins/rollout_buffer/rollout_buffer_example.sh
# Clean up any processes left over from a previous run so the task can be rerun cleanly.
pkill -9 sglang
sleep 3
ray stop --force
pkill -9 ray
pkill -9 python
sleep 3
pkill -9 ray
pkill -9 python

set -ex

export PYTHONUNBUFFERED=1
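# Megatron-LM does not read the HF config.json, so the architecture has to be
# declared explicitly. The flags below mirror DeepSeek-R1-Distill-Qwen-7B's
# config (Qwen2.5 architecture): 28 layers, GQA with 28 query heads over
# 4 KV groups, RMSNorm, and QKV bias with all other linear biases disabled.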
# DeepSeek-R1-Distill-Qwen-7B
MODEL_ARGS=(
    --swiglu
    --num-layers 28
    --hidden-size 3584
    --ffn-hidden-size 18944
    --num-attention-heads 28
    --group-query-attention
    --num-query-groups 4
    --max-position-embeddings 131072
    --seq-length 4096
    --use-rotary-position-embeddings
    --disable-bias-linear
    --add-qkv-bias
    --normalization "RMSNorm"
    --norm-epsilon 1e-06
    --rotary-base 10000
    --vocab-size 152064
    --accumulate-allreduce-grads-in-fp32
    --attention-softmax-in-fp32
    --attention-backend flash
    --moe-token-dispatcher-type alltoall
    --untie-embeddings-and-output-weights
    --attention-dropout 0.0
    --hidden-dropout 0.0
)
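# Checkpoint plumbing: --hf-checkpoint supplies the tokenizer and weights for
# the SGLang rollout engines, while --ref-load points at a torch_dist-format
# conversion of the same model that Megatron loads as the reference (and
# initial actor) weights; checkpoints are saved every 100 steps.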
CKPT_ARGS=(
    --hf-checkpoint /root/DeepSeek-R1-Distill-Qwen-7B
    --ref-load /root/DeepSeek-R1-Distill-Qwen-7B_torch_dist
    --save-interval 100
    --save /root/DeepSeek-R1-Distill-Qwen-7B_slime
)
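# Rollout sizing: 128 prompts/step x 8 samples/prompt = 1024 sequences, which
# matches --global-batch-size 1024, so each rollout step feeds exactly one
# optimizer step. With --use-dynamic-batch-size, micro-batches are packed up
# to 9216 tokens per GPU rather than using the fixed --micro-batch-size.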
ROLLOUT_ARGS=(
    --rollout-function-path slime_plugins.rollout_buffer.rollout_buffer_example.generate_rollout
    --rm-type deepscaler
    --prompt-data /root/dapo-math-17k/dapo-math-17k.jsonl
    --input-key prompt
    --label-key label
    --num-rollout 3000
    --rollout-batch-size 128
    --rollout-max-response-len 8192
    --rollout-temperature 0.8
    --rollout-shuffle
    --n-samples-per-prompt 8
    --global-batch-size 1024
    --micro-batch-size 8
    --ref-micro-batch-size 8
    --use-dynamic-batch-size
    --max-tokens-per-gpu 9216
    --balance-data
)
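# Parallel layout for the 4 training GPUs: TP=2 with sequence parallelism and
# PP=CP=1, which leaves 4 / 2 = 2 data-parallel replicas.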
DISTRIBUTED_ARGS=(
    --tensor-model-parallel-size 2
    --pipeline-model-parallel-size 1
    --context-parallel-size 1
    --sequence-parallel
)
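# Full activation recomputation (uniformly, in chunks of 1 layer) trades extra
# compute for a much smaller activation memory footprint.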
PERF_ARGS=(
    --recompute-granularity full
    --recompute-method uniform
    --recompute-num-layers 1
)
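# GRPO-style group-relative advantages over the 8 samples per prompt, plus an
# explicit KL loss against the reference model (coef 1e-3) using the
# low-variance KL estimator; the entropy bonus is disabled.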
GRPO_ARGS=(
    --advantage-estimator grpo
    --use-kl-loss
    --kl-loss-coef 0.001
    --kl-loss-type low_var_kl
    --entropy-coef 0.00
)
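# A small constant learning rate (1e-6) with Adam; beta2 is lowered to 0.98,
# a common choice for stabilizing RL fine-tuning.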
OPTIMIZER_ARGS=(
    --lr 1e-6
    --lr-decay-style constant
    --weight-decay 0.1
    --adam-beta1 0.9
    --adam-beta2 0.98
)
WANDB_ARGS=(
    # --use-wandb
)
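# Uncomment --use-wandb above (plus any project/run-name flags your setup uses)
# to enable Weights & Biases logging.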
# Launch the Ray head (master) node inside the container.
export MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus 8 --disable-usage-stats
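# Submit the training job to the local Ray cluster (dashboard on port 8265).
# The runtime env makes Megatron-LM importable via PYTHONPATH; Megatron
# requires CUDA_DEVICE_MAX_CONNECTIONS=1 when sequence parallelism is enabled,
# and NCCL_CUMEM_ENABLE=0 sidesteps cuMem-related NCCL issues. train_async.py
# splits the 8 GPUs into 4 for the actor (1 node x 4 GPUs) and 4 for rollout,
# one SGLang engine per GPU. The rollout buffer server is assumed to be started
# separately and reachable at http://${MASTER_ADDR}:8889 (see
# --rollout-buffer-url below).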
ray job submit --address="http://127.0.0.1:8265" \
    --runtime-env-json='{
        "env_vars": {
            "PYTHONPATH": "/root/Megatron-LM/",
            "CUDA_DEVICE_MAX_CONNECTIONS": "1",
            "NCCL_CUMEM_ENABLE": "0"
        }
    }' \
    -- python3 train_async.py \
    --actor-num-nodes 1 \
    --actor-num-gpus-per-node 4 \
    --rollout-num-gpus 4 \
    --rollout-num-gpus-per-engine 1 \
    "${MODEL_ARGS[@]}" \
    "${CKPT_ARGS[@]}" \
    "${ROLLOUT_ARGS[@]}" \
    "${OPTIMIZER_ARGS[@]}" \
    "${GRPO_ARGS[@]}" \
    "${DISTRIBUTED_ARGS[@]}" \
    "${WANDB_ARGS[@]}" \
    "${PERF_ARGS[@]}" \
    --rollout-buffer-url "http://${MASTER_ADDR}:8889" \
    --keep-old-actor \
    --disable-rewards-normalization \
    --loss-mask-type distill_qwen \
    --log-passrate
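# Notes on the trailing flags (semantics inferred from the flag names; see
# slime's argument docs): --keep-old-actor retains the previous actor weights
# for log-prob computation while training proceeds asynchronously;
# --disable-rewards-normalization skips global reward normalization (GRPO
# already normalizes within each group); --loss-mask-type distill_qwen applies
# the loss mask matching the Distill-Qwen chat template; --log-passrate
# additionally logs pass rates per rollout.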