#!/bin/bash
# slime_plugins/rollout_buffer/rollout_buffer_example.sh
# Clean up any processes left over from a previous run so the task can be rerun cleanly.
pkill -9 sglang
sleep 3
ray stop --force
pkill -9 ray
pkill -9 python
sleep 3
pkill -9 ray
pkill -9 python

set -ex

export PYTHONUNBUFFERED=1
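# Megatron-LM does not read the HF config.json, so the architecture has to be
# declared explicitly. The flags below mirror DeepSeek-R1-Distill-Qwen-7B's
# config (Qwen2.5 architecture): 28 layers, GQA with 28 query heads over
# 4 KV groups, RMSNorm, and QKV bias with all other linear biases disabled.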
# DeepSeek-R1-Distill-Qwen-7B
MODEL_ARGS=(
    --swiglu
    --num-layers 28
    --hidden-size 3584
    --ffn-hidden-size 18944
    --num-attention-heads 28
    --group-query-attention
    --num-query-groups 4
    --max-position-embeddings 131072
    --seq-length 4096
    --use-rotary-position-embeddings
    --disable-bias-linear
    --add-qkv-bias
    --normalization "RMSNorm"
    --norm-epsilon 1e-06
    --rotary-base 10000
    --vocab-size 152064
    --accumulate-allreduce-grads-in-fp32
    --attention-softmax-in-fp32
    --attention-backend flash
    --moe-token-dispatcher-type alltoall
    --untie-embeddings-and-output-weights
    --attention-dropout 0.0
    --hidden-dropout 0.0
)
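# Checkpoint plumbing: --hf-checkpoint supplies the tokenizer and weights for
# the SGLang rollout engines, while --ref-load points at a torch_dist-format
# conversion of the same model that Megatron loads as the reference (and
# initial actor) weights; checkpoints are saved every 100 steps.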
CKPT_ARGS=(
    --hf-checkpoint /root/DeepSeek-R1-Distill-Qwen-7B
    --ref-load /root/DeepSeek-R1-Distill-Qwen-7B_torch_dist
    --save-interval 100
    --save /root/DeepSeek-R1-Distill-Qwen-7B_slime
)
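# Rollout sizing: 128 prompts/step x 8 samples/prompt = 1024 sequences, which
# matches --global-batch-size 1024, so each rollout step feeds exactly one
# optimizer step. With --use-dynamic-batch-size, micro-batches are packed up
# to 9216 tokens per GPU rather than using the fixed --micro-batch-size.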
ROLLOUT_ARGS=(
    --rollout-function-path slime_plugins.rollout_buffer.rollout_buffer_example.generate_rollout
    --rm-type deepscaler
    --prompt-data /root/dapo-math-17k/dapo-math-17k.jsonl
    --input-key prompt
    --label-key label
    --num-rollout 3000
    --rollout-batch-size 128
    --rollout-max-response-len 8192
    --rollout-temperature 0.8
    --rollout-shuffle
    --n-samples-per-prompt 8
    --global-batch-size 1024
    --micro-batch-size 8
    --ref-micro-batch-size 8
    --use-dynamic-batch-size
    --max-tokens-per-gpu 9216
    --balance-data
)
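# Parallel layout for the 4 training GPUs: TP=2 with sequence parallelism and
# PP=CP=1, which leaves 4 / 2 = 2 data-parallel replicas.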
DISTRIBUTED_ARGS=(
    --tensor-model-parallel-size 2
    --pipeline-model-parallel-size 1
    --context-parallel-size 1
    --sequence-parallel
)
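# Full activation recomputation (uniformly, in chunks of 1 layer) trades extra
# compute for a much smaller activation memory footprint.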
PERF_ARGS=(
    --recompute-granularity full
    --recompute-method uniform
    --recompute-num-layers 1
)
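# GRPO-style group-relative advantages over the 8 samples per prompt, plus an
# explicit KL loss against the reference model (coef 1e-3) using the
# low-variance KL estimator; the entropy bonus is disabled.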
GRPO_ARGS=(
    --advantage-estimator grpo
    --use-kl-loss
    --kl-loss-coef 0.001
    --kl-loss-type low_var_kl
    --entropy-coef 0.00
)
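# A small constant learning rate (1e-6) with Adam; beta2 is lowered to 0.98,
# a common choice for stabilizing RL fine-tuning.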
OPTIMIZER_ARGS=(
    --lr 1e-6
    --lr-decay-style constant
    --weight-decay 0.1
    --adam-beta1 0.9
    --adam-beta2 0.98
)
WANDB_ARGS=(
    # --use-wandb
)
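# Uncomment --use-wandb above (plus any project/run-name flags your setup uses)
# to enable Weights & Biases logging.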
# Launch the Ray head (master) node inside the container.
export MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus 8 --disable-usage-stats
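# Submit the training job to the local Ray cluster (dashboard on port 8265).
# The runtime env makes Megatron-LM importable via PYTHONPATH; Megatron
# requires CUDA_DEVICE_MAX_CONNECTIONS=1 when sequence parallelism is enabled,
# and NCCL_CUMEM_ENABLE=0 sidesteps cuMem-related NCCL issues. train_async.py
# splits the 8 GPUs into 4 for the actor (1 node x 4 GPUs) and 4 for rollout,
# one SGLang engine per GPU. The rollout buffer server is assumed to be started
# separately and reachable at http://${MASTER_ADDR}:8889 (see
# --rollout-buffer-url below).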
ray job submit --address="http://127.0.0.1:8265" \
    --runtime-env-json='{
        "env_vars": {
            "PYTHONPATH": "/root/Megatron-LM/",
            "CUDA_DEVICE_MAX_CONNECTIONS": "1",
            "NCCL_CUMEM_ENABLE": "0"
        }
    }' \
    -- python3 train_async.py \
    --actor-num-nodes 1 \
    --actor-num-gpus-per-node 4 \
    --rollout-num-gpus 4 \
    --rollout-num-gpus-per-engine 1 \
    "${MODEL_ARGS[@]}" \
    "${CKPT_ARGS[@]}" \
    "${ROLLOUT_ARGS[@]}" \
    "${OPTIMIZER_ARGS[@]}" \
    "${GRPO_ARGS[@]}" \
    "${DISTRIBUTED_ARGS[@]}" \
    "${WANDB_ARGS[@]}" \
    "${PERF_ARGS[@]}" \
    --rollout-buffer-url "http://${MASTER_ADDR}:8889" \
    --keep-old-actor \
    --disable-rewards-normalization \
    --loss-mask-type distill_qwen \
    --log-passrate
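# Notes on the trailing flags (semantics inferred from the flag names; see
# slime's argument docs): --keep-old-actor retains the previous actor weights
# for log-prob computation while training proceeds asynchronously;
# --disable-rewards-normalization skips global reward normalization (GRPO
# already normalizes within each group); --loss-mask-type distill_qwen applies
# the loss mask matching the Distill-Qwen chat template; --log-passrate
# additionally logs pass rates per rollout.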