#!/usr/bin/env bash
#
# Runs benchmark.py through torchrun once for every entry in TASKS and mirrors
# each run's combined stdout/stderr to logs/<dataset>.log.
#
# benchmark.py and the logs/ directory are resolved relative to the current
# working directory, so invoke this script from the directory that contains
# benchmark.py.
#
# Exit status: 0 when every benchmark pipeline succeeded, 1 when at least one
# returned a non-zero status (the remaining benchmarks are still attempted).

# -u: treat unset variables as errors.
# pipefail: make `torchrun ... | tee` report torchrun's failure instead of
# tee's (otherwise always-zero) status.
set -u -o pipefail

export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
mkdir -p logs || exit 1

# Each entry is "<dataset>:<max samples>"; the loop below splits it on ':'.
TASKS=(
  "gsm8k:128"
  "math500:128"
  "aime24:30"
  "aime25:30"
  "humaneval:164"
  "mbpp:128"
  "livecodebench:128"
  "swe-bench:128"
  "mt-bench:80"
  "alpaca:128"
)

# Datasets whose benchmark pipeline returned a non-zero status.
failed=()

for task in "${TASKS[@]}"; do
  IFS=':' read -r DATASET_NAME MAX_SAMPLES <<< "$task"

  echo "========================================================"
  echo "Running Benchmark: $DATASET_NAME with $MAX_SAMPLES samples"
  echo "========================================================"

  # Keep going after a failure so one broken dataset does not cancel the rest
  # of the sweep; failures are reported together once the loop finishes.
  if ! torchrun \
    --nproc_per_node=8 \
    --master_port=29600 \
    benchmark.py \
    --dataset "$DATASET_NAME" \
    --max-samples "$MAX_SAMPLES" \
    --model-name-or-path Qwen/Qwen3-4B \
    --draft-name-or-path z-lab/Qwen3-4B-DFlash-b16 \
    --max-new-tokens 2048 \
    --temperature 0.0 \
    2>&1 | tee "logs/${DATASET_NAME}.log"; then
    failed+=("$DATASET_NAME")
  fi
done

if (( ${#failed[@]} > 0 )); then
  printf 'Benchmarks that exited non-zero: %s\n' "${failed[*]}" >&2
  exit 1
fi