# Hanrui / dflash / run_benchmark.sh
# Lekr0's picture
# Add files using upload-large-folder tool
# 4024ed7 verified
# Runs benchmark.py once per dataset through torchrun and mirrors each run's
# combined stdout/stderr into logs/<dataset>.log.
#
# Paths (logs/, benchmark.py) are relative to the current working directory.
# A failing dataset does not stop the remaining ones; the script exits with
# status 1 after the loop if any launch returned a non-zero status.

# Eight device ids are listed here, matching --nproc_per_node=8 below.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

mkdir -p logs || {
  echo "error: could not create the logs directory" >&2
  exit 1
}

# Each entry has the form "<dataset name>:<max samples>".
TASKS=(
  "gsm8k:128"
  "math500:128"
  "aime24:30"
  "aime25:30"
  "humaneval:164"
  "mbpp:128"
  "livecodebench:128"
  "swe-bench:128"
  "mt-bench:80"
  "alpaca:128"
)

# Names of the datasets whose torchrun launch exited non-zero.
FAILED_TASKS=()

for task in "${TASKS[@]}"; do
  # Split "<dataset>:<count>" on the colon.
  IFS=':' read -r DATASET_NAME MAX_SAMPLES <<< "$task"

  echo "========================================================"
  echo "Running Benchmark: $DATASET_NAME with $MAX_SAMPLES samples"
  echo "========================================================"

  torchrun \
    --nproc_per_node=8 \
    --master_port=29600 \
    benchmark.py \
    --dataset "$DATASET_NAME" \
    --max-samples "$MAX_SAMPLES" \
    --model-name-or-path Qwen/Qwen3-4B \
    --draft-name-or-path z-lab/Qwen3-4B-DFlash-b16 \
    --max-new-tokens 2048 \
    --temperature 0.0 \
    2>&1 | tee "logs/${DATASET_NAME}.log"

  # The pipeline's own status is tee's; torchrun's status is the first
  # PIPESTATUS entry and must be read before any other command runs.
  run_status=${PIPESTATUS[0]}

  if (( run_status != 0 )); then
    echo "error: benchmark $DATASET_NAME exited with status $run_status" >&2
    FAILED_TASKS+=("$DATASET_NAME")
  fi
done

# Surface failures to the caller instead of always exiting 0.
if (( ${#FAILED_TASKS[@]} > 0 )); then
  echo "error: failed benchmarks: ${FAILED_TASKS[*]}" >&2
  exit 1
fi