#!/bin/bash
# DFlash evaluation: compare baseline vs multi-step denoising (8 GPU data parallel)
#
# Usage:
#   bash examples/run_eval_dflash.sh        # run step=1,2,3 all
#   bash examples/run_eval_dflash.sh 2      # only step=2
#
# Each GPU loads target+draft model independently, samples are split across GPUs.
#
# Environment overrides (defaults are in the Configuration section below):
#   NUM_GPUS, TARGET_MODEL, DRAFT_MODEL, DATASET, MAX_SAMPLES,
#   MAX_NEW_TOKENS, TEMPERATURE, OUTPUT_DIR
#
# Exit status: non-zero if any torchrun invocation failed.

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
ROOT_DIR=$(dirname -- "$SCRIPT_DIR")

# Activate conda env
source /workspace/miniconda3/etc/profile.d/conda.sh
conda activate specforge

# Only append the previous PYTHONPATH when it is non-empty: a trailing ':'
# is an empty entry, which Python treats as the current directory.
export PYTHONPATH="$ROOT_DIR${PYTHONPATH:+:$PYTHONPATH}"
export HF_DATASETS_CACHE=/workspace/hanrui/datasets
export HF_HOME=/workspace/hanrui/cache/specforge_hf_home
export HF_DATASETS_OFFLINE=1
export HF_HUB_OFFLINE=1
export TRANSFORMERS_OFFLINE=1

# ============ Configuration ============
NUM_GPUS=${NUM_GPUS:-8}
TARGET_MODEL=${TARGET_MODEL:-"/workspace/models/Qwen3-8B"}
DRAFT_MODEL=${DRAFT_MODEL:-"/workspace/models/Qwen3-8B-DFlash-b16"}
DATASET=${DATASET:-"math500"}
MAX_SAMPLES=${MAX_SAMPLES:-500}
MAX_NEW_TOKENS=${MAX_NEW_TOKENS:-512}
TEMPERATURE=${TEMPERATURE:-0.0}
OUTPUT_DIR=${OUTPUT_DIR:-"$ROOT_DIR/results/dflash_eval"}
# ========================================

if ! mkdir -p -- "$OUTPUT_DIR"; then
    echo "ERROR: cannot create output directory: $OUTPUT_DIR" >&2
    exit 1
fi

#######################################
# Run one evaluation with a given number of denoise steps.
# Globals (read): ROOT_DIR, NUM_GPUS, TARGET_MODEL, DRAFT_MODEL, DATASET,
#                 MAX_SAMPLES, MAX_NEW_TOKENS, TEMPERATURE, OUTPUT_DIR
# Arguments: $1 - value passed to --num-denoise-steps
# Outputs:   streams torchrun output to stdout and to
#            $OUTPUT_DIR/${DATASET}_steps<N>.log; the results file path
#            $OUTPUT_DIR/${DATASET}_steps<N>.json is passed as --output-file
# Returns:   torchrun's exit status (not tee's)
#######################################
run_eval() {
    local steps=$1
    local status

    echo ""
    echo "============================================"
    echo " Running DFlash eval: denoise_steps=$steps"
    echo " GPUs: $NUM_GPUS, Samples: $MAX_SAMPLES"
    echo "============================================"

    torchrun \
        --standalone \
        --nproc_per_node "$NUM_GPUS" \
        "$ROOT_DIR/scripts/eval_dflash.py" \
        --target-model-path "$TARGET_MODEL" \
        --draft-model-path "$DRAFT_MODEL" \
        --dataset "$DATASET" \
        --max-samples "$MAX_SAMPLES" \
        --max-new-tokens "$MAX_NEW_TOKENS" \
        --num-denoise-steps "$steps" \
        --temperature "$TEMPERATURE" \
        --output-file "$OUTPUT_DIR/${DATASET}_steps${steps}.json" \
        2>&1 | tee "$OUTPUT_DIR/${DATASET}_steps${steps}.log"
    # A pipeline's status is that of its last stage (tee); read the first
    # stage's status so a torchrun failure is not reported as success.
    status=${PIPESTATUS[0]}

    if (( status != 0 )); then
        echo "ERROR: eval with denoise_steps=$steps exited with status $status" >&2
    fi
    return "$status"
}

if [[ -n "${1:-}" ]]; then
    run_eval "$1"
    exit $?
else
    # Keep going after a failed run so the remaining step counts still get
    # evaluated; the failure is reflected in the final exit status.
    failed=0
    for n in 1 2 3; do
        run_eval "$n" || failed=1
    done

    echo ""
    echo "============================================"
    echo " All evaluations complete!"
    echo " Results in: $OUTPUT_DIR/"
    echo "============================================"
    echo ""
    echo "Quick comparison:"
    for f in "$OUTPUT_DIR/${DATASET}"_steps*.json; do
        # An unmatched glob stays as the literal pattern; skip it.
        [[ -e "$f" ]] || continue

        # Derive the step count from the file name only, so digits elsewhere
        # in the directory path cannot be picked up.
        steps=${f##*/}
        steps=${steps#"${DATASET}_steps"}
        steps=${steps%.json}
        steps=${steps%%[!0-9]*}

        # The path is passed as an argument rather than spliced into the
        # Python source, so quotes in the path cannot break the snippet.
        # stderr is discarded on purpose: an unreadable file shows as "N/A".
        tau=$(python -c 'import json, sys
with open(sys.argv[1]) as fh:
    d = json.load(fh)
print("{:.2f}".format(d["results"]["avg_tau"]))' "$f" 2>/dev/null) || tau="N/A"

        echo " steps=$steps avg_tau=$tau"
    done

    if (( failed != 0 )); then
        echo "WARNING: at least one evaluation failed; see logs in $OUTPUT_DIR/" >&2
    fi
    exit "$failed"
fi