#!/bin/bash
# DFlash evaluation: compare baseline vs multi-step denoising (8 GPU data parallel)
#
# Usage:
# bash examples/run_eval_dflash.sh # run step=1,2,3 all
# bash examples/run_eval_dflash.sh 2 # only step=2
#
# Each GPU loads target+draft model independently, samples are split across GPUs.
# Resolve the directory containing this script, then the repo root above it.
# Quoted throughout so paths containing spaces do not word-split.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
ROOT_DIR=$(dirname -- "$SCRIPT_DIR")
# Activate conda env
source /workspace/miniconda3/etc/profile.d/conda.sh
conda activate specforge
# ${PYTHONPATH:-} avoids an "unbound variable" error if the var is unset.
export PYTHONPATH="$ROOT_DIR:${PYTHONPATH:-}"
export HF_DATASETS_CACHE=/workspace/hanrui/datasets
export HF_HOME=/workspace/hanrui/cache/specforge_hf_home
# Force fully offline operation: all models/datasets must already be cached.
export HF_DATASETS_OFFLINE=1
export HF_HUB_OFFLINE=1
export TRANSFORMERS_OFFLINE=1
# ============ Configuration ============
# Every knob is overridable from the environment, e.g. NUM_GPUS=4 bash ...
NUM_GPUS=${NUM_GPUS:-8}
TARGET_MODEL=${TARGET_MODEL:-"/workspace/models/Qwen3-8B"}
DRAFT_MODEL=${DRAFT_MODEL:-"/workspace/models/Qwen3-8B-DFlash-b16"}
DATASET=${DATASET:-"math500"}
MAX_SAMPLES=${MAX_SAMPLES:-500}
MAX_NEW_TOKENS=${MAX_NEW_TOKENS:-512}
TEMPERATURE=${TEMPERATURE:-0.0}
OUTPUT_DIR=${OUTPUT_DIR:-"$ROOT_DIR/results/dflash_eval"}
# ========================================
mkdir -p -- "$OUTPUT_DIR" || { echo "ERROR: cannot create $OUTPUT_DIR" >&2; exit 1; }
#######################################
# Launch one distributed DFlash evaluation run.
# Globals:   NUM_GPUS, ROOT_DIR, TARGET_MODEL, DRAFT_MODEL, DATASET,
#            MAX_SAMPLES, MAX_NEW_TOKENS, TEMPERATURE, OUTPUT_DIR (all read)
# Arguments: $1 - number of denoising steps to evaluate
# Outputs:   JSON results and a log file under $OUTPUT_DIR
# Returns:   tee's exit status — NOTE the pipeline masks eval_dflash.py
#            failures unless 'set -o pipefail' is enabled.
#######################################
run_eval() {
  local steps=$1
  echo ""
  echo "============================================"
  echo " Running DFlash eval: denoise_steps=$steps"
  echo " GPUs: $NUM_GPUS, Samples: $MAX_SAMPLES"
  echo "============================================"
  # All expansions quoted: model/output paths may contain spaces.
  torchrun \
    --standalone \
    --nproc_per_node "$NUM_GPUS" \
    "$ROOT_DIR/scripts/eval_dflash.py" \
    --target-model-path "$TARGET_MODEL" \
    --draft-model-path "$DRAFT_MODEL" \
    --dataset "$DATASET" \
    --max-samples "$MAX_SAMPLES" \
    --max-new-tokens "$MAX_NEW_TOKENS" \
    --num-denoise-steps "$steps" \
    --temperature "$TEMPERATURE" \
    --output-file "$OUTPUT_DIR/${DATASET}_steps${steps}.json" \
    2>&1 | tee "$OUTPUT_DIR/${DATASET}_steps${steps}.log"
}
# If a step count was passed on the command line, run only that; otherwise
# sweep steps 1..3 and print a summary comparison.
if [ -n "${1:-}" ]; then
  run_eval "$1"
else
  run_eval 1
  run_eval 2
  run_eval 3
  echo ""
  echo "============================================"
  echo " All evaluations complete!"
  echo " Results in: $OUTPUT_DIR/"
  echo "============================================"
  echo ""
  echo "Quick comparison:"
  for f in "$OUTPUT_DIR/${DATASET}"_steps*.json; do
    # Skip the literal pattern when no results files exist (nullglob not set).
    [ -e "$f" ] || continue
    # Extract N from ..._stepsN.json via parameter expansion — portable,
    # unlike 'grep -oP' which requires GNU grep.
    steps=${f##*_steps}
    steps=${steps%.json}
    tau=$(python -c "import json; d=json.load(open('$f')); print(f'{d[\"results\"][\"avg_tau\"]:.2f}')" 2>/dev/null || echo "N/A")
    echo " steps=$steps avg_tau=$tau"
  done
fi