#!/bin/bash
# Step 3: Run HumanEval / MT-Bench / GSM8K benchmarks.
# Run AFTER start_server.sh is up.
# Usage:
# bash run_bench.sh # all three benches, full dataset
# bash run_bench.sh humaneval # only humaneval
# bash run_bench.sh mtbench gsm8k # pick any subset
# Abort on the first command that exits non-zero.
set -o errexit

# ---- endpoint of the already-running inference server ----
INTRANET_IP="10.1.1.72"
PORT="30000"

# ---- model and benchmark locations ----
# Passed to bench_eagle3.py as --model-path.
BASE_MODEL="/workspace/models/Qwen3-8B"

# The remaining paths all live under one Specforge checkout.
specforge_root="/workspace/hanrui/syxin_old/Specforge"
# Passed to bench_eagle3.py as --speculative-draft-model-path.
MERGED="${specforge_root}/outputs/qwen3-8b-sft-32gpu-v2-merged"
# Directory the script changes into before invoking bench_eagle3.py.
BENCH_DIR="${specforge_root}/benchmarks"
# Receives the benchmark output and the per-run log file.
RESULT_DIR="${BENCH_DIR}/results"
# ---- sanity check ----
# Probe the server's /v1/models endpoint before doing any work, so that a
# missing server aborts the script right away with a clear message.
SERVER_URL="http://${INTRANET_IP}:${PORT}"
echo "Checking server at $SERVER_URL ..."
# -s: no progress output; -f: treat HTTP error responses as a failure.
# The timeouts keep an unreachable host or a hung server from blocking the
# script indefinitely.
if ! curl -sf --connect-timeout 5 --max-time 30 "$SERVER_URL/v1/models" > /dev/null; then
  echo "[ERROR] Server not reachable. Start it first: bash start_server.sh" >&2
  exit 1
fi
echo "Server OK."
# Make sure the results directory exists, then work from the benchmark
# directory (bench_eagle3.py is invoked by relative path later on).
mkdir -p -- "$RESULT_DIR"
cd -- "$BENCH_DIR"
# Prepend the Specforge checkout to Python's module search path. The ":" is
# only added when PYTHONPATH already has content; a bare trailing colon would
# leave an empty entry, which Python treats as the current directory.
export PYTHONPATH="/workspace/hanrui/syxin_old/Specforge${PYTHONPATH:+:${PYTHONPATH}}"
# ---- decide which benches to run ----

# Print the "<bench>:<count>" spec for one bench name on stdout.
# Arguments: $1 - bench name (humaneval | mtbench | gsm8k)
# Returns:   0 on success, 1 if the name is not a known bench
# NOTE(review): the counts look like the full dataset sizes promised by the
# usage header -- confirm against bench_eagle3.py.
bench_spec_for() {
  case "$1" in
    humaneval) echo "humaneval:164" ;;
    mtbench) echo "mtbench:80" ;;
    gsm8k) echo "gsm8k:1319" ;;
    *) return 1 ;;
  esac
}

# Benches named on the command line; with no arguments, run all three.
TARGETS=("$@")
if (( ${#TARGETS[@]} == 0 )); then
  TARGETS=(humaneval mtbench gsm8k)
fi

# BENCH_ARGS is one space-separated string of specs (no leading space). It is
# word-split into separate arguments when handed to --benchmark-list.
BENCH_ARGS=""
for t in "${TARGETS[@]}"; do
  if ! spec=$(bench_spec_for "$t"); then
    # Diagnostics go to stderr so they are not mistaken for normal output.
    echo "[ERROR] Unknown bench: $t (choices: humaneval mtbench gsm8k)" >&2
    exit 1
  fi
  BENCH_ARGS="${BENCH_ARGS:+$BENCH_ARGS }$spec"
done
# ---- run the benchmark ----
# One timestamp names both the run (--name) and its log file.
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
LOG_FILE="$RESULT_DIR/bench_${TIMESTAMP}.log"
echo "Running: $BENCH_ARGS"
echo "Results -> $RESULT_DIR"
echo ""

# BENCH_ARGS is a space-separated string; split it into an array explicitly
# so each spec becomes its own argument without relying on unquoted expansion
# (which would also be subject to glob expansion).
read -r -a bench_specs <<< "$BENCH_ARGS"

bench_cmd=(
  python3 bench_eagle3.py
  --model-path "$BASE_MODEL"
  --speculative-draft-model-path "$MERGED"
  --host "$INTRANET_IP"
  --port "$PORT"
  # NOTE(review): meaning of the four fields is defined by bench_eagle3.py --
  # confirm there before changing.
  --config-list "16,4,1,4"
  --benchmark-list "${bench_specs[@]}"
  --output-dir "$RESULT_DIR"
  --name "dflash_lora_${TIMESTAMP}"
  # The server is expected to be running already (see the sanity check).
  --skip-launch-server
)

# Mirror stdout+stderr to the terminal and to the log file. Without pipefail
# the pipeline's own status is tee's, so read the benchmark's status from
# PIPESTATUS immediately afterwards; otherwise a failed run would be reported
# as "Done." with exit code 0.
"${bench_cmd[@]}" 2>&1 | tee "$LOG_FILE"
bench_status=${PIPESTATUS[0]}
if (( bench_status != 0 )); then
  echo "[ERROR] bench_eagle3.py exited with status $bench_status (log: $LOG_FILE)" >&2
  exit "$bench_status"
fi

echo ""
echo "Done. Latest result files:"
# Best-effort listing of the five most recently modified result files; stay
# quiet (and do not fail the script) when no .jsonl file exists.
ls -lht -- "$RESULT_DIR"/*.jsonl 2>/dev/null | head -5 || true
|