File size: 2,113 Bytes
7c50656
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/bin/bash
# Step 3: Run HumanEval / MT-Bench / GSM8K benchmarks.
# Run AFTER start_server.sh is up.
# Usage:
#   bash run_bench.sh                    # all three benches, full dataset
#   bash run_bench.sh humaneval          # only humaneval
#   bash run_bench.sh mtbench gsm8k      # pick any subset

# Abort on the first command that exits non-zero.
set -o errexit

# ---- configuration ----
# Address of the server that must already be running (see start_server.sh).
INTRANET_IP="10.1.1.72"
PORT="30000"

# Base model, plus the merged checkpoint that is handed to bench_eagle3.py
# as the speculative draft model.
BASE_MODEL="/workspace/models/Qwen3-8B"
specforge_root="/workspace/hanrui/syxin_old/Specforge"
MERGED="${specforge_root}/outputs/qwen3-8b-sft-32gpu-v2-merged"

# The benchmark is run from BENCH_DIR; results and logs go to RESULT_DIR.
BENCH_DIR="${specforge_root}/benchmarks"
RESULT_DIR="${BENCH_DIR}/results"

# ---- sanity check ----
# Fail fast if the server is not answering: probe its /v1/models endpoint
# before doing any other work. The timeouts keep an unreachable or hung host
# from stalling the script indefinitely.
echo "Checking server at http://$INTRANET_IP:$PORT ..."
if ! curl -sf --connect-timeout 5 --max-time 30 \
        "http://${INTRANET_IP}:${PORT}/v1/models" > /dev/null; then
    # Diagnostics go to stderr so they stay visible when stdout is redirected.
    echo "[ERROR] Server not reachable. Start it first: bash start_server.sh" >&2
    exit 1
fi
echo "Server OK."

# Create the results directory up front, then switch to BENCH_DIR because
# bench_eagle3.py is invoked by relative path further down.
mkdir -p "$RESULT_DIR"
cd "$BENCH_DIR"
# Prepend the Specforge checkout to the Python module search path. The
# ${VAR:+...} form avoids leaving a trailing ':' (an empty path entry) when
# PYTHONPATH was previously unset.
export PYTHONPATH="/workspace/hanrui/syxin_old/Specforge${PYTHONPATH:+:${PYTHONPATH}}"

# ---- decide which benches to run ----
# Positional arguments name the benches; with none given, run all three.
if [[ $# -gt 0 ]]; then
    TARGETS=("$@")
else
    TARGETS=(humaneval mtbench gsm8k)
fi

# Build BENCH_ARGS as a space-separated list of "<bench>:<count>" entries.
# NOTE(review): the count is presumably the number of samples (the usage
# header says "full dataset") - confirm against bench_eagle3.py.
BENCH_ARGS=""
for bench in "${TARGETS[@]}"; do
    case "$bench" in
        humaneval) count=164 ;;
        mtbench)   count=80 ;;
        gsm8k)     count=1319 ;;
        *)
            # Reject anything that is not one of the three known names.
            echo "[ERROR] Unknown bench: $bench (choices: humaneval mtbench gsm8k)"
            exit 1
            ;;
    esac
    BENCH_ARGS+=" ${bench}:${count}"
done

# ---- run the benchmark ----
# One timestamp tags both the run name and the log file so they can be matched.
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
log_file="$RESULT_DIR/bench_${TIMESTAMP}.log"
echo "Running: $BENCH_ARGS"
echo "Results -> $RESULT_DIR"
echo ""

# Output (stdout and stderr) is shown live and also saved to the log file.
# BENCH_ARGS is deliberately left unquoted: it is a space-separated list and
# each "<bench>:<count>" entry must reach --benchmark-list as its own argument.
# shellcheck disable=SC2086
python3 bench_eagle3.py \
    --model-path                   "$BASE_MODEL" \
    --speculative-draft-model-path "$MERGED" \
    --host                         "$INTRANET_IP" \
    --port                         "$PORT" \
    --config-list                  "16,4,1,4" \
    --benchmark-list               $BENCH_ARGS \
    --output-dir                   "$RESULT_DIR" \
    --name                         "dflash_lora_${TIMESTAMP}" \
    --skip-launch-server \
    2>&1 | tee "$log_file"
# The pipeline's own exit status is tee's, so a crashed benchmark would
# otherwise go unnoticed. Read python3's status from PIPESTATUS (must be done
# immediately after the pipeline) and propagate it.
bench_status=${PIPESTATUS[0]}
if [[ "$bench_status" -ne 0 ]]; then
    echo "[ERROR] bench_eagle3.py exited with status $bench_status (log: $log_file)" >&2
    exit "$bench_status"
fi

echo ""
echo "Done. Latest result files:"
# Best-effort listing of the five most recently modified result files. Errors
# are silenced and the status ignored so that a run leaving no *.jsonl files
# does not turn into a script failure.
ls -lht "$RESULT_DIR"/*.jsonl 2>/dev/null | head -5 || true