#!/usr/bin/env bash
# Evaluate DFlash-LoRA-Inject: measure accepted length OFFLINE.
# 8 GPUs parallel by default, each GPU runs a shard of prompts independently.
#
# WHY offline?
#   sglang STANDALONE treats the draft as an independent autoregressive model,
#   completely ignoring the layer-by-layer injection that LoRA-Inject was
#   trained with. Result: accept_length ≈ 4.7 for ALL models (no signal).
#
#   sglang DFLASH expects the DFlash-b16 architecture (5-layer, fc+hidden_norm),
#   which is structurally different from LoRA-Inject (full 36-layer + LoRA).
#
#   So we run offline spec-generate with the correct injection pattern.
#
# Usage:
#   bash start_server_dflash.sh                      # 8 GPUs, all benchmarks
#   bash start_server_dflash.sh 4                    # 4 GPUs
#   bash start_server_dflash.sh 8 humaneval          # specific benchmark
#   bash start_server_dflash.sh 8 --num-samples 20   # quick test
#   bash start_server_dflash.sh --num-samples 20     # default GPU count, options only
#
# Arguments:
#   $1   (optional) value forwarded as --nproc_per_node; defaults to 8.
#        If the first argument starts with '-', it is treated as an option
#        for the evaluation script and the default GPU count is used.
#   ...  every remaining argument is forwarded verbatim to
#        eval_dflash_lora_inject.py.
set -euo pipefail

# Directory containing this script; the evaluation entry point lives beside it.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)

# Consume the GPU count only when one was actually supplied. A leading
# option (e.g. --num-samples) must not be swallowed as the GPU count,
# otherwise the launcher would receive "--nproc_per_node --num-samples".
NUM_GPUS=8
if (( $# > 0 )) && [[ "$1" != -* ]]; then
  NUM_GPUS=${1:-8}   # an explicitly empty first argument still means "default"
  shift
fi

# ---- defaults ----
readonly BASE_MODEL=/workspace/models/Qwen3-8B
readonly ADAPTER_ROOT=/workspace/hanrui/syxin_old/Specforge/outputs/qwen3-8b-dflash-lora-inject
readonly CKPT=epoch_3_step_1400
readonly MERGED=/workspace/hanrui/syxin_old/Specforge/outputs/qwen3-8b-dflash-lora-inject-merged
readonly RESULT_DIR=/workspace/hanrui/syxin_old/Specforge/benchmarks/results
readonly PYTHON=/workspace/miniconda3/envs/spec/bin/python3

# Print the effective configuration before launching.
echo "============================================"
echo " DFlash-LoRA-Inject Offline Evaluation"
echo " target : $BASE_MODEL"
echo " ckpt : $CKPT"
echo " merged : $MERGED"
echo " GPUs : $NUM_GPUS"
echo "============================================"

# Launch one worker per requested GPU through torch.distributed.run.
# All expansions are quoted so paths containing whitespace or glob
# characters are passed through as single arguments; "$@" carries any
# extra user-supplied arguments to the evaluation script.
"$PYTHON" -m torch.distributed.run \
  --standalone \
  --nproc_per_node "$NUM_GPUS" \
  "$SCRIPT_DIR/eval_dflash_lora_inject.py" \
  --base-model "$BASE_MODEL" \
  --adapter-root "$ADAPTER_ROOT" \
  --ckpt "$CKPT" \
  --merged-path "$MERGED" \
  --block-size 16 \
  --output-dir "$RESULT_DIR" \
  "$@"