#!/usr/bin/env bash
# Sequential sweep runner.
#
# Each config grabs all 8 GPUs via accelerate, so they run back-to-back, not in
# parallel. Output goes to logs/<run>.log; the master log goes to logs/sweep_master.log.
# Reads HF_TOKEN, HUGGING_FACE_HUB_TOKEN, WANDB_API_KEY from the calling env.
#
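# Launch in the background from the project root (the parent of scripts/).
# logs/ must exist beforehand: the shell opens the redirect target before this
# script gets a chance to create the directory.
#   mkdir -p logs && nohup ./scripts/run_sweep.sh > logs/sweep_master.log 2>&1 &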
# No `-e` on purpose: one failing config must not abort the rest of the sweep.
# Failures are counted instead and reflected in the final exit status.
set -uo pipefail

# Work from the parent of this script's directory so the relative configs/,
# logs/ and .venv/ paths below resolve. Abort if that fails rather than
# launching every run from the wrong directory.
cd -- "$(dirname "$0")/.." || {
  echo "error: cannot cd to $(dirname "$0")/.." >&2
  exit 1
}

# Configs to run, in order.
CONFIGS=(
  "configs/replicate_zero4.toml"
  "configs/grow40_winning.toml"
  "configs/grow40_simple.toml"
)

LOG_DIR="logs"
mkdir -p "$LOG_DIR" || {
  echo "error: cannot create log directory $LOG_DIR" >&2
  exit 1
}

# Plain scalars (not an array) so the summary below is safe under `set -u`
# on older bash versions, where expanding an empty array is an error.
failed_count=0
failed_names=""

for cfg in "${CONFIGS[@]}"; do
  name="$(basename "$cfg" .toml)"
  log="$LOG_DIR/$name.log"
  echo ">>> [$(date '+%F %T')] starting $name -> $log"

  # Per-run output (stdout and stderr) goes to the run's own log file; only
  # the start/finish markers and failure tails reach this script's stdout.
  rc=0
  .venv/bin/accelerate launch \
    --config_file configs/accelerate.yaml \
    distill.py \
    --config "$cfg" \
    > "$log" 2>&1 || rc=$?

  echo "<<< [$(date '+%F %T')] finished $name (exit=$rc)"
  if [[ $rc -ne 0 ]]; then
    failed_count=$((failed_count + 1))
    failed_names="$failed_names $name"
    # Surface the end of the failing run's log so the cause is visible
    # without opening the per-run file.
    echo " last 20 lines of $log:"
    tail -n 20 "$log" | sed 's/^/ /'
  fi
done

echo ">>> [$(date '+%F %T')] sweep complete"

# Exit non-zero when any run failed so callers (CI, wrappers, `wait`) can tell
# a clean sweep from a partially failed one.
if [[ $failed_count -gt 0 ]]; then
  echo "!!! $failed_count of ${#CONFIGS[@]} run(s) failed:$failed_names"
  exit 1
fi