#!/usr/bin/env bash
# Launch (CD-)OPD training detached so it survives SSH disconnect.
# Usage:
#   scripts/launch_opd.sh [extra hydra overrides ...]                 # vanilla OPD
#   CONFIG=cd_opd scripts/launch_opd.sh [extra hydra overrides ...]   # CD-OPD (+Mi/+Pa)
#
# Ablation env vars consumed by configs/cd_opd_qwen25vl.sh:
#   CFG_ENABLED, CFG_ALPHA, CFG_PERTURBATION, CFG_PERTURBATIONS, CFG_AGGREGATION
#   TOTAL_EPOCHS, ACTOR_LR, EXPERIMENT_NAME
#
# Files written under $ROOT/logs:
#   <tag>_<timestamp>.log   combined stdout/stderr of the training job
#   <tag>.latest.log        symlink to the most recent log
#   <tag>.pid               PID of the launched job (used as a "still running" guard)
#
# Exit status: 0 when the job was started; 1 on an unknown CONFIG, a missing
# config script, or when a job with the same tag is still alive.
set -euo pipefail

# Print a diagnostic on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

ROOT=/mnt/local-fast/opd_zt
cd "$ROOT" || die "cannot cd to $ROOT"

# Load project environment and the virtualenv before anything is launched, so
# the background job inherits them.
# NOTE(review): variables from .env reach the job only if .env exports them — confirm.
source "$ROOT/.env"
source "$ROOT/.venv/bin/activate"

# Map the CONFIG selector to its launch script and to the tag used for
# log/pid file names.
CONFIG=${CONFIG:-opd}
case "$CONFIG" in
  opd)
    CONFIG_FILE="$ROOT/configs/opd_qwen25vl.sh"
    TAG=opd
    ;;
  cd_opd)
    CONFIG_FILE="$ROOT/configs/cd_opd_qwen25vl.sh"
    TAG=cd_opd
    ;;
  *)
    die "Unknown CONFIG=$CONFIG (expected opd|cd_opd)"
    ;;
esac

# Fail now rather than reporting a "launched" job that dies immediately in the
# background because its script does not exist.
[[ -f "$CONFIG_FILE" ]] || die "config script not found: $CONFIG_FILE"

LOG_DIR="$ROOT/logs"
mkdir -p "$LOG_DIR"
STAMP=$(date +%Y%m%d_%H%M%S)
LOG="$LOG_DIR/${TAG}_${STAMP}.log"
PID_FILE="$LOG_DIR/${TAG}.pid"
LATEST_LOG="$LOG_DIR/${TAG}.latest.log"

# Refuse to start a second job with the same tag while the recorded PID is
# still alive. A stale or empty pid file is simply overwritten below.
if [[ -f "$PID_FILE" ]]; then
  existing_pid=$(cat "$PID_FILE" 2>/dev/null || true)
  if [[ -n "$existing_pid" ]] && kill -0 "$existing_pid" 2>/dev/null; then
    {
      echo "$TAG training already running, pid=$existing_pid"
      echo "Tail log: tail -f $(readlink -f "$LATEST_LOG")"
    } >&2
    exit 1
  fi
fi

# setsid puts the job in its own session (no controlling terminal), nohup makes
# it ignore SIGHUP, and stdin comes from /dev/null so it never blocks on input.
setsid nohup bash "$CONFIG_FILE" "$@" \
  > "$LOG" 2>&1 < /dev/null &
PID=$!
echo "$PID" > "$PID_FILE"
ln -sfn "$LOG" "$LATEST_LOG"
# Best effort: without job control there may be no job table entry to remove.
disown "$PID" || true

echo "launched $TAG, pid=$PID"
echo "log: $LOG (symlinked at $LATEST_LOG)"
echo
echo "Watch with: tail -f $LATEST_LOG"
# The job leads its own process group (setsid), so PID doubles as the PGID.
# Signalling -PID stops the wrapper shell and everything it spawned; a plain
# `kill PID` would only stop the wrapper and could orphan the trainer.
# NOTE(review): this holds when setsid did not have to fork (the normal case
# when this file is executed as a script rather than sourced interactively).
echo "Stop with: kill -- -$PID"