#!/usr/bin/env bash
# Launch (CD-)OPD training detached so it survives SSH disconnect.
#
# Usage:
#   scripts/launch_opd.sh [extra hydra overrides ...]                # vanilla OPD
#   CONFIG=cd_opd scripts/launch_opd.sh [extra hydra overrides ...]  # CD-OPD (+Mi/+Pa)
#
# Ablation env vars consumed by configs/cd_opd_qwen25vl.sh:
#   CFG_ENABLED, CFG_ALPHA, CFG_PERTURBATION, CFG_PERTURBATIONS, CFG_AGGREGATION
#   TOTAL_EPOCHS, ACTOR_LR, EXPERIMENT_NAME
#
# Files written under $ROOT/logs:
#   <tag>_<timestamp>.log   combined stdout/stderr of the launched config script
#   <tag>.latest.log        symlink to the most recent log for that tag
#   <tag>.pid               PID of the launched process
#
# Exit status:
#   0  the config script was started in the background
#   1  unknown CONFIG, missing config script, or a run with the same tag is
#      still alive according to the PID file
set -euo pipefail

ROOT=/mnt/local-fast/opd_zt
cd "$ROOT"
# shellcheck source=/dev/null
source "$ROOT/.env"
# shellcheck source=/dev/null
source "$ROOT/.venv/bin/activate"

# Map the CONFIG selector to the script to run and the tag used for log/PID names.
CONFIG=${CONFIG:-opd}
case "$CONFIG" in
  opd)
    CONFIG_FILE="$ROOT/configs/opd_qwen25vl.sh"
    TAG=opd
    ;;
  cd_opd)
    CONFIG_FILE="$ROOT/configs/cd_opd_qwen25vl.sh"
    TAG=cd_opd
    ;;
  *)
    echo "Unknown CONFIG=$CONFIG (expected opd|cd_opd)" >&2
    exit 1
    ;;
esac

# Fail up front: a missing script would otherwise only surface inside the
# detached log, after this launcher has already reported success.
if [[ ! -f "$CONFIG_FILE" ]]; then
  echo "Config script not found: $CONFIG_FILE" >&2
  exit 1
fi

LOG_DIR="$ROOT/logs"
mkdir -p "$LOG_DIR"
STAMP=$(date +%Y%m%d_%H%M%S)
LOG="$LOG_DIR/${TAG}_${STAMP}.log"
PID_FILE="$LOG_DIR/${TAG}.pid"

# Refuse to start a second run for the same tag while the recorded PID is alive.
if [[ -f "$PID_FILE" ]]; then
  # An unreadable PID file is treated the same as an empty one.
  OLD_PID=$(cat "$PID_FILE" 2>/dev/null || true)
  # Only probe a strictly positive integer: `kill -0 0` or a negative value
  # would address a process group rather than the recorded process.
  if [[ "$OLD_PID" =~ ^[1-9][0-9]*$ ]] && kill -0 "$OLD_PID" 2>/dev/null; then
    echo "$TAG training already running, pid=$OLD_PID" >&2
    echo "Tail log: tail -f $(readlink -f "$LOG_DIR/${TAG}.latest.log")" >&2
    exit 1
  fi
fi

# setsid puts the job in its own session and nohup ignores SIGHUP, so closing
# the terminal does not take the run down; stdin is detached from the terminal
# and all output goes to the log file.
setsid nohup bash "$CONFIG_FILE" "$@" \
  > "$LOG" 2>&1 < /dev/null &
PID=$!
echo "$PID" > "$PID_FILE"
ln -sfn "$LOG" "$LOG_DIR/${TAG}.latest.log"
# Best effort: drop the job from this shell's job table; failure is harmless.
disown "$PID" || true

echo "launched $TAG, pid=$PID"
echo "log: $LOG (symlinked at $LOG_DIR/${TAG}.latest.log)"
echo
echo "Watch with: tail -f $LOG_DIR/${TAG}.latest.log"
echo "Stop with: kill $PID"