#!/usr/bin/env bash
# Definitive full-scale run.
# Usage: $0 MODEL RUN_NAME [extra train.py args...]
#   MODEL    - model letter, forwarded to train.py --model
#   RUN_NAME - run name; also names the log file under /workspace/runs
set -euo pipefail

MODEL="${1:?model letter}"
RUN_NAME="${2:?run_name}"
shift 2
EXTRA=("$@")

# NOTE: ${EXTRA[*]+...} / ${EXTRA[@]+...} guard against "unbound variable"
# under set -u when no extra args are given (bash < 4.4 treats an empty
# array as unset).
echo "[bootstrap] definitive run: model=$MODEL run=$RUN_NAME extra=${EXTRA[*]+${EXTRA[*]}}"

cd /workspace

# Pick whichever repo checkout exists (either casing).
REPO_DIR=""
for d in PhysioJEPA physiojepa; do
  [ -d "$d" ] && REPO_DIR="$d" && break
done
[ -n "$REPO_DIR" ] || { echo "no repo dir" >&2; exit 1; }
cd "$REPO_DIR"

PY=/usr/bin/python3
$PY -m pip install --quiet --upgrade pip
$PY -m pip install --quiet \
  'datasets>=4.8.4' 'einops>=0.8.2' 'matplotlib>=3.10.0' \
  'neurokit2>=0.2.13' 'python-dotenv>=1.0' 'pyyaml>=6.0' \
  'scikit-learn>=1.5' 'scipy>=1.13' 'tqdm>=4.66' \
  'wandb>=0.18' 'wfdb>=4.3.1' 'huggingface_hub>=0.25' 'requests'

# Copy secrets into the repo if present. (A false test before && does not
# abort the script under set -e; only the final command in the list can.)
[ -f /workspace/.env ] && cp /workspace/.env .env

# Step 1: download MIMIC shards + build index (idempotent)
if [ ! -f /workspace/cache/mimic_index.json ]; then
  echo "[bootstrap] downloading MIMIC + building index"
  PYTHONPATH=src $PY scripts/prepare_data.py \
    --root /workspace/cache/mimic --index /workspace/cache/mimic_index.json
fi

# Step 2: precompute mmap windows (idempotent — checks inside)
if [ ! -f /workspace/cache/windows_meta.json ]; then
  echo "[bootstrap] precomputing windows → mmap"
  PYTHONPATH=src $PY -u scripts/precompute_windows.py \
    --index /workspace/cache/mimic_index.json \
    --out_dir /workspace/cache
fi

# Step 3: train. pipefail makes the tee pipeline propagate train.py's
# exit status, so a failed run still fails this script.
mkdir -p /workspace/runs
echo "[bootstrap] launching training: model=$MODEL"
PYTHONPATH=src PYTHONUNBUFFERED=1 $PY -u scripts/train.py \
  --config configs/base.yaml \
  --model "$MODEL" \
  --run_name "$RUN_NAME" \
  --epochs 100 \
  --batch_size 64 \
  --fast_cache_dir /workspace/cache \
  --output_dir /workspace/runs \
  --num_workers 12 \
  --log_every 100 \
  --mask_ratio 0.75 \
  --seed 42 \
  ${EXTRA[@]+"${EXTRA[@]}"} \
  2>&1 | tee "/workspace/runs/${RUN_NAME}.log"
echo "[bootstrap] done"