#!/usr/bin/env bash
set -euo pipefail

# Pull latest main, pick next available prepared batch, and launch Mac training.
#
# Usage:
#   ./scripts/mac_pull_and_run_next_batch.sh [--dry-run] [--epochs N] [--batch_size N]
#       [--hf_cache_mode MODE] [--hf_cache_dir DIR] [--machine_tag TAG]
#
# A batch is "prepared" when data/sinhala_tts_batch<N>_custom/raw.arrow exists.
# A batch is "claimed" when its dataset name appears in ACTIVE_RUNS.md.
# The lowest-numbered prepared, unclaimed batch is selected, recorded in
# ACTIVE_RUNS.md, and handed to ./scripts/train_f5_local.sh.
#
# With --dry-run the selection and command are printed, but nothing is written
# to ACTIVE_RUNS.md and training is not launched (git pull still runs).

DRY_RUN=0
EPOCHS=20
BATCH_SIZE=500
HF_CACHE_MODE="auto"
HF_CACHE_DIR=".hf_cache"
MACHINE_TAG="mac"
LOCK_FILE="ACTIVE_RUNS.md"

# Print a message to stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Abort with a clear message when a value-taking option is the last argument.
#   $1 - option name, $2 - number of remaining arguments (option included)
require_value() {
  if [[ "$2" -lt 2 ]]; then
    die "Missing value for $1"
  fi
}

# Succeed when dataset name $1 is already recorded in the lock file.
# -F: the name is a literal, not a regex. -w: whole-word match, so that
# "sinhala_tts_batch1" is not considered claimed because of "sinhala_tts_batch10".
is_claimed() {
  [[ -f "$LOCK_FILE" ]] && grep -qwF -- "$1" "$LOCK_FILE"
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --dry-run)
      DRY_RUN=1
      shift
      ;;
    --epochs)
      require_value "$1" "$#"
      EPOCHS="$2"
      shift 2
      ;;
    --batch_size)
      require_value "$1" "$#"
      BATCH_SIZE="$2"
      shift 2
      ;;
    --hf_cache_mode)
      require_value "$1" "$#"
      HF_CACHE_MODE="$2"
      shift 2
      ;;
    --hf_cache_dir)
      require_value "$1" "$#"
      HF_CACHE_DIR="$2"
      shift 2
      ;;
    --machine_tag)
      require_value "$1" "$#"
      MACHINE_TAG="$2"
      shift 2
      ;;
    *)
      die "Unknown arg: $1"
      ;;
  esac
done

git pull --ff-only origin main

# Collect the numeric ids of all prepared batches, sorted ascending.
# A read loop is used rather than mapfile, which bash 3.2 does not provide.
ALL_BATCHES=()
while IFS= read -r line; do
  ALL_BATCHES+=("$line")
done < <(
  for d in data/sinhala_tts_batch*_custom; do
    # An unmatched glob stays literal; the -d test filters that case out too.
    [[ -d "$d" ]] || continue
    [[ -f "$d/raw.arrow" ]] || continue
    b="$(basename "$d")"
    num="${b#sinhala_tts_batch}"
    num="${num%_custom}"
    if [[ "$num" =~ ^[0-9]+$ ]]; then
      printf "%s\n" "$num"
    fi
  done | sort -n
)

if [[ ${#ALL_BATCHES[@]} -eq 0 ]]; then
  die "No prepared batches found under data/sinhala_tts_batch*_custom"
fi

# Pick the lowest-numbered batch that nobody has claimed yet.
NEXT_NUM=""
for n in "${ALL_BATCHES[@]}"; do
  batch_name="sinhala_tts_batch${n}"
  if is_claimed "$batch_name"; then
    continue
  fi
  NEXT_NUM="$n"
  break
done

if [[ -z "$NEXT_NUM" ]]; then
  die "All available prepared batches appear claimed in $LOCK_FILE"
fi

DATASET_NAME="sinhala_tts_batch${NEXT_NUM}"
EXP_NAME="F5TTS_v1_Base"
START_TS="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
RUN_ID="${MACHINE_TAG}_batch${NEXT_NUM}"

CMD=(
  ./scripts/train_f5_local.sh
  --dataset_name "${DATASET_NAME}"
  --exp_name "${EXP_NAME}"
  --epochs "${EPOCHS}"
  --batch_size "${BATCH_SIZE}"
  --hf_cache_mode "${HF_CACHE_MODE}"
  --hf_cache_dir "${HF_CACHE_DIR}"
)

echo "Selected batch: ${DATASET_NAME}"
echo "Command: ${CMD[*]}"

# Exit before touching the lock file: a dry run must not claim the batch.
if [[ "$DRY_RUN" -eq 1 ]]; then
  echo "Dry run only; not launching."
  exit 0
fi

# Record the claim immediately before launching.
# NOTE(review): the check above and this append are not atomic, so two
# machines starting at the same moment could both claim the same batch.
if [[ ! -f "$LOCK_FILE" ]]; then
  cat > "$LOCK_FILE" <<'EOF'
# ACTIVE RUNS
# machine | dataset_name | exp_name | started_utc
EOF
fi

# NOTE(review): the third column is headed "exp_name" but holds RUN_ID, not
# EXP_NAME. Kept as-is in case other tooling reads this file; confirm intent.
echo "| ${MACHINE_TAG} | ${DATASET_NAME} | ${RUN_ID} | ${START_TS} |" >> "$LOCK_FILE"

# Replace this shell with the training process so its exit status is ours.
exec "${CMD[@]}"