File size: 2,316 Bytes
075e81a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d0cbd0a
 
 
 
075e81a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d0cbd0a
075e81a
d0cbd0a
075e81a
 
 
 
 
 
 
 
d0cbd0a
075e81a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env bash
set -euo pipefail

# Pull the latest main, pick the next unclaimed prepared batch, and launch Mac training.
# Usage:
#   ./scripts/mac_pull_and_run_next_batch.sh [--dry-run] [--epochs N] [--batch_size N]
#       [--hf_cache_mode MODE] [--hf_cache_dir DIR] [--machine_tag TAG]

# Defaults; each one can be overridden by the matching command-line option.
DRY_RUN=0                 # set to 1 by --dry-run
EPOCHS=20                 # --epochs
BATCH_SIZE=500            # --batch_size
HF_CACHE_MODE="auto"      # --hf_cache_mode
HF_CACHE_DIR=".hf_cache"  # --hf_cache_dir
MACHINE_TAG="mac"         # --machine_tag
LOCK_FILE="ACTIVE_RUNS.md"

#######################################
# Parse command-line options into the globals above.
# Globals:   DRY_RUN, EPOCHS, BATCH_SIZE, HF_CACHE_MODE, HF_CACHE_DIR,
#            MACHINE_TAG (written)
# Arguments: the script's argument list ("$@")
# Outputs:   a usage error on stderr for an unknown option or a missing value
# Returns:   exits 1 on a usage error; otherwise returns 0
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run)
        DRY_RUN=1
        shift
        ;;
      --epochs | --batch_size | --hf_cache_mode | --hf_cache_dir | --machine_tag)
        # Without this check a trailing option dies with a cryptic
        # "$2: unbound variable" under set -u.
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires a value" >&2
          exit 1
        fi
        case "$1" in
          --epochs) EPOCHS="$2" ;;
          --batch_size) BATCH_SIZE="$2" ;;
          --hf_cache_mode) HF_CACHE_MODE="$2" ;;
          --hf_cache_dir) HF_CACHE_DIR="$2" ;;
          --machine_tag) MACHINE_TAG="$2" ;;
        esac
        shift 2
        ;;
      *)
        echo "Unknown arg: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# Sync the checkout with origin's main branch before selecting a batch.
# --ff-only makes git refuse to merge when the local branch has diverged, and
# with `set -e` in effect that failure aborts the script. This step runs
# unconditionally, including when --dry-run was given.
git pull --ff-only origin main

# Collect the numeric ids of prepared batches into ALL_BATCHES, in ascending
# numeric order. A batch counts as prepared when
# data/sinhala_tts_batch<N>_custom is a directory containing raw.arrow;
# entries whose <N> is not purely numeric are ignored.
ALL_BATCHES=()
while IFS= read -r batch_id; do
  ALL_BATCHES+=("$batch_id")
done < <(
  name_re='^sinhala_tts_batch([0-9]+)_custom$'
  for candidate in data/sinhala_tts_batch*_custom; do
    # An unmatched glob stays literal; the directory test filters that case too.
    if [[ -d "$candidate" && -f "$candidate/raw.arrow" ]]; then
      if [[ "${candidate##*/}" =~ $name_re ]]; then
        printf "%s\n" "${BASH_REMATCH[1]}"
      fi
    fi
  done | sort -n
)

# Nothing to train on: report it and stop.
(( ${#ALL_BATCHES[@]} > 0 )) || {
  echo "No prepared batches found under data/sinhala_tts_batch*_custom"
  exit 1
}

#######################################
# Report whether a dataset name already appears in the lock file.
# The name must not be followed by another digit, so "sinhala_tts_batch1" is
# not treated as claimed just because "sinhala_tts_batch10" is listed.
# Arguments: $1 - dataset name (letters, digits and underscores)
#            $2 - path to the lock file
# Returns:   0 if the name is listed; non-zero if it is not or the lock file
#            does not exist
#######################################
is_batch_claimed() {
  local name="$1" lock_path="$2"
  [[ -f "$lock_path" ]] || return 1
  grep -qE -- "${name}([^0-9]|\$)" "$lock_path"
}

# Pick the first batch in ALL_BATCHES order that has no entry in the lock file.
NEXT_NUM=""
for n in "${ALL_BATCHES[@]}"; do
  if ! is_batch_claimed "sinhala_tts_batch${n}" "$LOCK_FILE"; then
    NEXT_NUM="$n"
    break
  fi
done

# No unclaimed batch was selected: report it and stop.
[[ -n "$NEXT_NUM" ]] || {
  echo "All available prepared batches appear claimed in $LOCK_FILE"
  exit 1
}

# Identifiers for the selected run.
DATASET_NAME="sinhala_tts_batch${NEXT_NUM}"
EXP_NAME="F5TTS_v1_Base"
START_TS="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"  # UTC timestamp
RUN_ID="${MACHINE_TAG}_batch${NEXT_NUM}"

#######################################
# Record the selected batch as claimed by appending a row to the lock file,
# creating the file with its header first if it does not exist.
# Does nothing in dry-run mode: a dry run must not leave a claim behind,
# otherwise the batch would be skipped by every later real run.
# Globals: DRY_RUN, LOCK_FILE, MACHINE_TAG, DATASET_NAME, RUN_ID,
#          START_TS (read)
#######################################
claim_batch() {
  if [[ "$DRY_RUN" -eq 1 ]]; then
    return 0
  fi

  if [[ ! -f "$LOCK_FILE" ]]; then
    cat > "$LOCK_FILE" <<'EOF'
# ACTIVE RUNS
# machine | dataset_name | exp_name | started_utc
EOF
  fi

  # NOTE(review): the header labels the third column exp_name, but the row
  # stores RUN_ID. Confirm which one is intended before changing either.
  echo "| ${MACHINE_TAG} | ${DATASET_NAME} | ${RUN_ID} | ${START_TS} |" >> "$LOCK_FILE"
}

claim_batch

# Assemble the training invocation as an array so every value stays a single
# argument regardless of its content.
CMD=(./scripts/train_f5_local.sh)
CMD+=(--dataset_name "${DATASET_NAME}")
CMD+=(--exp_name "${EXP_NAME}")
CMD+=(--epochs "${EPOCHS}")
CMD+=(--batch_size "${BATCH_SIZE}")
CMD+=(--hf_cache_mode "${HF_CACHE_MODE}")
CMD+=(--hf_cache_dir "${HF_CACHE_DIR}")

# Show what was chosen and what is about to run.
echo "Selected batch: ${DATASET_NAME}"
echo "Command: ${CMD[*]}"

# With --dry-run, stop after printing the command.
if (( DRY_RUN == 1 )); then
  echo "Dry run only; not launching."
  exit 0
fi

# Hand this process over to the training script.
exec "${CMD[@]}"