#!/bin/bash
#
# Launch DramaBox IC-LoRA training. Wraps src/train.py with accelerate.
#
# Usage:
#   ./scripts/train.sh --config configs/training_args.yaml --gpus 2,3,4,5,6
#
# Environment overrides: GPUS, NUM_PROCS, TRAIN_VAL_GPU, PYTHON.
set -euo pipefail

CONFIG=""
GPUS=${GPUS:-0,1,2,3,4,5,6,7}
NUM_PROCS=${NUM_PROCS:-}
TRAIN_VAL_GPU=${TRAIN_VAL_GPU:-}
# Unrecognized flags are forwarded verbatim to train.py. Kept as an array
# (not a string) so arguments containing spaces survive intact.
EXTRA_ARGS=()

while [[ $# -gt 0 ]]; do
  case $1 in
    --config)        CONFIG="$2"; shift 2;;
    --gpus)          GPUS="$2"; shift 2;;
    --num-procs)     NUM_PROCS="$2"; shift 2;;
    --train-val-gpu) TRAIN_VAL_GPU="$2"; shift 2;;
    *)               EXTRA_ARGS+=("$1"); shift;;
  esac
done

if [[ -z "$CONFIG" ]]; then
  echo "Usage: $0 --config <yaml> [--gpus 2,3,4,5,6] [--num-procs N] [--train-val-gpu N]" >&2
  exit 1
fi

REPO="$(cd "$(dirname "$0")/.." && pwd)"
# NOTE: the previous default "/usr/bin/env python" broke when expanded quoted
# ("$PYTHON" became a single word containing a space). Default to a plain
# interpreter name resolved via PATH instead.
PYTHON="${PYTHON:-python3}"

# Default num_procs to the number of entries in the comma-separated GPU list.
if [[ -z "$NUM_PROCS" ]]; then
  NUM_PROCS=$(awk -F',' '{print NF}' <<< "$GPUS")
fi

# Read one key from the YAML config (a flat dict mapping arg-name -> value;
# list values such as `data_dir` / `speaker_index` are joined with spaces).
#   $1 - key name
#   $2 - default used when the key is absent
# Config path, key and default are passed via sys.argv so quotes/backslashes
# in any of them cannot be injected into the embedded Python source.
get() {
  "$PYTHON" -c '
import sys
import yaml

cfg = yaml.safe_load(open(sys.argv[1]))
val = cfg.get(sys.argv[2], sys.argv[3])
print(" ".join(map(str, val)) if isinstance(val, (list, tuple)) else val)
' "$CONFIG" "$1" "${2-}"
}

DATA_DIRS=$(get data_dir "")
SPK_IDX=$(get speaker_index "")
OUT_DIR=$(get output_dir "tts_iclora_v1")
if [[ "$OUT_DIR" != /* ]]; then
  OUT_DIR="$REPO/$OUT_DIR"
fi
CKPT=$(get checkpoint "$REPO/ltx-2.3-22b-dev.safetensors")
FULL_CKPT=$(get full_checkpoint "$REPO/ltx-2.3-22b-dev.safetensors")
BASE_MODEL=$(get base_model dev)
LORA_RANK=$(get lora_rank 128)
LORA_ALPHA=$(get lora_alpha 128)
LORA_DROPOUT=$(get lora_dropout 0.0)
RESUME_LORA=$(get resume_lora "")
if [[ -n "$RESUME_LORA" && "$RESUME_LORA" != /* ]]; then
  RESUME_LORA="$REPO/$RESUME_LORA"
fi
REF_RATIO=$(get ref_ratio 0.3)
MAX_REF=$(get max_ref_tokens 200)
TEXT_DROP=$(get text_dropout 0.4)
STEPS=$(get steps 10000)
LR=$(get lr 0.0001)
SCHED=$(get lr_scheduler cosine)
BATCH=$(get batch_size 1)
GRAD_ACC=$(get grad_accum 4)
GRAD_NORM=$(get max_grad_norm 1.0)
SAVE_EVERY=$(get save_every 500)
LOG_EVERY=$(get log_every 50)
SEED=$(get seed 53)
WARMUP=$(get warmup_steps 500)
VAL_CFG=$(get val_config "")
if [[ -n "$VAL_CFG" && "$VAL_CFG" != /* ]]; then
  VAL_CFG="$REPO/configs/$VAL_CFG"
fi

mkdir -p "$OUT_DIR"

# data_dir / speaker_index may be space-joined lists; split them explicitly so
# each entry becomes its own argv word (replaces the old unquoted expansion).
read -r -a DATA_DIR_ARR <<< "$DATA_DIRS"
read -r -a SPK_IDX_ARR <<< "$SPK_IDX"

# The `${arr[@]+...}` guards keep `set -u` happy on bash < 4.4, where
# expanding an empty array is treated as an unset-variable error.
CMD=(
  "$PYTHON" -u -m accelerate.commands.launch
  --num_processes="$NUM_PROCS"
  --mixed_precision=bf16
  "$REPO/src/train.py"
  --data-dir ${DATA_DIR_ARR[@]+"${DATA_DIR_ARR[@]}"}
  --speaker-index ${SPK_IDX_ARR[@]+"${SPK_IDX_ARR[@]}"}
  --output-dir "$OUT_DIR"
  --checkpoint "$CKPT"
  --full-checkpoint "$FULL_CKPT"
  --base-model "$BASE_MODEL"
  --lora-rank "$LORA_RANK"
  --lora-alpha "$LORA_ALPHA"
  --lora-dropout "$LORA_DROPOUT"
  --ref-ratio "$REF_RATIO"
  --max-ref-tokens "$MAX_REF"
  --text-dropout "$TEXT_DROP"
  --steps "$STEPS"
  --lr "$LR"
  --lr-scheduler "$SCHED"
  --batch-size "$BATCH"
  --grad-accum "$GRAD_ACC"
  --max-grad-norm "$GRAD_NORM"
  --save-every "$SAVE_EVERY"
  --log-every "$LOG_EVERY"
  --seed "$SEED"
  --warmup-steps "$WARMUP"
)
[[ -n "$RESUME_LORA" ]] && CMD+=( --resume-lora "$RESUME_LORA" )
[[ -n "$VAL_CFG" ]] && CMD+=( --val-config "$VAL_CFG" )
CMD+=( ${EXTRA_ARGS[@]+"${EXTRA_ARGS[@]}"} )

LAUNCH_ENV=( "CUDA_VISIBLE_DEVICES=$GPUS" )
[[ -n "$TRAIN_VAL_GPU" ]] && LAUNCH_ENV+=( "TRAIN_VAL_GPU=$TRAIN_VAL_GPU" )

echo "==== launching DramaBox training ===="
echo " GPUs: $GPUS (procs: $NUM_PROCS)"
echo " out: $OUT_DIR"
echo " ckpt: $CKPT"
echo " steps: $STEPS lr: $LR $SCHED warmup: $WARMUP"
echo "======================================"
env "${LAUNCH_ENV[@]}" "${CMD[@]}"