#!/bin/bash
#
# Launch src/train.py through `accelerate`, taking hyper-parameters from a
# YAML config file.
#
# Usage:
#   <this script> --config <yaml> [--gpus 2,3,4,5,6] [--num-procs N]
#                 [--train-val-gpu N] [extra args forwarded to train.py]
#
# Environment overrides (command-line flags take precedence):
#   GPUS           comma-separated device list exported as CUDA_VISIBLE_DEVICES
#   NUM_PROCS      number of processes handed to accelerate
#   TRAIN_VAL_GPU  exported to the training process when non-empty
#   PYTHON         interpreter used to read the config and run the launcher

# Abort on the first unhandled command failure.
set -o errexit

# Path to the YAML config; filled in by --config and required.
CONFIG=""

# Fall back to the built-in device list when GPUS is unset or empty.
: "${GPUS:=0,1,2,3,4,5,6,7}"

# These two stay empty unless supplied by the environment or a flag.
NUM_PROCS="${NUM_PROCS-}"
TRAIN_VAL_GPU="${TRAIN_VAL_GPU-}"

# Accumulates unrecognised command-line arguments (space-separated).
EXTRA_ARGS=""
|
|
#######################################
# Parse the launcher's command-line flags.
# Globals:
#   CONFIG, GPUS, NUM_PROCS, TRAIN_VAL_GPU (written by their flags)
#   EXTRA_ARGS (appended to; unrecognised arguments, space-separated)
# Arguments:
#   The script's positional parameters.
# Outputs:
#   An error message on stderr when a flag is missing its value.
# Returns:
#   0 on success; exits the script with status 1 on a malformed flag.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --config | --gpus | --num-procs | --train-val-gpu)
        # Without this guard `shift 2` fails on a trailing flag and, under
        # `set -e`, the script dies without telling the user why.
        if [[ $# -lt 2 ]]; then
          printf '%s: option %s requires a value\n' "$0" "$1" >&2
          exit 1
        fi
        case "$1" in
          --config) CONFIG="$2" ;;
          --gpus) GPUS="$2" ;;
          --num-procs) NUM_PROCS="$2" ;;
          --train-val-gpu) TRAIN_VAL_GPU="$2" ;;
        esac
        shift 2
        ;;
      *)
        # Anything else is collected verbatim and forwarded to the training
        # command. NOTE: EXTRA_ARGS is a plain string, so a forwarded
        # argument that itself contains whitespace is re-split later.
        EXTRA_ARGS+=" $1"
        shift
        ;;
    esac
  done
}

parse_args "$@"
|
|
# --config is mandatory: everything below is read from that YAML file.
# Diagnostics go to stderr so they are not mixed into captured stdout.
if [[ -z "$CONFIG" ]]; then
  printf 'Usage: %s --config <yaml> [--gpus 2,3,4,5,6] [--num-procs N] [--train-val-gpu N]\n' "$0" >&2
  exit 1
fi

# Fail early with a readable message instead of a Python traceback from the
# first config lookup.
if [[ ! -f "$CONFIG" ]]; then
  printf '%s: config file not found: %s\n' "$0" "$CONFIG" >&2
  exit 1
fi
|
|
# Repository root: the parent of the directory that contains this script.
REPO="$(cd "$(dirname "$0")/.." && pwd)"

# Interpreter used for config parsing and for launching training.
# The script always invokes it as the single quoted word "$PYTHON", so the
# value must be one command name or path. The previous default
# "/usr/bin/env python" was looked up as a file literally named
# "/usr/bin/env python" and could never run. A bare `python` is resolved
# through PATH, which is what `env python` would have done.
PYTHON="${PYTHON:-python}"

# When no process count was given, use one process per entry in the
# comma-separated GPU list.
if [[ -z "$NUM_PROCS" ]]; then
  NUM_PROCS=$(awk -F',' '{print NF}' <<< "$GPUS")
fi
|
|
| |
| |
#######################################
# Print one top-level value from the YAML config.
# Globals:
#   PYTHON (read) - interpreter to run; needs the `yaml` module
#   CONFIG (read) - path of the YAML file
# Arguments:
#   $1 - key to look up
#   $2 - default printed when the key is absent or null (optional, "")
# Outputs:
#   The value on stdout; list/tuple values are joined with single spaces.
# Returns:
#   The interpreter's exit status.
#######################################
get() {
  # The path, key and default travel as argv entries rather than being
  # pasted into the Python source, so quotes or backslashes in any of them
  # cannot break (or inject into) the program text.
  "$PYTHON" -c '
import sys
import yaml

path, key, default = sys.argv[1:4]
with open(path) as fh:
    # An empty YAML document loads as None; treat it as "no keys".
    cfg = yaml.safe_load(fh) or {}
val = cfg.get(key)
# An explicit `key: null` falls back to the default instead of printing
# the literal string "None".
if val is None:
    val = default
if isinstance(val, (list, tuple)):
    val = " ".join(map(str, val))
print(val)
' "$CONFIG" "$1" "${2-}"
}
|
|
# ---- Settings read from the YAML config (second argument = default) ----

# Dataset inputs.
DATA_DIRS=$(get data_dir "")
SPK_IDX=$(get speaker_index "")

# Output directory; a relative value is anchored at the repository root.
OUT_DIR=$(get output_dir "tts_iclora_v1")
if [[ "$OUT_DIR" != /* ]]; then
  OUT_DIR="$REPO/$OUT_DIR"
fi

# Model weights.
CKPT=$(get checkpoint "$REPO/ltx-2.3-22b-dev.safetensors")
FULL_CKPT=$(get full_checkpoint "$REPO/ltx-2.3-22b-dev.safetensors")
BASE_MODEL=$(get base_model "dev")

# LoRA settings.
LORA_RANK=$(get lora_rank "128")
LORA_ALPHA=$(get lora_alpha "128")
LORA_DROPOUT=$(get lora_dropout "0.0")

# Optional LoRA weights to resume from; a relative value is anchored at the
# repository root, an empty value is left empty.
RESUME_LORA=$(get resume_lora "")
if [[ -n "$RESUME_LORA" ]] && [[ "$RESUME_LORA" != /* ]]; then
  RESUME_LORA="$REPO/$RESUME_LORA"
fi

# Conditioning options passed straight through to the training script.
REF_RATIO=$(get ref_ratio "0.3")
MAX_REF=$(get max_ref_tokens "200")
TEXT_DROP=$(get text_dropout "0.4")

# Optimisation schedule.
STEPS=$(get steps "10000")
LR=$(get lr "0.0001")
SCHED=$(get lr_scheduler "cosine")
BATCH=$(get batch_size "1")
GRAD_ACC=$(get grad_accum "4")
GRAD_NORM=$(get max_grad_norm "1.0")

# Bookkeeping.
SAVE_EVERY=$(get save_every "500")
LOG_EVERY=$(get log_every "50")
SEED=$(get seed "53")
WARMUP=$(get warmup_steps "500")

# Optional validation config; a relative value is looked up under
# <repo>/configs, an empty value is left empty.
VAL_CFG=$(get val_config "")
if [[ -n "$VAL_CFG" ]] && [[ "$VAL_CFG" != /* ]]; then
  VAL_CFG="$REPO/configs/$VAL_CFG"
fi
|
|
# Make sure the output directory exists before anything is launched.
mkdir -p "$OUT_DIR"

#######################################
# Assemble the accelerate launch command line.
# Globals:
#   CMD (written) - resulting argv as an array
#   PYTHON, NUM_PROCS, REPO, DATA_DIRS, SPK_IDX, OUT_DIR, CKPT, FULL_CKPT,
#   BASE_MODEL, LORA_*, REF_RATIO, MAX_REF, TEXT_DROP, STEPS, LR, SCHED,
#   BATCH, GRAD_ACC, GRAD_NORM, SAVE_EVERY, LOG_EVERY, SEED, WARMUP,
#   RESUME_LORA, VAL_CFG, EXTRA_ARGS (read)
# Arguments:
#   None
#######################################
build_cmd() {
  local -a extra=()

  # DATA_DIRS and SPK_IDX are left unquoted on purpose: the config reader
  # joins list values with spaces and each entry must become its own word.
  # shellcheck disable=SC2206
  CMD=("$PYTHON" -u -m accelerate.commands.launch
    --num_processes="$NUM_PROCS" --mixed_precision=bf16
    "$REPO/src/train.py"
    --data-dir $DATA_DIRS
    --speaker-index $SPK_IDX
    --output-dir "$OUT_DIR"
    --checkpoint "$CKPT" --full-checkpoint "$FULL_CKPT" --base-model "$BASE_MODEL"
    --lora-rank "$LORA_RANK" --lora-alpha "$LORA_ALPHA" --lora-dropout "$LORA_DROPOUT"
    --ref-ratio "$REF_RATIO" --max-ref-tokens "$MAX_REF" --text-dropout "$TEXT_DROP"
    --steps "$STEPS" --lr "$LR" --lr-scheduler "$SCHED"
    --batch-size "$BATCH" --grad-accum "$GRAD_ACC" --max-grad-norm "$GRAD_NORM"
    --save-every "$SAVE_EVERY" --log-every "$LOG_EVERY" --seed "$SEED"
    --warmup-steps "$WARMUP")

  # Optional flags are only emitted when they carry a value.
  if [[ -n "$RESUME_LORA" ]]; then
    CMD+=(--resume-lora "$RESUME_LORA")
  fi
  if [[ -n "$VAL_CFG" ]]; then
    CMD+=(--val-config "$VAL_CFG")
  fi

  # EXTRA_ARGS holds arguments the caller's shell has already expanded.
  # Split it back into words with `read -a`, which performs no pathname
  # expansion; a bare unquoted $EXTRA_ARGS would glob a forwarded `*.wav`
  # a second time. `-d ''` reads the whole string (newlines included) and
  # makes read report EOF with a non-zero status, hence the `|| true`.
  read -r -d '' -a extra <<< "$EXTRA_ARGS" || true
  CMD+=("${extra[@]}")
}

build_cmd
|
|
# Environment assignments handed to `env` for the training process only.
# The GPU list is always exported as CUDA_VISIBLE_DEVICES; TRAIN_VAL_GPU is
# added only when it has a value.
LAUNCH_ENV=("CUDA_VISIBLE_DEVICES=$GPUS")
if [[ -n "$TRAIN_VAL_GPU" ]]; then
  LAUNCH_ENV+=("TRAIN_VAL_GPU=$TRAIN_VAL_GPU")
fi

# Summary banner, one line per argument.
printf '%s\n' \
  "==== launching DramaBox training ====" \
  " GPUs: $GPUS (procs: $NUM_PROCS)" \
  " out: $OUT_DIR" \
  " ckpt: $CKPT" \
  " steps: $STEPS lr: $LR $SCHED warmup: $WARMUP" \
  "======================================"

# Run the assembled command with the extra environment applied.
env "${LAUNCH_ENV[@]}" "${CMD[@]}"
|
|