# Dramabox / configs /training_args.example.yaml
# Manmay Nakhashi
# Revert: keep DramaBox naming (rebrand reverted per CEO)
# fdc2b0b
# DramaBox IC-LoRA training config — values become the defaults for
# `accelerate launch src/train.py --config configs/training_args.example.yaml`.
# Any flag explicitly passed on the CLI overrides the YAML.
# ── Data ───────────────────────────────────────────────────────────────────
# Preprocessed dataset directories: list every output dir produced by
# src/preprocess.py, one per entry.
data_dir:
  - /path/to/preprocessed_dataset_a/
  - /path/to/preprocessed_dataset_b/
# Index files, one for each data_dir entry. Lines use the same format that was
# fed to preprocess.py — see README "Prepare your index file".
speaker_index:
  - /path/to/preprocessed_dataset_a/index.txt
  - /path/to/preprocessed_dataset_b/index.txt
# Where LoRA shards and logs are written (a relative path is resolved against
# the repo root).
output_dir: tts_iclora_v1
# ── Base model ─────────────────────────────────────────────────────────────
# Train your LoRA on top of DramaBox itself (recommended) — the trimmed audio
# components are enough; no need to ship the raw LTX-2.3 base.
# NOTE(review): how relative checkpoint paths are resolved is not stated in
# this file — confirm against src/train.py.
checkpoint: dramabox-dit-v1.safetensors
full_checkpoint: dramabox-audio-components.safetensors
# Accepted values documented here: 'dev' and 'distilled'; each picks a
# different sampler (see the inline note).
base_model: dev # 'dev' = ShiftedLogitNormal sampler; 'distilled' = DistilledTimestepSampler
# ── LoRA hyperparams (rank == alpha → scale = 1.0) ───────────────────────────
# NOTE(review): the "scale" above is presumably lora_alpha / lora_rank —
# confirm against src/train.py before changing only one of the two values.
lora_rank: 128
lora_alpha: 128
lora_dropout: 0.1 # ~0.1 helps regularize on small datasets
# Resume an existing LoRA — the step number is parsed from the filename
# (e.g. lora_step_05000.safetensors → starts at step 5000).
# Uncomment and point at the shard to continue from:
# resume_lora: tts_iclora_v0/lora_step_05000.safetensors
# ── Voice-cloning reference tokens ─────────────────────────────────────────
# ref_ratio and text_dropout are a fraction / a probability, so presumably both
# are expected to lie in [0, 1] — TODO confirm the validation in src/train.py.
ref_ratio: 0.3 # fraction of training samples that get a ref-token tail
max_ref_tokens: 200 # cap on appended ref tokens after patchification
# CFG training: probability of zeroing the text condition for a sample (forces
# reliance on the voice ref / unconditional path).
text_dropout: 0.4
# ── Schedule ───────────────────────────────────────────────────────────────
# Two suggested recipes:
#   Cosine + 1e-4   = from-scratch fine-tune (the values set below).
#   Constant + 1e-5 = polish on top of an existing LoRA (use with `resume_lora`).
# NOTE(review): the exact lr_scheduler string for the constant recipe is not
# shown in this file — presumably `constant`; confirm against src/train.py.
steps: 10000
# Keep the decimal point and the signed exponent: YAML 1.1 loaders such as
# PyYAML read a bare `1e-4` as a string, while `1.0e-04` is parsed as a float.
lr: 1.0e-04
lr_scheduler: cosine
warmup_steps: 500
# NOTE(review): effective batch per optimizer step is presumably
# batch_size * grad_accum (per process) — confirm.
batch_size: 1
grad_accum: 4
max_grad_norm: 1.0
# NOTE(review): save_every / log_every are presumably intervals measured in
# training steps — confirm.
save_every: 500
log_every: 50
seed: 53
# Optional per-save-step validation pass. Generates a sample for every speaker
# in the val_config so you can A/B listen during training.
# val_config: configs/val_config.example.yaml