Spaces:
Running on Zero
Running on Zero
| # DramaBox IC-LoRA training config — values become the defaults for | |
| # `accelerate launch src/train.py --config configs/training_args.example.yaml`. | |
| # Any flag explicitly passed on the CLI overrides the YAML. | |
| # ── Data ─────────────────────────────────────────────────────────────────── | |
| # One entry per preprocessed dataset (output dirs from src/preprocess.py). | |
| data_dir: | |
| - /path/to/preprocessed_dataset_a/ | |
| - /path/to/preprocessed_dataset_b/ | |
| # One index file per data_dir entry. Each line follows the format you fed to | |
| # preprocess.py — see README "Prepare your index file". | |
| speaker_index: | |
| - /path/to/preprocessed_dataset_a/index.txt | |
| - /path/to/preprocessed_dataset_b/index.txt | |
| # Output directory for LoRA shards + logs (relative paths resolve against the | |
| # repo root). | |
| output_dir: tts_iclora_v1 | |
| # ── Base model ───────────────────────────────────────────────────────────── | |
| # Train your LoRA on top of DramaBox itself (recommended) — the trimmed audio | |
| # components are enough; no need to ship the raw LTX-2.3 base. | |
| checkpoint: dramabox-dit-v1.safetensors | |
| full_checkpoint: dramabox-audio-components.safetensors | |
| base_model: dev # 'dev' = ShiftedLogitNormal sampler; 'distilled' = DistilledTimestepSampler | |
| # ── LoRA hyperparams (rank == alpha → scale = 1.0) ───────────────────────── | |
| lora_rank: 128 | |
| lora_alpha: 128 | |
| lora_dropout: 0.1 # ~0.1 helps regularize on small datasets | |
| # Resume an existing LoRA — step number parsed from the filename | |
| # (e.g. lora_step_05000.safetensors → starts at step 5000). | |
| # resume_lora: tts_iclora_v0/lora_step_05000.safetensors | |
| # ── Voice-cloning reference tokens ───────────────────────────────────────── | |
| ref_ratio: 0.3 # fraction of training samples that get a ref-token tail | |
| max_ref_tokens: 200 # cap on appended ref tokens after patchification | |
| # CFG training: probability of zeroing the text condition (forces reliance on | |
| # the voice ref / unconditional path). | |
| text_dropout: 0.4 | |
| # ── Schedule ─────────────────────────────────────────────────────────────── | |
| # Cosine + 1e-4 = from-scratch fine-tune. | |
| # Constant + 1e-5 = polish on top of an existing LoRA (use with `resume_lora`). | |
| steps: 10000 | |
| lr: 1.0e-04 | |
| lr_scheduler: cosine | |
| warmup_steps: 500 | |
| batch_size: 1 | |
| grad_accum: 4 | |
| max_grad_norm: 1.0 | |
| save_every: 500 | |
| log_every: 50 | |
| seed: 53 | |
| # Optional per-save-step validation pass. Generates a sample for every speaker | |
| # in the val_config so you can A/B listen during training. | |
| # val_config: configs/val_config.example.yaml | |