{
  "output_dir": "exp/omnivoice_vietnamese_4kh",
  "data_config": "/vast/tts/robert/OmniVoice/data/data_config_vietnamese.json",
  "llm_name_or_path": "Qwen/Qwen3-0.6B",
  "audio_vocab_size": 1025,
  "audio_mask_id": 1024,
  "num_audio_codebook": 8,
  "audio_codebook_weights": [
    8,
    8,
    6,
    6,
    4,
    4,
    2,
    2
  ],
  "drop_cond_ratio": 0.1,
  "prompt_ratio_range": [
    0.0,
    0.3
  ],
  "mask_ratio_range": [
    0.0,
    1.0
  ],
  "language_ratio": 0.8,
  "use_pinyin_ratio": 0.0,
  "instruct_ratio": 0.0,
  "only_instruct_ratio": 0.0,
  "resume_from_checkpoint": null,
  "init_from_checkpoint": "k2-fsa/OmniVoice",
  "learning_rate": 1e-05,
  "weight_decay": 0.01,
  "max_grad_norm": 1.0,
  "steps": 100000,
  "epochs": null,
  "seed": 42,
  "lr_scheduler_type": "cosine",
  "warmup_type": "ratio",
  "warmup_ratio": 0.01,
  "warmup_steps": 0,
  "batch_tokens": 8192,
  "gradient_accumulation_steps": 1,
  "num_workers": 2,
  "mixed_precision": "bf16",
  "allow_tf32": true,
  "use_deepspeed": false,
  "deepspeed_config": null,
  "attn_implementation": "flex_attention",
  "max_sample_tokens": 2000,
  "min_sample_tokens": 50,
  "max_batch_size": 64,
  "logging_steps": 50,
  "eval_steps": 10000,
  "save_steps": 10000,
  "keep_last_n_checkpoints": -1
}