{ "output_dir": "/home/infidea/suwon/omni-voice-advance/exp/omnivoice_0.6B-FT", "data_config": "/home/infidea/suwon/omni-voice-advance/examples/config/data_config_finetune.json", "llm_name_or_path": "Qwen/Qwen3-0.6B", "tokenizer_name_or_path": null, "expected_llm_model_type": null, "expected_llm_hidden_size": null, "expected_llm_intermediate_size": null, "expected_llm_num_hidden_layers": null, "expected_llm_num_attention_heads": null, "expected_llm_num_key_value_heads": null, "expected_llm_vocab_size": null, "audio_vocab_size": 1025, "audio_mask_id": 1024, "num_audio_codebook": 8, "audio_codebook_weights": [ 8, 8, 6, 6, 4, 4, 2, 2 ], "drop_cond_ratio": 0.1, "prompt_ratio_range": [ 0.05, 0.3 ], "mask_ratio_range": [ 0.3, 0.9 ], "min_masked_audio_tokens": 8, "language_ratio": 0.0, "use_pinyin_ratio": 0.0, "instruct_ratio": 0.7, "only_instruct_ratio": 0.3, "log_codebook_losses": true, "loss_label_smoothing": 0.01, "resume_from_checkpoint": null, "init_from_checkpoint": "k2-fsa/OmniVoice", "learning_rate": 2e-05, "weight_decay": 0.01, "max_grad_norm": 1.0, "steps": 1000000, "seed": 42, "lr_scheduler_type": "cosine", "warmup_type": "steps", "warmup_ratio": 0.03, "warmup_steps": 10000, "batch_tokens": 8192, "gradient_accumulation_steps": 4, "num_workers": 2, "mixed_precision": "bf16", "allow_tf32": true, "require_cuda": true, "use_deepspeed": false, "deepspeed_config": null, "compile_flex_attention_mask": true, "validate_audio_token_range": false, "skip_bad_batches": true, "max_consecutive_batch_skips": 50, "attn_implementation": "flex_attention", "logging_steps": 100, "eval_steps": 500, "save_steps": 500, "keep_last_n_checkpoints": 3, "use_wandb": true, "wandb_project": "omnivoice_0.6B-FT", "wandb_entity": null, "wandb_run_name": null, "wandb_group": null, "wandb_tags": [], "wandb_mode": null, "inference_logging_steps": 1000, "inference_logging_text": null, "inference_logging_language": null, "inference_logging_ref_audio": null, "inference_logging_ref_text": null, "inference_logging_num_step": 16, "inference_logging_guidance_scale": 2.0, "inference_logging_speed": 1.0, "inference_logging_duration": null, "inference_audio_tokenizer_path": null, "inference_logging_jsonl_dir": [ "/home/infidea/tts-data/suwon/OmniVoice_data/server_data/txts", "/home/infidea/tts-data/suwon/OmniVoice_data/ml-tts-data-others/txts", "/home/infidea/tts-data/suwon/OmniVoice_data/voice_design_all/txts" ], "inference_logging_voice_design_jsonl_dir": [ "/home/infidea/tts-data/suwon/OmniVoice_data/voice_design_all/txts" ], "inference_logging_save_eval_artifacts": true, "inference_logging_eval_dir": null }