voice_design / checkpoints /v2 /train_config.json
suwon's picture
Upload folder using huggingface_hub
c8c81e3 verified
{
"output_dir": "/home/infidea/suwon/omni-voice-advance/exp/omnivoice_0.6B-FT",
"data_config": "/home/infidea/suwon/omni-voice-advance/examples/config/data_config_finetune.json",
"llm_name_or_path": "Qwen/Qwen3-0.6B",
"tokenizer_name_or_path": null,
"expected_llm_model_type": null,
"expected_llm_hidden_size": null,
"expected_llm_intermediate_size": null,
"expected_llm_num_hidden_layers": null,
"expected_llm_num_attention_heads": null,
"expected_llm_num_key_value_heads": null,
"expected_llm_vocab_size": null,
"audio_vocab_size": 1025,
"audio_mask_id": 1024,
"num_audio_codebook": 8,
"audio_codebook_weights": [
8,
8,
6,
6,
4,
4,
2,
2
],
"drop_cond_ratio": 0.1,
"prompt_ratio_range": [
0.05,
0.3
],
"mask_ratio_range": [
0.3,
0.9
],
"min_masked_audio_tokens": 8,
"language_ratio": 0.0,
"use_pinyin_ratio": 0.0,
"instruct_ratio": 0.7,
"only_instruct_ratio": 0.3,
"log_codebook_losses": true,
"loss_label_smoothing": 0.01,
"resume_from_checkpoint": null,
"init_from_checkpoint": "k2-fsa/OmniVoice",
"learning_rate": 2e-05,
"weight_decay": 0.01,
"max_grad_norm": 1.0,
"steps": 1000000,
"seed": 42,
"lr_scheduler_type": "cosine",
"warmup_type": "steps",
"warmup_ratio": 0.03,
"warmup_steps": 10000,
"batch_tokens": 8192,
"gradient_accumulation_steps": 4,
"num_workers": 2,
"mixed_precision": "bf16",
"allow_tf32": true,
"require_cuda": true,
"use_deepspeed": false,
"deepspeed_config": null,
"compile_flex_attention_mask": true,
"validate_audio_token_range": false,
"skip_bad_batches": true,
"max_consecutive_batch_skips": 50,
"attn_implementation": "flex_attention",
"logging_steps": 100,
"eval_steps": 500,
"save_steps": 500,
"keep_last_n_checkpoints": 3,
"use_wandb": true,
"wandb_project": "omnivoice_0.6B-FT",
"wandb_entity": null,
"wandb_run_name": null,
"wandb_group": null,
"wandb_tags": [],
"wandb_mode": null,
"inference_logging_steps": 1000,
"inference_logging_text": null,
"inference_logging_language": null,
"inference_logging_ref_audio": null,
"inference_logging_ref_text": null,
"inference_logging_num_step": 16,
"inference_logging_guidance_scale": 2.0,
"inference_logging_speed": 1.0,
"inference_logging_duration": null,
"inference_audio_tokenizer_path": null,
"inference_logging_jsonl_dir": [
"/home/infidea/tts-data/suwon/OmniVoice_data/server_data/txts",
"/home/infidea/tts-data/suwon/OmniVoice_data/ml-tts-data-others/txts",
"/home/infidea/tts-data/suwon/OmniVoice_data/voice_design_all/txts"
],
"inference_logging_voice_design_jsonl_dir": [
"/home/infidea/tts-data/suwon/OmniVoice_data/voice_design_all/txts"
],
"inference_logging_save_eval_artifacts": true,
"inference_logging_eval_dir": null
}