LIT-TTS
/

voice_design

Model card Files Files and versions

voice_design / checkpoints /v2 /train_config.json

suwon's picture

Upload folder using huggingface_hub

c8c81e3 verified about 1 month ago

history blame contribute delete

3.06 kB

	{
	"output_dir": "/home/infidea/suwon/omni-voice-advance/exp/omnivoice_0.6B-FT",
	"data_config": "/home/infidea/suwon/omni-voice-advance/examples/config/data_config_finetune.json",
	"llm_name_or_path": "Qwen/Qwen3-0.6B",
	"tokenizer_name_or_path": null,
	"expected_llm_model_type": null,
	"expected_llm_hidden_size": null,
	"expected_llm_intermediate_size": null,
	"expected_llm_num_hidden_layers": null,
	"expected_llm_num_attention_heads": null,
	"expected_llm_num_key_value_heads": null,
	"expected_llm_vocab_size": null,
	"audio_vocab_size": 1025,
	"audio_mask_id": 1024,
	"num_audio_codebook": 8,
	"audio_codebook_weights": [
	8,
	8,
	6,
	6,
	4,
	4,
	2,
	2
	],
	"drop_cond_ratio": 0.1,
	"prompt_ratio_range": [
	0.05,
	0.3
	],
	"mask_ratio_range": [
	0.3,
	0.9
	],
	"min_masked_audio_tokens": 8,
	"language_ratio": 0.0,
	"use_pinyin_ratio": 0.0,
	"instruct_ratio": 0.7,
	"only_instruct_ratio": 0.3,
	"log_codebook_losses": true,
	"loss_label_smoothing": 0.01,
	"resume_from_checkpoint": null,
	"init_from_checkpoint": "k2-fsa/OmniVoice",
	"learning_rate": 2e-05,
	"weight_decay": 0.01,
	"max_grad_norm": 1.0,
	"steps": 1000000,
	"seed": 42,
	"lr_scheduler_type": "cosine",
	"warmup_type": "steps",
	"warmup_ratio": 0.03,
	"warmup_steps": 10000,
	"batch_tokens": 8192,
	"gradient_accumulation_steps": 4,
	"num_workers": 2,
	"mixed_precision": "bf16",
	"allow_tf32": true,
	"require_cuda": true,
	"use_deepspeed": false,
	"deepspeed_config": null,
	"compile_flex_attention_mask": true,
	"validate_audio_token_range": false,
	"skip_bad_batches": true,
	"max_consecutive_batch_skips": 50,
	"attn_implementation": "flex_attention",
	"logging_steps": 100,
	"eval_steps": 500,
	"save_steps": 500,
	"keep_last_n_checkpoints": 3,
	"use_wandb": true,
	"wandb_project": "omnivoice_0.6B-FT",
	"wandb_entity": null,
	"wandb_run_name": null,
	"wandb_group": null,
	"wandb_tags": [],
	"wandb_mode": null,
	"inference_logging_steps": 1000,
	"inference_logging_text": null,
	"inference_logging_language": null,
	"inference_logging_ref_audio": null,
	"inference_logging_ref_text": null,
	"inference_logging_num_step": 16,
	"inference_logging_guidance_scale": 2.0,
	"inference_logging_speed": 1.0,
	"inference_logging_duration": null,
	"inference_audio_tokenizer_path": null,
	"inference_logging_jsonl_dir": [
	"/home/infidea/tts-data/suwon/OmniVoice_data/server_data/txts",
	"/home/infidea/tts-data/suwon/OmniVoice_data/ml-tts-data-others/txts",
	"/home/infidea/tts-data/suwon/OmniVoice_data/voice_design_all/txts"
	],
	"inference_logging_voice_design_jsonl_dir": [
	"/home/infidea/tts-data/suwon/OmniVoice_data/voice_design_all/txts"
	],
	"inference_logging_save_eval_artifacts": true,
	"inference_logging_eval_dir": null
	}