{ "barbet_config": { "attention_dropout": 0.0, "attention_sink": false, "bos_token_id": 114689, "eos_token_id": 114690, "global_attention_layers": [ 0, 4, 8, 12, 16, 20, 24 ], "head_dim": 128, "hidden_dropout": 0.0, "hidden_size": 1536, "initializer_range": 0.02, "intermediate_size": 5120, "mamba_d_conv": 4, "mamba_d_state": 64, "mamba_expand": 2, "mamba_layers": [ 3, 7, 11, 15, 19, 23, 27 ], "max_position_embeddings": 262144, "mtp_enabled": false, "mtp_loss_weights": { "2": 0.2, "3": 0.1 }, "mtp_offsets": [ 2, 3 ], "num_attention_heads": 16, "num_hidden_layers": 28, "num_key_value_heads": 2, "pad_token_id": 114691, "qk_clip_alpha": 0.5, "qk_clip_threshold": 100.0, "qk_logit_clip": false, "qk_norm": true, "rms_norm_eps": 1e-06, "rope_theta": 10000000.0, "sliding_window_size": 8192, "tie_word_embeddings": true, "unk_token_id": 114688, "use_cache": true, "vocab_size": 114944 }, "vox_lm_config": { "bos_token_id": 1, "eos_token_id": 2, "hidden_size": 2048, "intermediate_size": 6144, "max_position_embeddings": 32768, "num_attention_heads": 16, "num_hidden_layers": 28, "num_key_value_heads": 2, "rms_norm_eps": 1e-05, "rope_scaling": { "type": "longrope", "long_factor": [ 0.9977997200264581, 1.014658295992452, 1.0349680404997148, 1.059429246056193, 1.0888815016813513, 1.1243301355211495, 1.166977103606075, 1.2182568066927284, 1.2798772354275727, 1.3538666751582975, 1.4426259039919596, 1.5489853358570191, 1.6762658237220625, 1.8283407612492941, 2.0096956085876183, 2.225478927469756, 2.481536379650452, 2.784415934557119, 3.1413289096347365, 3.560047844772632, 4.048719380066383, 4.615569542115128, 5.2684819496549835, 6.014438591970396, 6.858830049237097, 7.804668263503327, 8.851768731513417, 9.99600492938444, 11.228766118181639, 12.536757560834843, 13.902257701387796, 15.303885189125953, 16.717837610115794, 18.119465097853947, 19.484965238406907, 20.792956681060105, 22.02571786985731, 23.16995406772833, 24.217054535738416, 25.16289275000465, 26.007284207271347, 26.753240849586767, 27.40615325712662, 27.973003419175363, 28.461674954469114, 28.880393889607006, 29.237306864684626, 29.540186419591297, 29.79624387177199, 30.01202719065413, 30.193382037992453, 30.34545697551969, 30.47273746338473, 30.579096895249787, 30.66785612408345, 30.741845563814174, 30.80346599254902, 30.85474569563567, 30.897392663720595, 30.932841297560394, 30.962293553185553, 30.986754758742034, 31.007064503249293, 31.02392307921529 ], "short_factor": [ 0.9977997200264581, 1.014658295992452, 1.0349680404997148, 1.059429246056193, 1.0888815016813513, 1.1243301355211495, 1.166977103606075, 1.2182568066927284, 1.2798772354275727, 1.3538666751582975, 1.4426259039919596, 1.5489853358570191, 1.6762658237220625, 1.8283407612492941, 2.0096956085876183, 2.225478927469756, 2.481536379650452, 2.784415934557119, 3.1413289096347365, 3.560047844772632, 4.048719380066383, 4.615569542115128, 5.2684819496549835, 6.014438591970396, 6.858830049237097, 7.804668263503327, 8.851768731513417, 9.99600492938444, 11.228766118181639, 12.536757560834843, 13.902257701387796, 15.303885189125953, 16.717837610115794, 18.119465097853947, 19.484965238406907, 20.792956681060105, 22.02571786985731, 23.16995406772833, 24.217054535738416, 25.16289275000465, 26.007284207271347, 26.753240849586767, 27.40615325712662, 27.973003419175363, 28.461674954469114, 28.880393889607006, 29.237306864684626, 29.540186419591297, 29.79624387177199, 30.01202719065413, 30.193382037992453, 30.34545697551969, 30.47273746338473, 30.579096895249787, 30.66785612408345, 30.741845563814174, 30.80346599254902, 30.85474569563567, 30.897392663720595, 30.932841297560394, 30.962293553185553, 30.986754758742034, 31.007064503249293, 31.02392307921529 ], "original_max_position_embeddings": 32768 }, "vocab_size": 73448, "use_mup": false, "scale_emb": 12.0, "dim_model_base": 256, "scale_depth": 1.4, "rope_theta": 10000.0, "kv_channels": 128, "no_rope": false }, "patch_size": 4, "feat_dim": 64, "residual_lm_num_layers": 8, "residual_lm_no_rope": true, "scalar_quantization_latent_dim": 512, "scalar_quantization_scale": 9, "encoder_config": { "hidden_dim": 1024, "ffn_dim": 4096, "num_heads": 16, "num_layers": 12, "kv_channels": 128 }, "dit_config": { "hidden_dim": 1024, "ffn_dim": 4096, "num_heads": 16, "num_layers": 12, "kv_channels": 128, "dit_mean_mode": false, "cfm_config": { "sigma_min": 1e-06, "solver": "euler", "t_scheduler": "log-norm", "training_cfg_rate": 0.1, "inference_cfg_rate": 2.0, "reg_loss_type": "l1", "ratio_r_neq_t_range": [ 0.25, 0.75 ], "noise_cond_prob_range": [ 0.0, 0.0 ], "noise_cond_scale": 0.0 } }, "audio_vae_config": { "encoder_dim": 128, "encoder_rates": [ 2, 5, 8, 8 ], "latent_dim": 64, "decoder_dim": 2048, "decoder_rates": [ 8, 6, 5, 2, 2, 2 ], "depthwise": true, "sample_rate": 16000, "out_sample_rate": 48000, "use_noise_block": false, "sr_bin_boundaries": [ 20000, 30000, 40000 ], "cond_type": "scale_bias", "cond_dim": 128, "cond_out_layer": false }, "adapter_config": { "num_residual_blocks": 1, "ffn_mult": 2.0, "rms_norm_eps": 1e-06 }, "speaker_embed_dim": 192, "audio_start_token": -1, "audio_end_token": -1, "ref_audio_start_token": -1, "ref_audio_end_token": -1, "spk_token": -1, "barbet_effective_vocab_size": null, "max_length": 8192, "device": "cuda", "dtype": "bfloat16", "generation_defaults": { "cfg_value": 2.8, "inference_timesteps": 9, "max_len": 2000, "retry_badcase": true, "retry_badcase_max_times": 3, "retry_badcase_ratio_threshold": 6.0, "speaker_id": "hung_yi_lee", "speaker_source_dataset": "voidful/hung-yi_lee", "speaker_centroid_path": "checkpoints/hung_yi_lee_speaker_centroids.pt", "speaker_centroid_sha256": "e1d4c95a4c33935ff1fee0ab47fa796dcc13908a183c60e9b02bc0a61c541c4c", "speaker_centroid_dim": 192 }, "generation_defaults_source": { "scope": "tts_hard_sentences_zh_500 + Breeze-ASR-25 normalized CER", "sentences": "/home/voidful/tts_hard_sentences_zh_500.txt", "asr_model": "MediaTek-Research/Breeze-ASR-25", "conversion": "s2twp", "normalized_cer": 0.09669792733863977, "mixed_token_error_rate": 0.0911015155363644, "char_errors": 1227, "char_reference_length": 12689, "evaluated_examples": 500, "trial": "hy_cfg2p8_steps9", "run": "hungyi_high_refine_hy_cfg2p8_steps9_20260620" } }