BlueMagpie-TTS / config.json
voidful's picture
Initial OpenFormosa release of BlueMagpie-TTS (rebrand from voidful; provenance refs preserved)
4e9e0e4 verified
Raw
History Blame Contribute Delete
8.07 kB
{
"barbet_config": {
"attention_dropout": 0.0,
"attention_sink": false,
"bos_token_id": 114689,
"eos_token_id": 114690,
"global_attention_layers": [
0,
4,
8,
12,
16,
20,
24
],
"head_dim": 128,
"hidden_dropout": 0.0,
"hidden_size": 1536,
"initializer_range": 0.02,
"intermediate_size": 5120,
"mamba_d_conv": 4,
"mamba_d_state": 64,
"mamba_expand": 2,
"mamba_layers": [
3,
7,
11,
15,
19,
23,
27
],
"max_position_embeddings": 262144,
"mtp_enabled": false,
"mtp_loss_weights": {
"2": 0.2,
"3": 0.1
},
"mtp_offsets": [
2,
3
],
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 2,
"pad_token_id": 114691,
"qk_clip_alpha": 0.5,
"qk_clip_threshold": 100.0,
"qk_logit_clip": false,
"qk_norm": true,
"rms_norm_eps": 1e-06,
"rope_theta": 10000000.0,
"sliding_window_size": 8192,
"tie_word_embeddings": true,
"unk_token_id": 114688,
"use_cache": true,
"vocab_size": 114944
},
"vox_lm_config": {
"bos_token_id": 1,
"eos_token_id": 2,
"hidden_size": 2048,
"intermediate_size": 6144,
"max_position_embeddings": 32768,
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 2,
"rms_norm_eps": 1e-05,
"rope_scaling": {
"type": "longrope",
"long_factor": [
0.9977997200264581,
1.014658295992452,
1.0349680404997148,
1.059429246056193,
1.0888815016813513,
1.1243301355211495,
1.166977103606075,
1.2182568066927284,
1.2798772354275727,
1.3538666751582975,
1.4426259039919596,
1.5489853358570191,
1.6762658237220625,
1.8283407612492941,
2.0096956085876183,
2.225478927469756,
2.481536379650452,
2.784415934557119,
3.1413289096347365,
3.560047844772632,
4.048719380066383,
4.615569542115128,
5.2684819496549835,
6.014438591970396,
6.858830049237097,
7.804668263503327,
8.851768731513417,
9.99600492938444,
11.228766118181639,
12.536757560834843,
13.902257701387796,
15.303885189125953,
16.717837610115794,
18.119465097853947,
19.484965238406907,
20.792956681060105,
22.02571786985731,
23.16995406772833,
24.217054535738416,
25.16289275000465,
26.007284207271347,
26.753240849586767,
27.40615325712662,
27.973003419175363,
28.461674954469114,
28.880393889607006,
29.237306864684626,
29.540186419591297,
29.79624387177199,
30.01202719065413,
30.193382037992453,
30.34545697551969,
30.47273746338473,
30.579096895249787,
30.66785612408345,
30.741845563814174,
30.80346599254902,
30.85474569563567,
30.897392663720595,
30.932841297560394,
30.962293553185553,
30.986754758742034,
31.007064503249293,
31.02392307921529
],
"short_factor": [
0.9977997200264581,
1.014658295992452,
1.0349680404997148,
1.059429246056193,
1.0888815016813513,
1.1243301355211495,
1.166977103606075,
1.2182568066927284,
1.2798772354275727,
1.3538666751582975,
1.4426259039919596,
1.5489853358570191,
1.6762658237220625,
1.8283407612492941,
2.0096956085876183,
2.225478927469756,
2.481536379650452,
2.784415934557119,
3.1413289096347365,
3.560047844772632,
4.048719380066383,
4.615569542115128,
5.2684819496549835,
6.014438591970396,
6.858830049237097,
7.804668263503327,
8.851768731513417,
9.99600492938444,
11.228766118181639,
12.536757560834843,
13.902257701387796,
15.303885189125953,
16.717837610115794,
18.119465097853947,
19.484965238406907,
20.792956681060105,
22.02571786985731,
23.16995406772833,
24.217054535738416,
25.16289275000465,
26.007284207271347,
26.753240849586767,
27.40615325712662,
27.973003419175363,
28.461674954469114,
28.880393889607006,
29.237306864684626,
29.540186419591297,
29.79624387177199,
30.01202719065413,
30.193382037992453,
30.34545697551969,
30.47273746338473,
30.579096895249787,
30.66785612408345,
30.741845563814174,
30.80346599254902,
30.85474569563567,
30.897392663720595,
30.932841297560394,
30.962293553185553,
30.986754758742034,
31.007064503249293,
31.02392307921529
],
"original_max_position_embeddings": 32768
},
"vocab_size": 73448,
"use_mup": false,
"scale_emb": 12.0,
"dim_model_base": 256,
"scale_depth": 1.4,
"rope_theta": 10000.0,
"kv_channels": 128,
"no_rope": false
},
"patch_size": 4,
"feat_dim": 64,
"residual_lm_num_layers": 8,
"residual_lm_no_rope": true,
"scalar_quantization_latent_dim": 512,
"scalar_quantization_scale": 9,
"encoder_config": {
"hidden_dim": 1024,
"ffn_dim": 4096,
"num_heads": 16,
"num_layers": 12,
"kv_channels": 128
},
"dit_config": {
"hidden_dim": 1024,
"ffn_dim": 4096,
"num_heads": 16,
"num_layers": 12,
"kv_channels": 128,
"dit_mean_mode": false,
"cfm_config": {
"sigma_min": 1e-06,
"solver": "euler",
"t_scheduler": "log-norm",
"training_cfg_rate": 0.1,
"inference_cfg_rate": 2.0,
"reg_loss_type": "l1",
"ratio_r_neq_t_range": [
0.25,
0.75
],
"noise_cond_prob_range": [
0.0,
0.0
],
"noise_cond_scale": 0.0
}
},
"audio_vae_config": {
"encoder_dim": 128,
"encoder_rates": [
2,
5,
8,
8
],
"latent_dim": 64,
"decoder_dim": 2048,
"decoder_rates": [
8,
6,
5,
2,
2,
2
],
"depthwise": true,
"sample_rate": 16000,
"out_sample_rate": 48000,
"use_noise_block": false,
"sr_bin_boundaries": [
20000,
30000,
40000
],
"cond_type": "scale_bias",
"cond_dim": 128,
"cond_out_layer": false
},
"adapter_config": {
"num_residual_blocks": 1,
"ffn_mult": 2.0,
"rms_norm_eps": 1e-06
},
"speaker_embed_dim": 192,
"audio_start_token": -1,
"audio_end_token": -1,
"ref_audio_start_token": -1,
"ref_audio_end_token": -1,
"spk_token": -1,
"barbet_effective_vocab_size": null,
"max_length": 8192,
"device": "cuda",
"dtype": "bfloat16",
"generation_defaults": {
"cfg_value": 2.8,
"inference_timesteps": 9,
"max_len": 2000,
"retry_badcase": true,
"retry_badcase_max_times": 3,
"retry_badcase_ratio_threshold": 6.0,
"speaker_id": "hung_yi_lee",
"speaker_source_dataset": "voidful/hung-yi_lee",
"speaker_centroid_path": "checkpoints/hung_yi_lee_speaker_centroids.pt",
"speaker_centroid_sha256": "e1d4c95a4c33935ff1fee0ab47fa796dcc13908a183c60e9b02bc0a61c541c4c",
"speaker_centroid_dim": 192
},
"generation_defaults_source": {
"scope": "tts_hard_sentences_zh_500 + Breeze-ASR-25 normalized CER",
"sentences": "/home/voidful/tts_hard_sentences_zh_500.txt",
"asr_model": "MediaTek-Research/Breeze-ASR-25",
"conversion": "s2twp",
"normalized_cer": 0.09669792733863977,
"mixed_token_error_rate": 0.0911015155363644,
"char_errors": 1227,
"char_reference_length": 12689,
"evaluated_examples": 500,
"trial": "hy_cfg2p8_steps9",
"run": "hungyi_high_refine_hy_cfg2p8_steps9_20260620"
}
}