{
"model": {
"fm_decoder_downsampling_factor": [
1,
2,
4,
2,
1
],
"fm_decoder_num_layers": [
2,
2,
4,
4,
4
],
"fm_decoder_cnn_module_kernel": [
31,
15,
7,
15,
31
],
"fm_decoder_feedforward_dim": 1536,
"fm_decoder_num_heads": 4,
"fm_decoder_dim": 512,
"text_encoder_num_layers": 4,
"text_encoder_feedforward_dim": 512,
"text_encoder_cnn_module_kernel": 9,
"text_encoder_num_heads": 4,
"text_encoder_dim": 192,
"query_head_dim": 32,
"value_head_dim": 12,
"pos_head_dim": 4,
"pos_dim": 48,
"time_embed_dim": 192,
"text_embed_dim": 192,
"feat_dim": 100
},
"feature": {
"type": "vocos",
"n_fft": 1024,
"hop_length": 256,
"n_mels": 100,
"sample_rate": 24000
},
"architectures": [
"LuxTTSForTextToSpeech"
]
}