| { |
| "activation_quant_modules": [ |
| "bert.encoder.albert_layer_groups.0.albert_layers.0.attention.dense", |
| "bert.encoder.albert_layer_groups.0.albert_layers.0.attention.key", |
| "bert.encoder.albert_layer_groups.0.albert_layers.0.attention.query", |
| "bert.encoder.albert_layer_groups.0.albert_layers.0.attention.value", |
| "bert.encoder.albert_layer_groups.0.albert_layers.0.ffn", |
| "bert.encoder.albert_layer_groups.0.albert_layers.0.ffn_output", |
| "bert.encoder.embedding_hidden_mapping_in", |
| "bert_encoder", |
| "decoder.asr_res.0", |
| "decoder.decode.0.conv1", |
| "decoder.decode.0.conv1x1", |
| "decoder.decode.0.conv2", |
| "decoder.decode.0.norm1.fc", |
| "decoder.decode.0.norm2.fc", |
| "decoder.decode.1.conv1", |
| "decoder.decode.1.conv1x1", |
| "decoder.decode.1.conv2", |
| "decoder.decode.1.norm1.fc", |
| "decoder.decode.1.norm2.fc", |
| "decoder.decode.2.conv1", |
| "decoder.decode.2.conv1x1", |
| "decoder.decode.2.conv2", |
| "decoder.decode.2.norm1.fc", |
| "decoder.decode.2.norm2.fc", |
| "decoder.decode.3.conv1", |
| "decoder.decode.3.conv1x1", |
| "decoder.decode.3.conv2", |
| "decoder.decode.3.norm1.fc", |
| "decoder.decode.3.norm2.fc", |
| "decoder.encode.conv1", |
| "decoder.encode.conv1x1", |
| "decoder.encode.conv2", |
| "decoder.encode.norm1.fc", |
| "decoder.encode.norm2.fc", |
| "decoder.generator.conv_post", |
| "decoder.generator.noise_convs.0", |
| "decoder.generator.noise_convs.1", |
| "decoder.generator.noise_res.0.adain1.0.fc", |
| "decoder.generator.noise_res.0.adain1.1.fc", |
| "decoder.generator.noise_res.0.adain1.2.fc", |
| "decoder.generator.noise_res.0.adain2.0.fc", |
| "decoder.generator.noise_res.0.adain2.1.fc", |
| "decoder.generator.noise_res.0.adain2.2.fc", |
| "decoder.generator.noise_res.0.convs1.0", |
| "decoder.generator.noise_res.0.convs1.1", |
| "decoder.generator.noise_res.0.convs1.2", |
| "decoder.generator.noise_res.0.convs2.0", |
| "decoder.generator.noise_res.0.convs2.1", |
| "decoder.generator.noise_res.0.convs2.2", |
| "decoder.generator.noise_res.1.adain1.0.fc", |
| "decoder.generator.noise_res.1.adain1.1.fc", |
| "decoder.generator.noise_res.1.adain1.2.fc", |
| "decoder.generator.noise_res.1.adain2.0.fc", |
| "decoder.generator.noise_res.1.adain2.1.fc", |
| "decoder.generator.noise_res.1.adain2.2.fc", |
| "decoder.generator.noise_res.1.convs1.0", |
| "decoder.generator.noise_res.1.convs1.1", |
| "decoder.generator.noise_res.1.convs1.2", |
| "decoder.generator.noise_res.1.convs2.0", |
| "decoder.generator.noise_res.1.convs2.1", |
| "decoder.generator.noise_res.1.convs2.2", |
| "decoder.generator.resblocks.0.adain1.0.fc", |
| "decoder.generator.resblocks.0.adain1.1.fc", |
| "decoder.generator.resblocks.0.adain1.2.fc", |
| "decoder.generator.resblocks.0.adain2.0.fc", |
| "decoder.generator.resblocks.0.adain2.1.fc", |
| "decoder.generator.resblocks.0.adain2.2.fc", |
| "decoder.generator.resblocks.0.convs1.0", |
| "decoder.generator.resblocks.0.convs1.1", |
| "decoder.generator.resblocks.0.convs1.2", |
| "decoder.generator.resblocks.0.convs2.0", |
| "decoder.generator.resblocks.0.convs2.1", |
| "decoder.generator.resblocks.0.convs2.2", |
| "decoder.generator.resblocks.1.adain1.0.fc", |
| "decoder.generator.resblocks.1.adain1.1.fc", |
| "decoder.generator.resblocks.1.adain1.2.fc", |
| "decoder.generator.resblocks.1.adain2.0.fc", |
| "decoder.generator.resblocks.1.adain2.1.fc", |
| "decoder.generator.resblocks.1.adain2.2.fc", |
| "decoder.generator.resblocks.1.convs1.0", |
| "decoder.generator.resblocks.1.convs1.1", |
| "decoder.generator.resblocks.1.convs1.2", |
| "decoder.generator.resblocks.1.convs2.0", |
| "decoder.generator.resblocks.1.convs2.1", |
| "decoder.generator.resblocks.1.convs2.2", |
| "decoder.generator.resblocks.2.adain1.0.fc", |
| "decoder.generator.resblocks.2.adain1.1.fc", |
| "decoder.generator.resblocks.2.adain1.2.fc", |
| "decoder.generator.resblocks.2.adain2.0.fc", |
| "decoder.generator.resblocks.2.adain2.1.fc", |
| "decoder.generator.resblocks.2.adain2.2.fc", |
| "decoder.generator.resblocks.2.convs1.0", |
| "decoder.generator.resblocks.2.convs1.1", |
| "decoder.generator.resblocks.2.convs1.2", |
| "decoder.generator.resblocks.2.convs2.0", |
| "decoder.generator.resblocks.2.convs2.1", |
| "decoder.generator.resblocks.2.convs2.2", |
| "decoder.generator.resblocks.3.adain1.0.fc", |
| "decoder.generator.resblocks.3.adain1.1.fc", |
| "decoder.generator.resblocks.3.adain1.2.fc", |
| "decoder.generator.resblocks.3.adain2.0.fc", |
| "decoder.generator.resblocks.3.adain2.1.fc", |
| "decoder.generator.resblocks.3.adain2.2.fc", |
| "decoder.generator.resblocks.3.convs1.0", |
| "decoder.generator.resblocks.3.convs1.1", |
| "decoder.generator.resblocks.3.convs1.2", |
| "decoder.generator.resblocks.3.convs2.0", |
| "decoder.generator.resblocks.3.convs2.1", |
| "decoder.generator.resblocks.3.convs2.2", |
| "predictor.F0.0.conv1", |
| "predictor.F0.0.conv2", |
| "predictor.F0.0.norm1.fc", |
| "predictor.F0.0.norm2.fc", |
| "predictor.F0.1.conv1", |
| "predictor.F0.1.conv1x1", |
| "predictor.F0.1.conv2", |
| "predictor.F0.1.norm1.fc", |
| "predictor.F0.1.norm2.fc", |
| "predictor.F0.2.conv1", |
| "predictor.F0.2.conv2", |
| "predictor.F0.2.norm1.fc", |
| "predictor.F0.2.norm2.fc", |
| "predictor.F0_proj", |
| "predictor.N.0.conv1", |
| "predictor.N.0.conv2", |
| "predictor.N.0.norm1.fc", |
| "predictor.N.0.norm2.fc", |
| "predictor.N.1.conv1", |
| "predictor.N.1.conv1x1", |
| "predictor.N.1.conv2", |
| "predictor.N.1.norm1.fc", |
| "predictor.N.1.norm2.fc", |
| "predictor.N.2.conv1", |
| "predictor.N.2.conv2", |
| "predictor.N.2.norm1.fc", |
| "predictor.N.2.norm2.fc", |
| "predictor.N_proj", |
| "predictor.lstm", |
| "predictor.shared", |
| "predictor.text_encoder.lstms.0", |
| "predictor.text_encoder.lstms.1.fc", |
| "predictor.text_encoder.lstms.2", |
| "predictor.text_encoder.lstms.3.fc", |
| "predictor.text_encoder.lstms.4", |
| "predictor.text_encoder.lstms.5.fc", |
| "text_encoder.cnn.0.0", |
| "text_encoder.cnn.1.0", |
| "text_encoder.cnn.2.0", |
| "text_encoder.lstm" |
| ], |
| "asr_res_dim": 64, |
| "decoder_out_dim": 512, |
| "hidden_dim": 512, |
| "istftnet": { |
| "resblock_kernel_sizes": [ |
| 3, |
| 3 |
| ], |
| "upsample_rates": [ |
| 10, |
| 6 |
| ], |
| "upsample_initial_channel": 512, |
| "resblock_dilation_sizes": [ |
| [ |
| 1, |
| 3, |
| 5 |
| ], |
| [ |
| 1, |
| 3, |
| 5 |
| ] |
| ], |
| "upsample_kernel_sizes": [ |
| 20, |
| 12 |
| ], |
| "gen_istft_n_fft": 20, |
| "gen_istft_hop_size": 5 |
| }, |
| "max_conv_dim": 1024, |
| "max_dur": 50, |
| "model_type": "kitten_tts", |
| "n_layer": 3, |
| "n_mels": 80, |
| "n_token": 178, |
| "plbert": { |
| "num_hidden_layers": 12, |
| "num_attention_heads": 12, |
| "hidden_size": 768, |
| "intermediate_size": 2048, |
| "max_position_embeddings": 512, |
| "embedding_size": 128, |
| "inner_group_num": 1, |
| "num_hidden_groups": 1, |
| "hidden_dropout_prob": 0.0, |
| "attention_probs_dropout_prob": 0.0, |
| "type_vocab_size": 2, |
| "layer_norm_eps": 1e-12 |
| }, |
| "sample_rate": 24000, |
| "speed_priors": {}, |
| "style_dim": 128, |
| "text_encoder_kernel_size": 5, |
| "voice_aliases": { |
| "Bella": "expr-voice-2-f", |
| "Jasper": "expr-voice-2-m", |
| "Luna": "expr-voice-3-f", |
| "Bruno": "expr-voice-3-m", |
| "Rosie": "expr-voice-4-f", |
| "Hugo": "expr-voice-4-m", |
| "Kiki": "expr-voice-5-f", |
| "Leo": "expr-voice-5-m" |
| }, |
| "voices_path": "voices.npz" |
| } |