{
  "architectures": [
    "SpeechT5HifiGan"
  ],
  "initializer_range": 0.01,
  "leaky_relu_slope": 0.1,
  "model_in_dim": 80,
  "model_type": "hifigan",
  "normalize_before": true,
  "resblock_dilation_sizes": [
    [1, 3, 5],
    [1, 3, 5],
    [1, 3, 5]
  ],
  "resblock_kernel_sizes": [3, 7, 11],
  "sampling_rate": 22050,
  "torch_dtype": "float32",
  "transformers_version": "4.52.4",
  "upsample_initial_channel": 512,
  "upsample_kernel_sizes": [16, 16, 4, 4],
  "upsample_rates": [8, 8, 2, 2]
}