{
  "architectures": [
    "SpeechT5HifiGan"
  ],
  "initializer_range": 0.01,
  "leaky_relu_slope": 0.1,
  "model_in_dim": 80,
  "model_type": "hifigan",
  "normalize_before": true,
  "resblock_dilation_sizes": [
    [1, 3, 5],
    [1, 3, 5],
    [1, 3, 5]
  ],
  "resblock_kernel_sizes": [3, 7, 11],
  "sampling_rate": 22050,
  "torch_dtype": "float32",
  "transformers_version": "4.52.4",
  "upsample_initial_channel": 512,
  "upsample_kernel_sizes": [16, 16, 4, 4],
  "upsample_rates": [8, 8, 2, 2]
}