{ "_class_name": "LTX2VocoderWithBWE", "_diffusers_version": "0.37.0.dev0", "act_fn": "snakebeta", "antialias": true, "antialias_kernel_size": 12, "antialias_ratio": 2, "bwe_act_fn": "snakebeta", "bwe_antialias": true, "bwe_antialias_kernel_size": 12, "bwe_antialias_ratio": 2, "bwe_final_act_fn": null, "bwe_final_bias": false, "bwe_hidden_channels": 512, "bwe_in_channels": 128, "bwe_leaky_relu_negative_slope": 0.1, "bwe_out_channels": 2, "bwe_resnet_dilations": [ [ 1, 3, 5 ], [ 1, 3, 5 ], [ 1, 3, 5 ] ], "bwe_resnet_kernel_sizes": [ 3, 7, 11 ], "bwe_upsample_factors": [ 6, 5, 2, 2, 2 ], "bwe_upsample_kernel_sizes": [ 12, 11, 4, 4, 4 ], "filter_length": 512, "final_act_fn": null, "final_bias": false, "hidden_channels": 1536, "hop_length": 80, "in_channels": 128, "input_sampling_rate": 16000, "leaky_relu_negative_slope": 0.1, "num_mel_channels": 64, "out_channels": 2, "output_sampling_rate": 48000, "resnet_dilations": [ [ 1, 3, 5 ], [ 1, 3, 5 ], [ 1, 3, 5 ] ], "resnet_kernel_sizes": [ 3, 7, 11 ], "upsample_factors": [ 5, 2, 2, 2, 2, 2 ], "upsample_kernel_sizes": [ 11, 4, 4, 4, 4, 4 ], "window_length": 512 }