| { |
| "_class_name": "LTX2VocoderWithBWE", |
| "_diffusers_version": "0.37.0.dev0", |
| "act_fn": "snakebeta", |
| "antialias": true, |
| "antialias_kernel_size": 12, |
| "antialias_ratio": 2, |
| "bwe_act_fn": "snakebeta", |
| "bwe_antialias": true, |
| "bwe_antialias_kernel_size": 12, |
| "bwe_antialias_ratio": 2, |
| "bwe_final_act_fn": null, |
| "bwe_final_bias": false, |
| "bwe_hidden_channels": 512, |
| "bwe_in_channels": 128, |
| "bwe_leaky_relu_negative_slope": 0.1, |
| "bwe_out_channels": 2, |
| "bwe_resnet_dilations": [ |
| [ |
| 1, |
| 3, |
| 5 |
| ], |
| [ |
| 1, |
| 3, |
| 5 |
| ], |
| [ |
| 1, |
| 3, |
| 5 |
| ] |
| ], |
| "bwe_resnet_kernel_sizes": [ |
| 3, |
| 7, |
| 11 |
| ], |
| "bwe_upsample_factors": [ |
| 6, |
| 5, |
| 2, |
| 2, |
| 2 |
| ], |
| "bwe_upsample_kernel_sizes": [ |
| 12, |
| 11, |
| 4, |
| 4, |
| 4 |
| ], |
| "filter_length": 512, |
| "final_act_fn": null, |
| "final_bias": false, |
| "hidden_channels": 1536, |
| "hop_length": 80, |
| "in_channels": 128, |
| "input_sampling_rate": 16000, |
| "leaky_relu_negative_slope": 0.1, |
| "num_mel_channels": 64, |
| "out_channels": 2, |
| "output_sampling_rate": 48000, |
| "resnet_dilations": [ |
| [ |
| 1, |
| 3, |
| 5 |
| ], |
| [ |
| 1, |
| 3, |
| 5 |
| ], |
| [ |
| 1, |
| 3, |
| 5 |
| ] |
| ], |
| "resnet_kernel_sizes": [ |
| 3, |
| 7, |
| 11 |
| ], |
| "upsample_factors": [ |
| 5, |
| 2, |
| 2, |
| 2, |
| 2, |
| 2 |
| ], |
| "upsample_kernel_sizes": [ |
| 11, |
| 4, |
| 4, |
| 4, |
| 4, |
| 4 |
| ], |
| "window_length": 512 |
| } |
|
|