| exclude_from_checkpoint: true |
| trainable: true |
| istft_head: |
| _target_: woosh.module.model.vocos.ISTFTCircleHead |
| _partial_: true |
| padding: center |
| conv_pad: same |
| conv_kernel: 1 |
| softclip: softplus |
| ztransform: |
| _target_: woosh.module.model.vocos.ZeroDropoutTransform |
| _partial_: true |
| p: 1.0 |
| _target_: woosh.module.model.VocosAutoEncoder |
| channels: 1 |
| z_dim: 128 |
| d_model: 2048 |
| intermediate_dim: 3072 |
| n_fft: 960 |
| hop_length: 480 |
| num_layers: 8 |
| enc_num_layers: 8 |
| input_layer_norm: true |
| final_layer_norm: true |
| stft_normalized: false |
| spec_embed: stft-complex |
| sample_rate: 48000 |
|
|