| model: |
| class_path: linacodec.model.LinaCodecModel |
| init_args: |
| config: |
| |
| local_ssl_layers: [6, 9] |
| global_ssl_layers: [1, 2] |
| normalize_ssl_features: true |
|
|
| |
| downsample_factor: 4 |
| mel_upsample_factor: 8 |
| use_conv_downsample: true |
| mel_interpolation_mode: linear |
|
|
| |
| sample_rate: 24000 |
| n_fft: 1024 |
| hop_length: 256 |
| n_mels: 100 |
| padding: center |
|
|
| ssl_feature_extractor: |
| class_path: linacodec.module.ssl_extractor.SSLFeatureExtractor |
| init_args: |
| model_name: wavlm_base_plus |
| output_layer: 2 |
| sample_rate: 24000 |
|
|
| local_encoder: |
| class_path: linacodec.module.transformer.Transformer |
| init_args: |
| dim: 768 |
| n_layers: 6 |
| n_heads: 12 |
| window_size: 125 |
| use_rope: true |
| rope_theta: 10000.0 |
| max_seq_len: 512 |
| use_flash_attention: true |
|
|
| local_quantizer: |
| class_path: linacodec.module.fsq.FiniteScalarQuantizer |
| init_args: |
| input_dim: 768 |
| output_dim: 768 |
| levels: [8, 8, 8, 5, 5] |
|
|
| feature_decoder: |
| class_path: linacodec.module.transformer.Transformer |
| init_args: |
| dim: 768 |
| n_layers: 6 |
| n_heads: 12 |
| window_size: 125 |
| use_rope: true |
| rope_theta: 10000.0 |
| max_seq_len: 512 |
| use_flash_attention: true |
|
|
| global_encoder: |
| class_path: linacodec.module.global_encoder.GlobalEncoder |
| init_args: |
| input_channels: 768 |
| output_channels: 128 |
| num_layers: 4 |
| dim: 384 |
| intermediate_dim: 1152 |
|
|
| mel_prenet: |
| class_path: linacodec.module.transformer.Transformer |
| init_args: |
| dim: 768 |
| output_dim: 512 |
| n_layers: 6 |
| n_heads: 12 |
| window_size: 31 |
| use_rope: true |
| rope_theta: 10000.0 |
| max_seq_len: 512 |
| use_flash_attention: true |
|
|
| mel_decoder: |
| class_path: linacodec.module.transformer.Transformer |
| init_args: |
| dim: 512 |
| output_dim: 100 |
| n_layers: 6 |
| n_heads: 8 |
| window_size: 65 |
| use_rope: true |
| rope_theta: 10000.0 |
| max_seq_len: 512 |
| adanorm_condition_dim: 128 |
| use_adaln_zero: true |
| use_flash_attention: true |
|
|
| mel_postnet: |
| class_path: linacodec.module.postnet.PostNet |
| init_args: |
| input_channels: 100 |
| channels: 256 |
| kernel_size: 7 |
| num_layers: 4 |
| use_layer_norm: true |
|
|