# ############################################################################
# Inference hyperparameters: pretrained speech encoder + mean pooling +
# linear classification head (HyperPyYAML format).
#
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file; confirm it against the original if one is available.
# ############################################################################

# Names of the hparams entries the inference interface is expected to read.
HPARAMS_NEEDED: ["encoder_dim", "out_n_neurons", "label_encoder", "softmax"]

# Names of the entries under `modules` the inference interface is expected to
# use. The classification head is registered as `label_lin` in this file
# (there is no `output_mlp` entry), so it is listed under that name.
MODULES_NEEDED: ["wav2vec2", "avg_pool", "label_lin"]

# Hub identifier of the encoder; passed as `source` to the wav2vec2 wrapper.
wav2vec2_hub: microsoft/wavlm-base-plus-sv

# Location of the fine-tuned checkpoints referenced by the pretrainer paths.
pretrained_path: Porjaz/wavlm-base-emo-fi

# Model dimensions.
# encoder_dim is the input size of the linear head; out_n_neurons is its
# output size (presumably the number of emotion classes - confirm against
# label_encoder.txt).
encoder_dim: 768
out_n_neurons: 5

# Speech encoder loaded from <wav2vec2_hub>.
wav2vec2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2
    source: !ref <wav2vec2_hub>
    output_norm: True
    freeze: True
    save_path: wav2vec2_checkpoints

# Pooling over the encoder output; return_std is disabled.
avg_pool: !new:speechbrain.nnet.pooling.StatisticsPooling
    return_std: False

# Linear classification head: encoder_dim -> out_n_neurons, without bias.
label_lin: !new:speechbrain.nnet.linear.Linear
    input_size: !ref <encoder_dim>
    n_neurons: !ref <out_n_neurons>
    bias: False

# Container holding the head so it can be loaded from model.ckpt as one unit.
model: !new:torch.nn.ModuleList
    - [!ref <label_lin>]

# Modules exposed to the inference interface by name.
modules:
    wav2vec2: !ref <wav2vec2>
    label_lin: !ref <label_lin>
    avg_pool: !ref <avg_pool>

softmax: !new:speechbrain.nnet.activations.Softmax

# Label encoder; its contents are loaded by the pretrainer from
# label_encoder.txt.
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder

# Maps each loadable object to the checkpoint file under <pretrained_path>
# that it is loaded from.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        wav2vec2: !ref <wav2vec2>
        model: !ref <model>
        label_encoder: !ref <label_encoder>
    paths:
        wav2vec2: !ref <pretrained_path>/wav2vec2.ckpt
        model: !ref <pretrained_path>/model.ckpt
        label_encoder: !ref <pretrained_path>/label_encoder.txt