nqtruong committed on
Commit
9efd02e
·
verified ·
1 Parent(s): 643db6e

Upload 4 files

Browse files
Files changed (4) hide show
  1. hyperparams.yaml +68 -0
  2. input_norm.ckpt +3 -0
  3. label_encoder.txt +6 -0
  4. model.ckpt +3 -0
hyperparams.yaml ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ############################################################################
2
+ # Model: WavLM for Emotion Diarization
3
+ # ############################################################################
4
+
5
+
6
+ # Hparams Needed
7
+ HPARAMS_NEEDED: ["window_length", "stride", "encoder_dim", "out_n_neurons", "avg_pool", "label_encoder", "softmax"]
8
+ # Modules Needed
9
+ MODULES_NEEDED: ["wav2vec2", "output_mlp"]
10
+
11
+ # Feature parameters
12
+ wav2vec2_hub: "microsoft/wavlm-large"
13
+
14
+ # Pretrained model folder (HuggingFace repository)
15
+ pretrained_path: speechbrain/emotion-diarization-wavlm-large
16
+
17
+ # Pooling window/stride and classifier dimensions
18
+ window_length: 1 # win_len = 0.02 * 1 = 0.02s
19
+ stride: 1 # stride = 0.02 * 1 = 0.02s
20
+ encoder_dim: 1024
21
+ out_n_neurons: 4
22
+
23
+ input_norm: !new:speechbrain.processing.features.InputNormalization
24
+ norm_type: sentence
25
+ std_norm: False
26
+
27
+ wav2vec2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2
28
+ source: !ref <wav2vec2_hub>
29
+ output_norm: True
30
+ freeze: False
31
+ freeze_feature_extractor: True
32
+ save_path: wav2vec2_checkpoint
33
+
34
+ avg_pool: !new:speechbrain.nnet.pooling.Pooling1d
35
+ pool_type: "avg"
36
+ kernel_size: !ref <window_length>
37
+ stride: !ref <stride>
38
+ ceil_mode: True
39
+
40
+ output_mlp: !new:speechbrain.nnet.linear.Linear
41
+ input_size: !ref <encoder_dim>
42
+ n_neurons: !ref <out_n_neurons>
43
+ bias: False
44
+
45
+ model: !new:torch.nn.ModuleList
46
+ - [!ref <output_mlp>]
47
+
48
+ modules:
49
+ input_norm: !ref <input_norm>
50
+ wav2vec2: !ref <wav2vec2>
51
+ output_mlp: !ref <output_mlp>
52
+
53
+ log_softmax: !new:speechbrain.nnet.activations.Softmax
54
+ apply_log: True
55
+
56
+ label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
57
+
58
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
59
+ loadables:
60
+ input_norm: !ref <input_norm>
61
+ wav2vec2: !ref <wav2vec2>
62
+ model: !ref <model>
63
+ label_encoder: !ref <label_encoder>
64
+ paths:
65
+ input_norm: !ref <pretrained_path>/input_norm.ckpt
66
+ wav2vec2: !ref <pretrained_path>/wav2vec2.ckpt
67
+ model: !ref <pretrained_path>/model.ckpt
68
+ label_encoder: !ref <pretrained_path>/label_encoder.txt
input_norm.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eddbd59b97a6456c5a81880065b785f731ca3b959abfa2c965658a591e53d31f
3
+ size 1075
label_encoder.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ 'a' => 0
2
+ 'n' => 1
3
+ 'h' => 2
4
+ 's' => 3
5
+ ================
6
+ 'starting_index' => 0
model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23c5832103c64cb628e8e56ce5fc7061be323e435a294d34060172c10015208d
3
+ size 17189