eleferrand commited on
Commit
b495c51
·
verified ·
1 Parent(s): 8ee8b58

Upload 10 files

Browse files
config.json ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
+ "activation_dropout": 0.0,
4
+ "adapter_attn_dim": null,
5
+ "adapter_kernel_size": 3,
6
+ "adapter_stride": 2,
7
+ "add_adapter": false,
8
+ "apply_spec_augment": true,
9
+ "architectures": [
10
+ "Wav2Vec2ForCTC"
11
+ ],
12
+ "attention_dropout": 0.0,
13
+ "bos_token_id": 1,
14
+ "classifier_proj_size": 256,
15
+ "codevector_dim": 768,
16
+ "contrastive_logits_temperature": 0.1,
17
+ "conv_bias": true,
18
+ "conv_dim": [
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512,
25
+ 512
26
+ ],
27
+ "conv_kernel": [
28
+ 10,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 3,
33
+ 2,
34
+ 2
35
+ ],
36
+ "conv_stride": [
37
+ 5,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2,
43
+ 2
44
+ ],
45
+ "ctc_loss_reduction": "mean",
46
+ "ctc_zero_infinity": true,
47
+ "diversity_loss_weight": 0.1,
48
+ "do_stable_layer_norm": true,
49
+ "eos_token_id": 2,
50
+ "feat_extract_activation": "gelu",
51
+ "feat_extract_dropout": 0.0,
52
+ "feat_extract_norm": "layer",
53
+ "feat_proj_dropout": 0.0,
54
+ "feat_quantizer_dropout": 0.0,
55
+ "final_dropout": 0.0,
56
+ "gradient_checkpointing": false,
57
+ "hidden_act": "gelu",
58
+ "hidden_dropout": 0.0,
59
+ "hidden_size": 1024,
60
+ "initializer_range": 0.02,
61
+ "intermediate_size": 4096,
62
+ "layer_norm_eps": 1e-05,
63
+ "layerdrop": 0.0,
64
+ "mask_channel_length": 10,
65
+ "mask_channel_min_space": 1,
66
+ "mask_channel_other": 0.0,
67
+ "mask_channel_prob": 0.0,
68
+ "mask_channel_selection": "static",
69
+ "mask_feature_length": 10,
70
+ "mask_feature_min_masks": 0,
71
+ "mask_feature_prob": 0.0,
72
+ "mask_time_length": 10,
73
+ "mask_time_min_masks": 2,
74
+ "mask_time_min_space": 1,
75
+ "mask_time_other": 0.0,
76
+ "mask_time_prob": 0.05,
77
+ "mask_time_selection": "static",
78
+ "model_type": "wav2vec2",
79
+ "num_adapter_layers": 3,
80
+ "num_attention_heads": 16,
81
+ "num_codevector_groups": 2,
82
+ "num_codevectors_per_group": 320,
83
+ "num_conv_pos_embedding_groups": 16,
84
+ "num_conv_pos_embeddings": 128,
85
+ "num_feat_extract_layers": 7,
86
+ "num_hidden_layers": 24,
87
+ "num_negatives": 100,
88
+ "output_hidden_size": 1024,
89
+ "pad_token_id": 47,
90
+ "proj_codevector_dim": 768,
91
+ "tdnn_dilation": [
92
+ 1,
93
+ 2,
94
+ 3,
95
+ 1,
96
+ 1
97
+ ],
98
+ "tdnn_dim": [
99
+ 512,
100
+ 512,
101
+ 512,
102
+ 512,
103
+ 1500
104
+ ],
105
+ "tdnn_kernel": [
106
+ 5,
107
+ 3,
108
+ 3,
109
+ 1,
110
+ 1
111
+ ],
112
+ "torch_dtype": "float32",
113
+ "transformers_version": "4.31.0",
114
+ "use_weighted_layer_sum": false,
115
+ "vocab_size": 48,
116
+ "xvector_output_dim": 512
117
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e49423ae2e1ada2d0237448264e2851d694a1522153397ebca1c7a8fcb77f7b
3
+ size 2524152954
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0baecdd74d2098318ba696e9e4168e42662f34cc12f26cd5eadc7e2e058df33
3
+ size 1262099178
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8f9cd2939fe37009688536bfe6c7db25d67bd1130856ce64c5d602a85bd206c
3
+ size 14308
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a61b48b66b439ef7fba221aefabda921afbe29a5b5f7dc2733e5a0f31f9a106
3
+ size 1064
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "clean_up_tokenization_spaces": true,
4
+ "do_lower_case": false,
5
+ "eos_token": "</s>",
6
+ "model_max_length": 1000000000000000019884624838656,
7
+ "pad_token": "[PAD]",
8
+ "replace_word_delimiter_char": " ",
9
+ "target_lang": null,
10
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
11
+ "unk_token": "[UNK]",
12
+ "word_delimiter_token": "|"
13
+ }
trainer_state.json ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 25.0,
5
+ "global_step": 1500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_loss": 2.974224090576172,
13
+ "eval_runtime": 5.5654,
14
+ "eval_samples_per_second": 113.379,
15
+ "eval_steps_per_second": 14.195,
16
+ "eval_wer": 1.0,
17
+ "step": 60
18
+ },
19
+ {
20
+ "epoch": 2.0,
21
+ "eval_loss": 2.769304037094116,
22
+ "eval_runtime": 5.1383,
23
+ "eval_samples_per_second": 122.802,
24
+ "eval_steps_per_second": 15.375,
25
+ "eval_wer": 0.99901655466317,
26
+ "step": 120
27
+ },
28
+ {
29
+ "epoch": 3.0,
30
+ "eval_loss": 1.608498215675354,
31
+ "eval_runtime": 5.1567,
32
+ "eval_samples_per_second": 122.366,
33
+ "eval_steps_per_second": 15.32,
34
+ "eval_wer": 0.929191935748238,
35
+ "step": 180
36
+ },
37
+ {
38
+ "epoch": 4.0,
39
+ "eval_loss": 1.4975178241729736,
40
+ "eval_runtime": 5.1972,
41
+ "eval_samples_per_second": 121.412,
42
+ "eval_steps_per_second": 15.201,
43
+ "eval_wer": 0.9142763481396492,
44
+ "step": 240
45
+ },
46
+ {
47
+ "epoch": 5.0,
48
+ "eval_loss": 1.4740536212921143,
49
+ "eval_runtime": 5.1873,
50
+ "eval_samples_per_second": 121.642,
51
+ "eval_steps_per_second": 15.229,
52
+ "eval_wer": 0.8701852155384363,
53
+ "step": 300
54
+ },
55
+ {
56
+ "epoch": 6.0,
57
+ "eval_loss": 1.5547226667404175,
58
+ "eval_runtime": 5.2242,
59
+ "eval_samples_per_second": 120.784,
60
+ "eval_steps_per_second": 15.122,
61
+ "eval_wer": 0.88264218980495,
62
+ "step": 360
63
+ },
64
+ {
65
+ "epoch": 7.0,
66
+ "eval_loss": 1.385581374168396,
67
+ "eval_runtime": 5.194,
68
+ "eval_samples_per_second": 121.486,
69
+ "eval_steps_per_second": 15.21,
70
+ "eval_wer": 0.8431404687756106,
71
+ "step": 420
72
+ },
73
+ {
74
+ "epoch": 8.0,
75
+ "eval_loss": 1.6811991930007935,
76
+ "eval_runtime": 5.2358,
77
+ "eval_samples_per_second": 120.516,
78
+ "eval_steps_per_second": 15.088,
79
+ "eval_wer": 0.8677266021963612,
80
+ "step": 480
81
+ },
82
+ {
83
+ "epoch": 8.33,
84
+ "learning_rate": 0.00021716666666666667,
85
+ "loss": 1.4916,
86
+ "step": 500
87
+ },
88
+ {
89
+ "epoch": 9.0,
90
+ "eval_loss": 1.651587724685669,
91
+ "eval_runtime": 5.1966,
92
+ "eval_samples_per_second": 121.426,
93
+ "eval_steps_per_second": 15.202,
94
+ "eval_wer": 0.8567447959350926,
95
+ "step": 540
96
+ },
97
+ {
98
+ "epoch": 10.0,
99
+ "eval_loss": 1.698971152305603,
100
+ "eval_runtime": 5.2046,
101
+ "eval_samples_per_second": 121.24,
102
+ "eval_steps_per_second": 15.179,
103
+ "eval_wer": 0.8611702999508277,
104
+ "step": 600
105
+ },
106
+ {
107
+ "epoch": 11.0,
108
+ "eval_loss": 1.6570122241973877,
109
+ "eval_runtime": 5.2467,
110
+ "eval_samples_per_second": 120.267,
111
+ "eval_steps_per_second": 15.057,
112
+ "eval_wer": 0.8464186198983773,
113
+ "step": 660
114
+ },
115
+ {
116
+ "epoch": 12.0,
117
+ "eval_loss": 1.7964118719100952,
118
+ "eval_runtime": 5.2019,
119
+ "eval_samples_per_second": 121.302,
120
+ "eval_steps_per_second": 15.187,
121
+ "eval_wer": 0.8636289132929028,
122
+ "step": 720
123
+ },
124
+ {
125
+ "epoch": 13.0,
126
+ "eval_loss": 1.72978937625885,
127
+ "eval_runtime": 5.2508,
128
+ "eval_samples_per_second": 120.172,
129
+ "eval_steps_per_second": 15.045,
130
+ "eval_wer": 0.8367480740862153,
131
+ "step": 780
132
+ },
133
+ {
134
+ "epoch": 14.0,
135
+ "eval_loss": 1.6711727380752563,
136
+ "eval_runtime": 5.2066,
137
+ "eval_samples_per_second": 121.192,
138
+ "eval_steps_per_second": 15.173,
139
+ "eval_wer": 0.8236354695951483,
140
+ "step": 840
141
+ },
142
+ {
143
+ "epoch": 15.0,
144
+ "eval_loss": 1.6306202411651611,
145
+ "eval_runtime": 5.2186,
146
+ "eval_samples_per_second": 120.913,
147
+ "eval_steps_per_second": 15.138,
148
+ "eval_wer": 0.8242910998197017,
149
+ "step": 900
150
+ },
151
+ {
152
+ "epoch": 16.0,
153
+ "eval_loss": 1.6020203828811646,
154
+ "eval_runtime": 5.2523,
155
+ "eval_samples_per_second": 120.138,
156
+ "eval_steps_per_second": 15.041,
157
+ "eval_wer": 0.812817570890018,
158
+ "step": 960
159
+ },
160
+ {
161
+ "epoch": 16.67,
162
+ "learning_rate": 0.00013383333333333332,
163
+ "loss": 0.1786,
164
+ "step": 1000
165
+ },
166
+ {
167
+ "epoch": 17.0,
168
+ "eval_loss": 1.7215255498886108,
169
+ "eval_runtime": 5.2057,
170
+ "eval_samples_per_second": 121.213,
171
+ "eval_steps_per_second": 15.176,
172
+ "eval_wer": 0.8308474020652352,
173
+ "step": 1020
174
+ },
175
+ {
176
+ "epoch": 18.0,
177
+ "eval_loss": 1.7741730213165283,
178
+ "eval_runtime": 5.2226,
179
+ "eval_samples_per_second": 120.821,
180
+ "eval_steps_per_second": 15.127,
181
+ "eval_wer": 0.8380593345353221,
182
+ "step": 1080
183
+ },
184
+ {
185
+ "epoch": 19.0,
186
+ "eval_loss": 1.7588073015213013,
187
+ "eval_runtime": 5.2348,
188
+ "eval_samples_per_second": 120.539,
189
+ "eval_steps_per_second": 15.091,
190
+ "eval_wer": 0.833633830519587,
191
+ "step": 1140
192
+ },
193
+ {
194
+ "epoch": 20.0,
195
+ "eval_loss": 1.7859772443771362,
196
+ "eval_runtime": 5.211,
197
+ "eval_samples_per_second": 121.089,
198
+ "eval_steps_per_second": 15.16,
199
+ "eval_wer": 0.8344533683002786,
200
+ "step": 1200
201
+ },
202
+ {
203
+ "epoch": 21.0,
204
+ "eval_loss": 1.8294274806976318,
205
+ "eval_runtime": 5.2508,
206
+ "eval_samples_per_second": 120.173,
207
+ "eval_steps_per_second": 15.045,
208
+ "eval_wer": 0.8367480740862153,
209
+ "step": 1260
210
+ },
211
+ {
212
+ "epoch": 22.0,
213
+ "eval_loss": 1.7776710987091064,
214
+ "eval_runtime": 5.2165,
215
+ "eval_samples_per_second": 120.962,
216
+ "eval_steps_per_second": 15.144,
217
+ "eval_wer": 0.8226520242583183,
218
+ "step": 1320
219
+ },
220
+ {
221
+ "epoch": 23.0,
222
+ "eval_loss": 1.8306204080581665,
223
+ "eval_runtime": 5.2033,
224
+ "eval_samples_per_second": 121.27,
225
+ "eval_steps_per_second": 15.183,
226
+ "eval_wer": 0.8290444189477135,
227
+ "step": 1380
228
+ },
229
+ {
230
+ "epoch": 24.0,
231
+ "eval_loss": 1.858679175376892,
232
+ "eval_runtime": 5.2509,
233
+ "eval_samples_per_second": 120.17,
234
+ "eval_steps_per_second": 15.045,
235
+ "eval_wer": 0.8247828224881167,
236
+ "step": 1440
237
+ },
238
+ {
239
+ "epoch": 25.0,
240
+ "learning_rate": 5.0499999999999994e-05,
241
+ "loss": 0.1062,
242
+ "step": 1500
243
+ }
244
+ ],
245
+ "max_steps": 1800,
246
+ "num_train_epochs": 30,
247
+ "total_flos": 5.842748350654745e+18,
248
+ "trial_name": null,
249
+ "trial_params": null
250
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa648acdcbc8b36bbccaaa37950c762411d104e57f72f0874b9dc7143f51dc35
3
+ size 4408
vocab.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "\t": 0,
3
+ "&": 2,
4
+ "'": 3,
5
+ "/": 4,
6
+ "0": 5,
7
+ "1": 6,
8
+ "2": 7,
9
+ "4": 8,
10
+ "5": 9,
11
+ "7": 10,
12
+ "8": 11,
13
+ "9": 12,
14
+ "<": 13,
15
+ ">": 14,
16
+ "@": 15,
17
+ "^": 16,
18
+ "a": 17,
19
+ "c": 18,
20
+ "d": 19,
21
+ "e": 20,
22
+ "f": 21,
23
+ "g": 22,
24
+ "h": 23,
25
+ "i": 24,
26
+ "j": 25,
27
+ "k": 26,
28
+ "l": 27,
29
+ "m": 28,
30
+ "n": 29,
31
+ "o": 30,
32
+ "p": 31,
33
+ "q": 32,
34
+ "r": 33,
35
+ "s": 34,
36
+ "t": 35,
37
+ "u": 36,
38
+ "v": 37,
39
+ "w": 38,
40
+ "x": 39,
41
+ "y": 40,
42
+ "z": 41,
43
+ "|": 1,
44
+ "’": 42,
45
+ "!": 43,
46
+ "?": 44
47
+ }