marofmar committed on
Commit
2f3abc7
·
1 Parent(s): 2ee585c

checkpoint 84 model and tokenizer

Browse files
config.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
+ "activation_dropout": 0.0,
4
+ "apply_spec_augment": true,
5
+ "architectures": [
6
+ "Wav2Vec2ForCTC"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "conv_bias": true,
11
+ "conv_dim": [
12
+ 512,
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512
19
+ ],
20
+ "conv_kernel": [
21
+ 10,
22
+ 3,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 2,
27
+ 2
28
+ ],
29
+ "conv_stride": [
30
+ 5,
31
+ 2,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2
37
+ ],
38
+ "ctc_loss_reduction": "mean",
39
+ "ctc_zero_infinity": false,
40
+ "do_stable_layer_norm": true,
41
+ "eos_token_id": 2,
42
+ "feat_extract_activation": "gelu",
43
+ "feat_extract_dropout": 0.0,
44
+ "feat_extract_norm": "layer",
45
+ "feat_proj_dropout": 0.0,
46
+ "final_dropout": 0.0,
47
+ "gradient_checkpointing": true,
48
+ "hidden_act": "gelu",
49
+ "hidden_dropout": 0.1,
50
+ "hidden_size": 1024,
51
+ "initializer_range": 0.02,
52
+ "intermediate_size": 4096,
53
+ "layer_norm_eps": 1e-05,
54
+ "layerdrop": 0.1,
55
+ "mask_channel_length": 10,
56
+ "mask_channel_min_space": 1,
57
+ "mask_channel_other": 0.0,
58
+ "mask_channel_prob": 0.0,
59
+ "mask_channel_selection": "static",
60
+ "mask_feature_length": 10,
61
+ "mask_feature_prob": 0.0,
62
+ "mask_time_length": 10,
63
+ "mask_time_min_space": 1,
64
+ "mask_time_other": 0.0,
65
+ "mask_time_prob": 0.05,
66
+ "mask_time_selection": "static",
67
+ "model_type": "wav2vec2",
68
+ "num_attention_heads": 16,
69
+ "num_conv_pos_embedding_groups": 16,
70
+ "num_conv_pos_embeddings": 128,
71
+ "num_feat_extract_layers": 7,
72
+ "num_hidden_layers": 24,
73
+ "pad_token_id": 259,
74
+ "transformers_version": "4.6.0.dev0",
75
+ "vocab_size": 260
76
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bfc6508f4174c9f6359349b9687c28ddab0e09aa92126ab0fa9bcf89e6bb4fb
3
+ size 2492208775
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81a8acfe830487f7ddb00a2c4c6db079bdc50bf9d4ba7f6fd18f42934341b452
3
+ size 1262999831
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:057999ec38e996dec146de5e28f3aaf30dfbc86f0c54eee12d611f5e3b7423e8
3
+ size 623
trainer_state.json ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 4.255084991455078,
3
+ "best_model_checkpoint": "./xlsr-demo/checkpoint-80",
4
+ "epoch": 28.0,
5
+ "global_step": 84,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.33,
12
+ "learning_rate": 5.9999999999999995e-05,
13
+ "loss": 45.1762,
14
+ "step": 4
15
+ },
16
+ {
17
+ "epoch": 1.33,
18
+ "eval_loss": 32.95661544799805,
19
+ "eval_runtime": 0.6944,
20
+ "eval_samples_per_second": 14.402,
21
+ "eval_wer": 1.0,
22
+ "step": 4
23
+ },
24
+ {
25
+ "epoch": 2.67,
26
+ "learning_rate": 0.00023999999999999998,
27
+ "loss": 48.4094,
28
+ "step": 8
29
+ },
30
+ {
31
+ "epoch": 2.67,
32
+ "eval_loss": 32.567665100097656,
33
+ "eval_runtime": 0.5168,
34
+ "eval_samples_per_second": 19.351,
35
+ "eval_wer": 1.0,
36
+ "step": 8
37
+ },
38
+ {
39
+ "epoch": 4.0,
40
+ "learning_rate": 0.00028941176470588233,
41
+ "loss": 41.452,
42
+ "step": 12
43
+ },
44
+ {
45
+ "epoch": 4.0,
46
+ "eval_loss": 31.335033416748047,
47
+ "eval_runtime": 0.5187,
48
+ "eval_samples_per_second": 19.279,
49
+ "eval_wer": 1.0,
50
+ "step": 12
51
+ },
52
+ {
53
+ "epoch": 5.33,
54
+ "learning_rate": 0.0002788235294117647,
55
+ "loss": 43.7158,
56
+ "step": 16
57
+ },
58
+ {
59
+ "epoch": 5.33,
60
+ "eval_loss": 29.559152603149414,
61
+ "eval_runtime": 0.5095,
62
+ "eval_samples_per_second": 19.627,
63
+ "eval_wer": 1.0,
64
+ "step": 16
65
+ },
66
+ {
67
+ "epoch": 6.67,
68
+ "learning_rate": 0.00026470588235294115,
69
+ "loss": 30.5266,
70
+ "step": 20
71
+ },
72
+ {
73
+ "epoch": 6.67,
74
+ "eval_loss": 18.15114974975586,
75
+ "eval_runtime": 0.5292,
76
+ "eval_samples_per_second": 18.896,
77
+ "eval_wer": 1.0,
78
+ "step": 20
79
+ },
80
+ {
81
+ "epoch": 8.0,
82
+ "learning_rate": 0.0002505882352941176,
83
+ "loss": 27.451,
84
+ "step": 24
85
+ },
86
+ {
87
+ "epoch": 8.0,
88
+ "eval_loss": 13.7584810256958,
89
+ "eval_runtime": 0.5073,
90
+ "eval_samples_per_second": 19.713,
91
+ "eval_wer": 1.0,
92
+ "step": 24
93
+ },
94
+ {
95
+ "epoch": 9.33,
96
+ "learning_rate": 0.0002364705882352941,
97
+ "loss": 19.4205,
98
+ "step": 28
99
+ },
100
+ {
101
+ "epoch": 9.33,
102
+ "eval_loss": 10.164986610412598,
103
+ "eval_runtime": 0.5127,
104
+ "eval_samples_per_second": 19.503,
105
+ "eval_wer": 1.0,
106
+ "step": 28
107
+ },
108
+ {
109
+ "epoch": 10.67,
110
+ "learning_rate": 0.00022235294117647057,
111
+ "loss": 13.2132,
112
+ "step": 32
113
+ },
114
+ {
115
+ "epoch": 10.67,
116
+ "eval_loss": 7.898101806640625,
117
+ "eval_runtime": 0.5031,
118
+ "eval_samples_per_second": 19.877,
119
+ "eval_wer": 1.0,
120
+ "step": 32
121
+ },
122
+ {
123
+ "epoch": 12.0,
124
+ "learning_rate": 0.00020823529411764704,
125
+ "loss": 9.3548,
126
+ "step": 36
127
+ },
128
+ {
129
+ "epoch": 12.0,
130
+ "eval_loss": 6.538975715637207,
131
+ "eval_runtime": 0.5312,
132
+ "eval_samples_per_second": 18.825,
133
+ "eval_wer": 1.0,
134
+ "step": 36
135
+ },
136
+ {
137
+ "epoch": 13.33,
138
+ "learning_rate": 0.0001941176470588235,
139
+ "loss": 8.7253,
140
+ "step": 40
141
+ },
142
+ {
143
+ "epoch": 13.33,
144
+ "eval_loss": 5.714229583740234,
145
+ "eval_runtime": 0.4983,
146
+ "eval_samples_per_second": 20.07,
147
+ "eval_wer": 1.0,
148
+ "step": 40
149
+ },
150
+ {
151
+ "epoch": 14.67,
152
+ "learning_rate": 0.00017999999999999998,
153
+ "loss": 6.8295,
154
+ "step": 44
155
+ },
156
+ {
157
+ "epoch": 14.67,
158
+ "eval_loss": 5.214608192443848,
159
+ "eval_runtime": 0.5112,
160
+ "eval_samples_per_second": 19.561,
161
+ "eval_wer": 1.0,
162
+ "step": 44
163
+ },
164
+ {
165
+ "epoch": 16.0,
166
+ "learning_rate": 0.00016588235294117646,
167
+ "loss": 5.9458,
168
+ "step": 48
169
+ },
170
+ {
171
+ "epoch": 16.0,
172
+ "eval_loss": 4.887683868408203,
173
+ "eval_runtime": 0.5156,
174
+ "eval_samples_per_second": 19.394,
175
+ "eval_wer": 1.0,
176
+ "step": 48
177
+ },
178
+ {
179
+ "epoch": 17.33,
180
+ "learning_rate": 0.00015176470588235293,
181
+ "loss": 5.4214,
182
+ "step": 52
183
+ },
184
+ {
185
+ "epoch": 17.33,
186
+ "eval_loss": 4.679065704345703,
187
+ "eval_runtime": 1.0634,
188
+ "eval_samples_per_second": 9.403,
189
+ "eval_wer": 1.0,
190
+ "step": 52
191
+ },
192
+ {
193
+ "epoch": 18.67,
194
+ "learning_rate": 0.0001376470588235294,
195
+ "loss": 5.8716,
196
+ "step": 56
197
+ },
198
+ {
199
+ "epoch": 18.67,
200
+ "eval_loss": 4.548101425170898,
201
+ "eval_runtime": 0.4987,
202
+ "eval_samples_per_second": 20.051,
203
+ "eval_wer": 1.0,
204
+ "step": 56
205
+ },
206
+ {
207
+ "epoch": 20.0,
208
+ "learning_rate": 0.00012352941176470587,
209
+ "loss": 4.69,
210
+ "step": 60
211
+ },
212
+ {
213
+ "epoch": 20.0,
214
+ "eval_loss": 4.435084342956543,
215
+ "eval_runtime": 0.6371,
216
+ "eval_samples_per_second": 15.695,
217
+ "eval_wer": 1.0,
218
+ "step": 60
219
+ },
220
+ {
221
+ "epoch": 21.33,
222
+ "learning_rate": 0.00010941176470588234,
223
+ "loss": 4.6092,
224
+ "step": 64
225
+ },
226
+ {
227
+ "epoch": 21.33,
228
+ "eval_loss": 4.350079536437988,
229
+ "eval_runtime": 0.5165,
230
+ "eval_samples_per_second": 19.361,
231
+ "eval_wer": 1.0,
232
+ "step": 64
233
+ },
234
+ {
235
+ "epoch": 22.67,
236
+ "learning_rate": 9.529411764705882e-05,
237
+ "loss": 4.9462,
238
+ "step": 68
239
+ },
240
+ {
241
+ "epoch": 22.67,
242
+ "eval_loss": 4.301529884338379,
243
+ "eval_runtime": 0.4931,
244
+ "eval_samples_per_second": 20.278,
245
+ "eval_wer": 1.0,
246
+ "step": 68
247
+ },
248
+ {
249
+ "epoch": 24.0,
250
+ "learning_rate": 8.117647058823529e-05,
251
+ "loss": 4.7406,
252
+ "step": 72
253
+ },
254
+ {
255
+ "epoch": 24.0,
256
+ "eval_loss": 4.267996311187744,
257
+ "eval_runtime": 0.5187,
258
+ "eval_samples_per_second": 19.279,
259
+ "eval_wer": 1.0,
260
+ "step": 72
261
+ },
262
+ {
263
+ "epoch": 25.33,
264
+ "learning_rate": 6.705882352941176e-05,
265
+ "loss": 4.5484,
266
+ "step": 76
267
+ },
268
+ {
269
+ "epoch": 25.33,
270
+ "eval_loss": 4.258701801300049,
271
+ "eval_runtime": 0.5059,
272
+ "eval_samples_per_second": 19.768,
273
+ "eval_wer": 1.0,
274
+ "step": 76
275
+ },
276
+ {
277
+ "epoch": 26.67,
278
+ "learning_rate": 5.294117647058824e-05,
279
+ "loss": 4.2841,
280
+ "step": 80
281
+ },
282
+ {
283
+ "epoch": 26.67,
284
+ "eval_loss": 4.255084991455078,
285
+ "eval_runtime": 0.493,
286
+ "eval_samples_per_second": 20.286,
287
+ "eval_wer": 1.0,
288
+ "step": 80
289
+ },
290
+ {
291
+ "epoch": 28.0,
292
+ "learning_rate": 3.882352941176471e-05,
293
+ "loss": 4.4733,
294
+ "step": 84
295
+ },
296
+ {
297
+ "epoch": 28.0,
298
+ "eval_loss": 4.255660533905029,
299
+ "eval_runtime": 0.5041,
300
+ "eval_samples_per_second": 19.839,
301
+ "eval_wer": 1.0,
302
+ "step": 84
303
+ }
304
+ ],
305
+ "max_steps": 90,
306
+ "num_train_epochs": 30,
307
+ "total_flos": 3.564185651712e+17,
308
+ "trial_name": null,
309
+ "trial_params": null
310
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c2220a89d8cda2c9e9ddfe7fd922e39268d2ceebcd96397807b4516c8d2e156
3
+ size 2351