LegolasTheElf commited on
Commit
8af1fcf
·
1 Parent(s): b10ffae

Upload Bengali

Browse files
.ipynb_checkpoints/vocab-checkpoint.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"৮": 0, "্": 1, "ফ": 2, "জ": 3, "ল": 4, "থ": 5, "৬": 6, "য": 7, "খ": 8, "উ": 9, "ূ": 10, "ং": 11, "ই": 12, "ড": 13, "প": 14, "১": 15, "ঠ": 16, "ও": 17, "গ": 18, "ী": 19, "৩": 20, "ঈ": 21, "ঝ": 22, "ৎ": 23, "ু": 24, "ৃ": 25, "ড়": 26, "এ": 27, "৯": 28, "ণ": 29, "ট": 30, "৭": 31, "ষ": 32, "ৗ": 33, "শ": 34, "দ": 35, "ত": 36, "ছ": 37, "ধ": 38, "ক": 39, "ঁ": 40, "ঋ": 41, "য়": 42, "ৰ": 43, "ব": 44, "র": 45, "ঊ": 46, "ঐ": 47, "৪": 48, "ো": 49, "ঞ": 50, "া": 51, "২": 52, "ম": 53, "আ": 54, "ঙ": 55, "ন": 56, "হ": 57, "ঢ়": 58, "ঢ": 59, "ৌ": 60, "চ": 61, "ি": 62, "ৈ": 63, "ঔ": 64, "ঘ": 65, " ": 66, "।": 67, "০": 68, "৫": 69, "স": 70, "়": 71, "ঃ": 72, "ে": 73, "অ": 74, "ভ": 75, "[UNK]": 76, "[PAD]": 77}
checkpoint-10200/config.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
+ "activation_dropout": 0.0,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.1,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.1,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "gradient_checkpointing": false,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.1,
58
+ "hidden_size": 1024,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.1,
63
+ "mask_channel_length": 10,
64
+ "mask_channel_min_space": 1,
65
+ "mask_channel_other": 0.0,
66
+ "mask_channel_prob": 0.0,
67
+ "mask_channel_selection": "static",
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_min_space": 1,
74
+ "mask_time_other": 0.0,
75
+ "mask_time_prob": 0.075,
76
+ "mask_time_selection": "static",
77
+ "model_type": "wav2vec2",
78
+ "num_adapter_layers": 3,
79
+ "num_attention_heads": 16,
80
+ "num_codevector_groups": 2,
81
+ "num_codevectors_per_group": 320,
82
+ "num_conv_pos_embedding_groups": 16,
83
+ "num_conv_pos_embeddings": 128,
84
+ "num_feat_extract_layers": 7,
85
+ "num_hidden_layers": 24,
86
+ "num_negatives": 100,
87
+ "output_hidden_size": 1024,
88
+ "pad_token_id": 77,
89
+ "proj_codevector_dim": 768,
90
+ "tdnn_dilation": [
91
+ 1,
92
+ 2,
93
+ 3,
94
+ 1,
95
+ 1
96
+ ],
97
+ "tdnn_dim": [
98
+ 512,
99
+ 512,
100
+ 512,
101
+ 512,
102
+ 1500
103
+ ],
104
+ "tdnn_kernel": [
105
+ 5,
106
+ 3,
107
+ 3,
108
+ 1,
109
+ 1
110
+ ],
111
+ "torch_dtype": "float32",
112
+ "transformers_version": "4.16.0.dev0",
113
+ "use_weighted_layer_sum": false,
114
+ "vocab_size": 78,
115
+ "xvector_output_dim": 512
116
+ }
checkpoint-10200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c784b8e0d8f2080ccba1aef92751fbea5aca8c7851291bd1c8ca395e3ab7d37c
3
+ size 2490698513
checkpoint-10200/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
checkpoint-10200/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bdd98ccfcd38a69a3ed23f8fb873cd4496b59dbde5e93d5e00f13684dae055d
3
+ size 1262243441
checkpoint-10200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0ffb48e67ddff7084f538480bfbe92ea3254963a1057a6738573bb2fa14a11b
3
+ size 14567
checkpoint-10200/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:973a8db7ec7e33e2e2060bd943fc5671ea12110c2893f93cd1939c729a014546
3
+ size 559
checkpoint-10200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65076ff9eeb207a390d7d92cd39cd08e180ba6c2e65d9d5f4bbdcc2ab952a9c5
3
+ size 623
checkpoint-10200/trainer_state.json ADDED
@@ -0,0 +1,560 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 9.883506902397675,
5
+ "global_step": 10200,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.29,
12
+ "learning_rate": 0.00019520934761441092,
13
+ "loss": 7.3884,
14
+ "step": 300
15
+ },
16
+ {
17
+ "epoch": 0.29,
18
+ "eval_cer": 0.9996699598306373,
19
+ "eval_loss": 3.533357620239258,
20
+ "eval_runtime": 381.6147,
21
+ "eval_samples_per_second": 57.697,
22
+ "eval_steps_per_second": 7.214,
23
+ "eval_wer": 0.9997982352308189,
24
+ "step": 300
25
+ },
26
+ {
27
+ "epoch": 0.58,
28
+ "learning_rate": 0.00018936708860759497,
29
+ "loss": 2.7914,
30
+ "step": 600
31
+ },
32
+ {
33
+ "epoch": 0.58,
34
+ "eval_cer": 0.3128259689501683,
35
+ "eval_loss": 1.0251679420471191,
36
+ "eval_runtime": 357.7393,
37
+ "eval_samples_per_second": 61.548,
38
+ "eval_steps_per_second": 7.696,
39
+ "eval_wer": 0.9026014204239751,
40
+ "step": 600
41
+ },
42
+ {
43
+ "epoch": 0.87,
44
+ "learning_rate": 0.00018352482960077896,
45
+ "loss": 1.0845,
46
+ "step": 900
47
+ },
48
+ {
49
+ "epoch": 0.87,
50
+ "eval_cer": 0.23292367821083487,
51
+ "eval_loss": 0.6456783413887024,
52
+ "eval_runtime": 359.2262,
53
+ "eval_samples_per_second": 61.293,
54
+ "eval_steps_per_second": 7.664,
55
+ "eval_wer": 0.7633837296890132,
56
+ "step": 900
57
+ },
58
+ {
59
+ "epoch": 1.16,
60
+ "learning_rate": 0.000177682570593963,
61
+ "loss": 0.8596,
62
+ "step": 1200
63
+ },
64
+ {
65
+ "epoch": 1.16,
66
+ "eval_cer": 0.19856693084355662,
67
+ "eval_loss": 0.5056623816490173,
68
+ "eval_runtime": 356.2916,
69
+ "eval_samples_per_second": 61.798,
70
+ "eval_steps_per_second": 7.727,
71
+ "eval_wer": 0.7067685354567954,
72
+ "step": 1200
73
+ },
74
+ {
75
+ "epoch": 1.45,
76
+ "learning_rate": 0.00017184031158714704,
77
+ "loss": 0.764,
78
+ "step": 1500
79
+ },
80
+ {
81
+ "epoch": 1.45,
82
+ "eval_cer": 0.17508630984692217,
83
+ "eval_loss": 0.44483131170272827,
84
+ "eval_runtime": 356.1556,
85
+ "eval_samples_per_second": 61.821,
86
+ "eval_steps_per_second": 7.73,
87
+ "eval_wer": 0.6668191111589369,
88
+ "step": 1500
89
+ },
90
+ {
91
+ "epoch": 1.74,
92
+ "learning_rate": 0.00016599805258033109,
93
+ "loss": 0.6885,
94
+ "step": 1800
95
+ },
96
+ {
97
+ "epoch": 1.74,
98
+ "eval_cer": 0.16619476712626208,
99
+ "eval_loss": 0.39251354336738586,
100
+ "eval_runtime": 356.867,
101
+ "eval_samples_per_second": 61.698,
102
+ "eval_steps_per_second": 7.714,
103
+ "eval_wer": 0.6457010653179813,
104
+ "step": 1800
105
+ },
106
+ {
107
+ "epoch": 2.03,
108
+ "learning_rate": 0.0001601557935735151,
109
+ "loss": 0.6497,
110
+ "step": 2100
111
+ },
112
+ {
113
+ "epoch": 2.03,
114
+ "eval_cer": 0.1661144284008251,
115
+ "eval_loss": 0.3698385953903198,
116
+ "eval_runtime": 356.2072,
117
+ "eval_samples_per_second": 61.812,
118
+ "eval_steps_per_second": 7.729,
119
+ "eval_wer": 0.6270580006456472,
120
+ "step": 2100
121
+ },
122
+ {
123
+ "epoch": 2.33,
124
+ "learning_rate": 0.00015431353456669912,
125
+ "loss": 0.5945,
126
+ "step": 2400
127
+ },
128
+ {
129
+ "epoch": 2.33,
130
+ "eval_cer": 0.1486114428400825,
131
+ "eval_loss": 0.34724581241607666,
132
+ "eval_runtime": 358.2514,
133
+ "eval_samples_per_second": 61.46,
134
+ "eval_steps_per_second": 7.685,
135
+ "eval_wer": 0.610203916926719,
136
+ "step": 2400
137
+ },
138
+ {
139
+ "epoch": 2.62,
140
+ "learning_rate": 0.00014847127555988316,
141
+ "loss": 0.5678,
142
+ "step": 2700
143
+ },
144
+ {
145
+ "epoch": 2.62,
146
+ "eval_cer": 0.1441602431874932,
147
+ "eval_loss": 0.33363574743270874,
148
+ "eval_runtime": 358.3974,
149
+ "eval_samples_per_second": 61.435,
150
+ "eval_steps_per_second": 7.681,
151
+ "eval_wer": 0.5973716776068008,
152
+ "step": 2700
153
+ },
154
+ {
155
+ "epoch": 2.91,
156
+ "learning_rate": 0.0001426290165530672,
157
+ "loss": 0.5481,
158
+ "step": 3000
159
+ },
160
+ {
161
+ "epoch": 2.91,
162
+ "eval_cer": 0.14050157420475518,
163
+ "eval_loss": 0.3159181773662567,
164
+ "eval_runtime": 358.6283,
165
+ "eval_samples_per_second": 61.395,
166
+ "eval_steps_per_second": 7.676,
167
+ "eval_wer": 0.5890186161627031,
168
+ "step": 3000
169
+ },
170
+ {
171
+ "epoch": 3.2,
172
+ "learning_rate": 0.00013678675754625122,
173
+ "loss": 0.5168,
174
+ "step": 3300
175
+ },
176
+ {
177
+ "epoch": 3.2,
178
+ "eval_cer": 0.13693410053197264,
179
+ "eval_loss": 0.3075895607471466,
180
+ "eval_runtime": 357.1848,
181
+ "eval_samples_per_second": 61.643,
182
+ "eval_steps_per_second": 7.707,
183
+ "eval_wer": 0.5808404175185624,
184
+ "step": 3300
185
+ },
186
+ {
187
+ "epoch": 3.49,
188
+ "learning_rate": 0.00013094449853943524,
189
+ "loss": 0.4999,
190
+ "step": 3600
191
+ },
192
+ {
193
+ "epoch": 3.49,
194
+ "eval_cer": 0.13465204646618173,
195
+ "eval_loss": 0.29860812425613403,
196
+ "eval_runtime": 358.4693,
197
+ "eval_samples_per_second": 61.422,
198
+ "eval_steps_per_second": 7.68,
199
+ "eval_wer": 0.5744377488432153,
200
+ "step": 3600
201
+ },
202
+ {
203
+ "epoch": 3.78,
204
+ "learning_rate": 0.00012510223953261928,
205
+ "loss": 0.4848,
206
+ "step": 3900
207
+ },
208
+ {
209
+ "epoch": 3.78,
210
+ "eval_cer": 0.13505156877646293,
211
+ "eval_loss": 0.2938646674156189,
212
+ "eval_runtime": 358.234,
213
+ "eval_samples_per_second": 61.463,
214
+ "eval_steps_per_second": 7.685,
215
+ "eval_wer": 0.5665420208759281,
216
+ "step": 3900
217
+ },
218
+ {
219
+ "epoch": 4.07,
220
+ "learning_rate": 0.00011925998052580331,
221
+ "loss": 0.4677,
222
+ "step": 4200
223
+ },
224
+ {
225
+ "epoch": 4.07,
226
+ "eval_cer": 0.1289154272066008,
227
+ "eval_loss": 0.2809064984321594,
228
+ "eval_runtime": 356.7771,
229
+ "eval_samples_per_second": 61.714,
230
+ "eval_steps_per_second": 7.716,
231
+ "eval_wer": 0.5597358226622189,
232
+ "step": 4200
233
+ },
234
+ {
235
+ "epoch": 4.36,
236
+ "learning_rate": 0.00011341772151898735,
237
+ "loss": 0.4483,
238
+ "step": 4500
239
+ },
240
+ {
241
+ "epoch": 4.36,
242
+ "eval_cer": 0.12665508630984693,
243
+ "eval_loss": 0.27132293581962585,
244
+ "eval_runtime": 355.1486,
245
+ "eval_samples_per_second": 61.997,
246
+ "eval_steps_per_second": 7.752,
247
+ "eval_wer": 0.5531179382330786,
248
+ "step": 4500
249
+ },
250
+ {
251
+ "epoch": 4.65,
252
+ "learning_rate": 0.00010757546251217138,
253
+ "loss": 0.4358,
254
+ "step": 4800
255
+ },
256
+ {
257
+ "epoch": 4.65,
258
+ "eval_cer": 0.12472261426555206,
259
+ "eval_loss": 0.268148809671402,
260
+ "eval_runtime": 357.7053,
261
+ "eval_samples_per_second": 61.553,
262
+ "eval_steps_per_second": 7.696,
263
+ "eval_wer": 0.5454239750349725,
264
+ "step": 4800
265
+ },
266
+ {
267
+ "epoch": 4.94,
268
+ "learning_rate": 0.0001017332035053554,
269
+ "loss": 0.4271,
270
+ "step": 5100
271
+ },
272
+ {
273
+ "epoch": 4.94,
274
+ "eval_cer": 0.12406470524373032,
275
+ "eval_loss": 0.25914254784584045,
276
+ "eval_runtime": 355.8253,
277
+ "eval_samples_per_second": 61.879,
278
+ "eval_steps_per_second": 7.737,
279
+ "eval_wer": 0.5402318949747121,
280
+ "step": 5100
281
+ },
282
+ {
283
+ "epoch": 5.23,
284
+ "learning_rate": 9.589094449853944e-05,
285
+ "loss": 0.407,
286
+ "step": 5400
287
+ },
288
+ {
289
+ "epoch": 5.23,
290
+ "eval_cer": 0.12141786993811747,
291
+ "eval_loss": 0.25540652871131897,
292
+ "eval_runtime": 358.4634,
293
+ "eval_samples_per_second": 61.423,
294
+ "eval_steps_per_second": 7.68,
295
+ "eval_wer": 0.5377838157753148,
296
+ "step": 5400
297
+ },
298
+ {
299
+ "epoch": 5.52,
300
+ "learning_rate": 9.004868549172347e-05,
301
+ "loss": 0.3931,
302
+ "step": 5700
303
+ },
304
+ {
305
+ "epoch": 5.52,
306
+ "eval_cer": 0.12145695364238411,
307
+ "eval_loss": 0.25973308086395264,
308
+ "eval_runtime": 356.1836,
309
+ "eval_samples_per_second": 61.816,
310
+ "eval_steps_per_second": 7.729,
311
+ "eval_wer": 0.5346900893145379,
312
+ "step": 5700
313
+ },
314
+ {
315
+ "epoch": 5.81,
316
+ "learning_rate": 8.42064264849075e-05,
317
+ "loss": 0.3945,
318
+ "step": 6000
319
+ },
320
+ {
321
+ "epoch": 5.81,
322
+ "eval_cer": 0.11911627402019324,
323
+ "eval_loss": 0.2458212673664093,
324
+ "eval_runtime": 357.5762,
325
+ "eval_samples_per_second": 61.576,
326
+ "eval_steps_per_second": 7.699,
327
+ "eval_wer": 0.5292962444850963,
328
+ "step": 6000
329
+ },
330
+ {
331
+ "epoch": 6.1,
332
+ "learning_rate": 7.836416747809153e-05,
333
+ "loss": 0.3818,
334
+ "step": 6300
335
+ },
336
+ {
337
+ "epoch": 6.1,
338
+ "eval_cer": 0.12016936271848877,
339
+ "eval_loss": 0.24545489251613617,
340
+ "eval_runtime": 358.2815,
341
+ "eval_samples_per_second": 61.454,
342
+ "eval_steps_per_second": 7.684,
343
+ "eval_wer": 0.5279645970085011,
344
+ "step": 6300
345
+ },
346
+ {
347
+ "epoch": 6.4,
348
+ "learning_rate": 7.252190847127556e-05,
349
+ "loss": 0.3687,
350
+ "step": 6600
351
+ },
352
+ {
353
+ "epoch": 6.4,
354
+ "eval_cer": 0.1174161328845945,
355
+ "eval_loss": 0.24405910074710846,
356
+ "eval_runtime": 358.5525,
357
+ "eval_samples_per_second": 61.408,
358
+ "eval_steps_per_second": 7.678,
359
+ "eval_wer": 0.5244269880555257,
360
+ "step": 6600
361
+ },
362
+ {
363
+ "epoch": 6.69,
364
+ "learning_rate": 6.667964946445959e-05,
365
+ "loss": 0.3627,
366
+ "step": 6900
367
+ },
368
+ {
369
+ "epoch": 6.69,
370
+ "eval_cer": 0.1159483226576919,
371
+ "eval_loss": 0.23746837675571442,
372
+ "eval_runtime": 360.1336,
373
+ "eval_samples_per_second": 61.138,
374
+ "eval_steps_per_second": 7.644,
375
+ "eval_wer": 0.5213198106101367,
376
+ "step": 6900
377
+ },
378
+ {
379
+ "epoch": 6.98,
380
+ "learning_rate": 6.083739045764363e-05,
381
+ "loss": 0.3536,
382
+ "step": 7200
383
+ },
384
+ {
385
+ "epoch": 6.98,
386
+ "eval_cer": 0.11431114971230051,
387
+ "eval_loss": 0.23441533744335175,
388
+ "eval_runtime": 357.6113,
389
+ "eval_samples_per_second": 61.57,
390
+ "eval_steps_per_second": 7.698,
391
+ "eval_wer": 0.5191542020875928,
392
+ "step": 7200
393
+ },
394
+ {
395
+ "epoch": 7.27,
396
+ "learning_rate": 5.499513145082765e-05,
397
+ "loss": 0.3406,
398
+ "step": 7500
399
+ },
400
+ {
401
+ "epoch": 7.27,
402
+ "eval_cer": 0.11381391814135274,
403
+ "eval_loss": 0.23369348049163818,
404
+ "eval_runtime": 355.2971,
405
+ "eval_samples_per_second": 61.971,
406
+ "eval_steps_per_second": 7.748,
407
+ "eval_wer": 0.5165447110728505,
408
+ "step": 7500
409
+ },
410
+ {
411
+ "epoch": 7.56,
412
+ "learning_rate": 4.915287244401169e-05,
413
+ "loss": 0.3407,
414
+ "step": 7800
415
+ },
416
+ {
417
+ "epoch": 7.56,
418
+ "eval_cer": 0.11317989360547172,
419
+ "eval_loss": 0.23301520943641663,
420
+ "eval_runtime": 355.7639,
421
+ "eval_samples_per_second": 61.889,
422
+ "eval_steps_per_second": 7.738,
423
+ "eval_wer": 0.5148902399655655,
424
+ "step": 7800
425
+ },
426
+ {
427
+ "epoch": 7.85,
428
+ "learning_rate": 4.33300876338851e-05,
429
+ "loss": 0.3376,
430
+ "step": 8100
431
+ },
432
+ {
433
+ "epoch": 7.85,
434
+ "eval_cer": 0.11235913581587233,
435
+ "eval_loss": 0.22977174818515778,
436
+ "eval_runtime": 358.6159,
437
+ "eval_samples_per_second": 61.397,
438
+ "eval_steps_per_second": 7.677,
439
+ "eval_wer": 0.5103976111051329,
440
+ "step": 8100
441
+ },
442
+ {
443
+ "epoch": 8.14,
444
+ "learning_rate": 3.748782862706913e-05,
445
+ "loss": 0.3283,
446
+ "step": 8400
447
+ },
448
+ {
449
+ "epoch": 8.14,
450
+ "eval_cer": 0.11278905656280534,
451
+ "eval_loss": 0.22892726957798004,
452
+ "eval_runtime": 358.772,
453
+ "eval_samples_per_second": 61.37,
454
+ "eval_steps_per_second": 7.673,
455
+ "eval_wer": 0.5095501990745722,
456
+ "step": 8400
457
+ },
458
+ {
459
+ "epoch": 8.43,
460
+ "learning_rate": 3.1645569620253167e-05,
461
+ "loss": 0.3192,
462
+ "step": 8700
463
+ },
464
+ {
465
+ "epoch": 8.43,
466
+ "eval_cer": 0.11249375746390186,
467
+ "eval_loss": 0.22378800809383392,
468
+ "eval_runtime": 356.1078,
469
+ "eval_samples_per_second": 61.83,
470
+ "eval_steps_per_second": 7.731,
471
+ "eval_wer": 0.5087431399978478,
472
+ "step": 8700
473
+ },
474
+ {
475
+ "epoch": 8.72,
476
+ "learning_rate": 2.58033106134372e-05,
477
+ "loss": 0.3113,
478
+ "step": 9000
479
+ },
480
+ {
481
+ "epoch": 8.72,
482
+ "eval_cer": 0.11138638584301379,
483
+ "eval_loss": 0.2272792011499405,
484
+ "eval_runtime": 355.3786,
485
+ "eval_samples_per_second": 61.956,
486
+ "eval_steps_per_second": 7.747,
487
+ "eval_wer": 0.5062950607984504,
488
+ "step": 9000
489
+ },
490
+ {
491
+ "epoch": 9.01,
492
+ "learning_rate": 1.996105160662123e-05,
493
+ "loss": 0.314,
494
+ "step": 9300
495
+ },
496
+ {
497
+ "epoch": 9.01,
498
+ "eval_cer": 0.1103636955813701,
499
+ "eval_loss": 0.223616361618042,
500
+ "eval_runtime": 358.619,
501
+ "eval_samples_per_second": 61.397,
502
+ "eval_steps_per_second": 7.677,
503
+ "eval_wer": 0.5051786290756484,
504
+ "step": 9300
505
+ },
506
+ {
507
+ "epoch": 9.3,
508
+ "learning_rate": 1.411879259980526e-05,
509
+ "loss": 0.3049,
510
+ "step": 9600
511
+ },
512
+ {
513
+ "epoch": 9.3,
514
+ "eval_cer": 0.11000108565845185,
515
+ "eval_loss": 0.2234642207622528,
516
+ "eval_runtime": 358.9948,
517
+ "eval_samples_per_second": 61.332,
518
+ "eval_steps_per_second": 7.669,
519
+ "eval_wer": 0.5038738835682772,
520
+ "step": 9600
521
+ },
522
+ {
523
+ "epoch": 9.59,
524
+ "learning_rate": 8.27653359298929e-06,
525
+ "loss": 0.3028,
526
+ "step": 9900
527
+ },
528
+ {
529
+ "epoch": 9.59,
530
+ "eval_cer": 0.10983606557377049,
531
+ "eval_loss": 0.22289590537548065,
532
+ "eval_runtime": 356.6815,
533
+ "eval_samples_per_second": 61.73,
534
+ "eval_steps_per_second": 7.718,
535
+ "eval_wer": 0.5035510599375874,
536
+ "step": 9900
537
+ },
538
+ {
539
+ "epoch": 9.88,
540
+ "learning_rate": 2.4342745861733204e-06,
541
+ "loss": 0.3026,
542
+ "step": 10200
543
+ },
544
+ {
545
+ "epoch": 9.88,
546
+ "eval_cer": 0.10945825643252632,
547
+ "eval_loss": 0.22115875780582428,
548
+ "eval_runtime": 355.7805,
549
+ "eval_samples_per_second": 61.886,
550
+ "eval_steps_per_second": 7.738,
551
+ "eval_wer": 0.503295491229958,
552
+ "step": 10200
553
+ }
554
+ ],
555
+ "max_steps": 10320,
556
+ "num_train_epochs": 10,
557
+ "total_flos": 7.282334872342427e+19,
558
+ "trial_name": null,
559
+ "trial_params": null
560
+ }
checkpoint-10200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9ecf6e57275cb7acc55b94084f6c9d6d092c6f2fffe8083bf55c3c6f45893cd
3
+ size 2991
config.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
+ "activation_dropout": 0.0,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.1,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.1,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "gradient_checkpointing": false,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.1,
58
+ "hidden_size": 1024,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.1,
63
+ "mask_channel_length": 10,
64
+ "mask_channel_min_space": 1,
65
+ "mask_channel_other": 0.0,
66
+ "mask_channel_prob": 0.0,
67
+ "mask_channel_selection": "static",
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_min_space": 1,
74
+ "mask_time_other": 0.0,
75
+ "mask_time_prob": 0.075,
76
+ "mask_time_selection": "static",
77
+ "model_type": "wav2vec2",
78
+ "num_adapter_layers": 3,
79
+ "num_attention_heads": 16,
80
+ "num_codevector_groups": 2,
81
+ "num_codevectors_per_group": 320,
82
+ "num_conv_pos_embedding_groups": 16,
83
+ "num_conv_pos_embeddings": 128,
84
+ "num_feat_extract_layers": 7,
85
+ "num_hidden_layers": 24,
86
+ "num_negatives": 100,
87
+ "output_hidden_size": 1024,
88
+ "pad_token_id": 77,
89
+ "proj_codevector_dim": 768,
90
+ "tdnn_dilation": [
91
+ 1,
92
+ 2,
93
+ 3,
94
+ 1,
95
+ 1
96
+ ],
97
+ "tdnn_dim": [
98
+ 512,
99
+ 512,
100
+ 512,
101
+ 512,
102
+ 1500
103
+ ],
104
+ "tdnn_kernel": [
105
+ 5,
106
+ 3,
107
+ 3,
108
+ 1,
109
+ 1
110
+ ],
111
+ "torch_dtype": "float32",
112
+ "transformers_version": "4.16.0.dev0",
113
+ "use_weighted_layer_sum": false,
114
+ "vocab_size": 78,
115
+ "xvector_output_dim": 512
116
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40e64e5aa8962fa0dcac7a9bc6d6004dfe578eb4975b6c5aedfc64b95d5bfc6d
3
+ size 1262243441
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9ecf6e57275cb7acc55b94084f6c9d6d092c6f2fffe8083bf55c3c6f45893cd
3
+ size 2991