LegolasTheElf commited on
Commit
685a5d9
·
1 Parent(s): 8af1fcf

Upload Bengali

Browse files
checkpoint-10200/config.json DELETED
@@ -1,116 +0,0 @@
1
- {
2
- "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
- "activation_dropout": 0.0,
4
- "adapter_kernel_size": 3,
5
- "adapter_stride": 2,
6
- "add_adapter": false,
7
- "apply_spec_augment": true,
8
- "architectures": [
9
- "Wav2Vec2ForCTC"
10
- ],
11
- "attention_dropout": 0.1,
12
- "bos_token_id": 1,
13
- "classifier_proj_size": 256,
14
- "codevector_dim": 768,
15
- "contrastive_logits_temperature": 0.1,
16
- "conv_bias": true,
17
- "conv_dim": [
18
- 512,
19
- 512,
20
- 512,
21
- 512,
22
- 512,
23
- 512,
24
- 512
25
- ],
26
- "conv_kernel": [
27
- 10,
28
- 3,
29
- 3,
30
- 3,
31
- 3,
32
- 2,
33
- 2
34
- ],
35
- "conv_stride": [
36
- 5,
37
- 2,
38
- 2,
39
- 2,
40
- 2,
41
- 2,
42
- 2
43
- ],
44
- "ctc_loss_reduction": "mean",
45
- "ctc_zero_infinity": false,
46
- "diversity_loss_weight": 0.1,
47
- "do_stable_layer_norm": true,
48
- "eos_token_id": 2,
49
- "feat_extract_activation": "gelu",
50
- "feat_extract_dropout": 0.0,
51
- "feat_extract_norm": "layer",
52
- "feat_proj_dropout": 0.1,
53
- "feat_quantizer_dropout": 0.0,
54
- "final_dropout": 0.0,
55
- "gradient_checkpointing": false,
56
- "hidden_act": "gelu",
57
- "hidden_dropout": 0.1,
58
- "hidden_size": 1024,
59
- "initializer_range": 0.02,
60
- "intermediate_size": 4096,
61
- "layer_norm_eps": 1e-05,
62
- "layerdrop": 0.1,
63
- "mask_channel_length": 10,
64
- "mask_channel_min_space": 1,
65
- "mask_channel_other": 0.0,
66
- "mask_channel_prob": 0.0,
67
- "mask_channel_selection": "static",
68
- "mask_feature_length": 10,
69
- "mask_feature_min_masks": 0,
70
- "mask_feature_prob": 0.0,
71
- "mask_time_length": 10,
72
- "mask_time_min_masks": 2,
73
- "mask_time_min_space": 1,
74
- "mask_time_other": 0.0,
75
- "mask_time_prob": 0.075,
76
- "mask_time_selection": "static",
77
- "model_type": "wav2vec2",
78
- "num_adapter_layers": 3,
79
- "num_attention_heads": 16,
80
- "num_codevector_groups": 2,
81
- "num_codevectors_per_group": 320,
82
- "num_conv_pos_embedding_groups": 16,
83
- "num_conv_pos_embeddings": 128,
84
- "num_feat_extract_layers": 7,
85
- "num_hidden_layers": 24,
86
- "num_negatives": 100,
87
- "output_hidden_size": 1024,
88
- "pad_token_id": 77,
89
- "proj_codevector_dim": 768,
90
- "tdnn_dilation": [
91
- 1,
92
- 2,
93
- 3,
94
- 1,
95
- 1
96
- ],
97
- "tdnn_dim": [
98
- 512,
99
- 512,
100
- 512,
101
- 512,
102
- 1500
103
- ],
104
- "tdnn_kernel": [
105
- 5,
106
- 3,
107
- 3,
108
- 1,
109
- 1
110
- ],
111
- "torch_dtype": "float32",
112
- "transformers_version": "4.16.0.dev0",
113
- "use_weighted_layer_sum": false,
114
- "vocab_size": 78,
115
- "xvector_output_dim": 512
116
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-10200/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c784b8e0d8f2080ccba1aef92751fbea5aca8c7851291bd1c8ca395e3ab7d37c
3
- size 2490698513
 
 
 
 
checkpoint-10200/preprocessor_config.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
- "feature_size": 1,
5
- "padding_side": "right",
6
- "padding_value": 0.0,
7
- "return_attention_mask": true,
8
- "sampling_rate": 16000
9
- }
 
 
 
 
 
 
 
 
 
 
checkpoint-10200/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bdd98ccfcd38a69a3ed23f8fb873cd4496b59dbde5e93d5e00f13684dae055d
3
- size 1262243441
 
 
 
 
checkpoint-10200/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0ffb48e67ddff7084f538480bfbe92ea3254963a1057a6738573bb2fa14a11b
3
- size 14567
 
 
 
 
checkpoint-10200/scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:973a8db7ec7e33e2e2060bd943fc5671ea12110c2893f93cd1939c729a014546
3
- size 559
 
 
 
 
checkpoint-10200/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:65076ff9eeb207a390d7d92cd39cd08e180ba6c2e65d9d5f4bbdcc2ab952a9c5
3
- size 623
 
 
 
 
checkpoint-10200/trainer_state.json DELETED
@@ -1,560 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 9.883506902397675,
5
- "global_step": 10200,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.29,
12
- "learning_rate": 0.00019520934761441092,
13
- "loss": 7.3884,
14
- "step": 300
15
- },
16
- {
17
- "epoch": 0.29,
18
- "eval_cer": 0.9996699598306373,
19
- "eval_loss": 3.533357620239258,
20
- "eval_runtime": 381.6147,
21
- "eval_samples_per_second": 57.697,
22
- "eval_steps_per_second": 7.214,
23
- "eval_wer": 0.9997982352308189,
24
- "step": 300
25
- },
26
- {
27
- "epoch": 0.58,
28
- "learning_rate": 0.00018936708860759497,
29
- "loss": 2.7914,
30
- "step": 600
31
- },
32
- {
33
- "epoch": 0.58,
34
- "eval_cer": 0.3128259689501683,
35
- "eval_loss": 1.0251679420471191,
36
- "eval_runtime": 357.7393,
37
- "eval_samples_per_second": 61.548,
38
- "eval_steps_per_second": 7.696,
39
- "eval_wer": 0.9026014204239751,
40
- "step": 600
41
- },
42
- {
43
- "epoch": 0.87,
44
- "learning_rate": 0.00018352482960077896,
45
- "loss": 1.0845,
46
- "step": 900
47
- },
48
- {
49
- "epoch": 0.87,
50
- "eval_cer": 0.23292367821083487,
51
- "eval_loss": 0.6456783413887024,
52
- "eval_runtime": 359.2262,
53
- "eval_samples_per_second": 61.293,
54
- "eval_steps_per_second": 7.664,
55
- "eval_wer": 0.7633837296890132,
56
- "step": 900
57
- },
58
- {
59
- "epoch": 1.16,
60
- "learning_rate": 0.000177682570593963,
61
- "loss": 0.8596,
62
- "step": 1200
63
- },
64
- {
65
- "epoch": 1.16,
66
- "eval_cer": 0.19856693084355662,
67
- "eval_loss": 0.5056623816490173,
68
- "eval_runtime": 356.2916,
69
- "eval_samples_per_second": 61.798,
70
- "eval_steps_per_second": 7.727,
71
- "eval_wer": 0.7067685354567954,
72
- "step": 1200
73
- },
74
- {
75
- "epoch": 1.45,
76
- "learning_rate": 0.00017184031158714704,
77
- "loss": 0.764,
78
- "step": 1500
79
- },
80
- {
81
- "epoch": 1.45,
82
- "eval_cer": 0.17508630984692217,
83
- "eval_loss": 0.44483131170272827,
84
- "eval_runtime": 356.1556,
85
- "eval_samples_per_second": 61.821,
86
- "eval_steps_per_second": 7.73,
87
- "eval_wer": 0.6668191111589369,
88
- "step": 1500
89
- },
90
- {
91
- "epoch": 1.74,
92
- "learning_rate": 0.00016599805258033109,
93
- "loss": 0.6885,
94
- "step": 1800
95
- },
96
- {
97
- "epoch": 1.74,
98
- "eval_cer": 0.16619476712626208,
99
- "eval_loss": 0.39251354336738586,
100
- "eval_runtime": 356.867,
101
- "eval_samples_per_second": 61.698,
102
- "eval_steps_per_second": 7.714,
103
- "eval_wer": 0.6457010653179813,
104
- "step": 1800
105
- },
106
- {
107
- "epoch": 2.03,
108
- "learning_rate": 0.0001601557935735151,
109
- "loss": 0.6497,
110
- "step": 2100
111
- },
112
- {
113
- "epoch": 2.03,
114
- "eval_cer": 0.1661144284008251,
115
- "eval_loss": 0.3698385953903198,
116
- "eval_runtime": 356.2072,
117
- "eval_samples_per_second": 61.812,
118
- "eval_steps_per_second": 7.729,
119
- "eval_wer": 0.6270580006456472,
120
- "step": 2100
121
- },
122
- {
123
- "epoch": 2.33,
124
- "learning_rate": 0.00015431353456669912,
125
- "loss": 0.5945,
126
- "step": 2400
127
- },
128
- {
129
- "epoch": 2.33,
130
- "eval_cer": 0.1486114428400825,
131
- "eval_loss": 0.34724581241607666,
132
- "eval_runtime": 358.2514,
133
- "eval_samples_per_second": 61.46,
134
- "eval_steps_per_second": 7.685,
135
- "eval_wer": 0.610203916926719,
136
- "step": 2400
137
- },
138
- {
139
- "epoch": 2.62,
140
- "learning_rate": 0.00014847127555988316,
141
- "loss": 0.5678,
142
- "step": 2700
143
- },
144
- {
145
- "epoch": 2.62,
146
- "eval_cer": 0.1441602431874932,
147
- "eval_loss": 0.33363574743270874,
148
- "eval_runtime": 358.3974,
149
- "eval_samples_per_second": 61.435,
150
- "eval_steps_per_second": 7.681,
151
- "eval_wer": 0.5973716776068008,
152
- "step": 2700
153
- },
154
- {
155
- "epoch": 2.91,
156
- "learning_rate": 0.0001426290165530672,
157
- "loss": 0.5481,
158
- "step": 3000
159
- },
160
- {
161
- "epoch": 2.91,
162
- "eval_cer": 0.14050157420475518,
163
- "eval_loss": 0.3159181773662567,
164
- "eval_runtime": 358.6283,
165
- "eval_samples_per_second": 61.395,
166
- "eval_steps_per_second": 7.676,
167
- "eval_wer": 0.5890186161627031,
168
- "step": 3000
169
- },
170
- {
171
- "epoch": 3.2,
172
- "learning_rate": 0.00013678675754625122,
173
- "loss": 0.5168,
174
- "step": 3300
175
- },
176
- {
177
- "epoch": 3.2,
178
- "eval_cer": 0.13693410053197264,
179
- "eval_loss": 0.3075895607471466,
180
- "eval_runtime": 357.1848,
181
- "eval_samples_per_second": 61.643,
182
- "eval_steps_per_second": 7.707,
183
- "eval_wer": 0.5808404175185624,
184
- "step": 3300
185
- },
186
- {
187
- "epoch": 3.49,
188
- "learning_rate": 0.00013094449853943524,
189
- "loss": 0.4999,
190
- "step": 3600
191
- },
192
- {
193
- "epoch": 3.49,
194
- "eval_cer": 0.13465204646618173,
195
- "eval_loss": 0.29860812425613403,
196
- "eval_runtime": 358.4693,
197
- "eval_samples_per_second": 61.422,
198
- "eval_steps_per_second": 7.68,
199
- "eval_wer": 0.5744377488432153,
200
- "step": 3600
201
- },
202
- {
203
- "epoch": 3.78,
204
- "learning_rate": 0.00012510223953261928,
205
- "loss": 0.4848,
206
- "step": 3900
207
- },
208
- {
209
- "epoch": 3.78,
210
- "eval_cer": 0.13505156877646293,
211
- "eval_loss": 0.2938646674156189,
212
- "eval_runtime": 358.234,
213
- "eval_samples_per_second": 61.463,
214
- "eval_steps_per_second": 7.685,
215
- "eval_wer": 0.5665420208759281,
216
- "step": 3900
217
- },
218
- {
219
- "epoch": 4.07,
220
- "learning_rate": 0.00011925998052580331,
221
- "loss": 0.4677,
222
- "step": 4200
223
- },
224
- {
225
- "epoch": 4.07,
226
- "eval_cer": 0.1289154272066008,
227
- "eval_loss": 0.2809064984321594,
228
- "eval_runtime": 356.7771,
229
- "eval_samples_per_second": 61.714,
230
- "eval_steps_per_second": 7.716,
231
- "eval_wer": 0.5597358226622189,
232
- "step": 4200
233
- },
234
- {
235
- "epoch": 4.36,
236
- "learning_rate": 0.00011341772151898735,
237
- "loss": 0.4483,
238
- "step": 4500
239
- },
240
- {
241
- "epoch": 4.36,
242
- "eval_cer": 0.12665508630984693,
243
- "eval_loss": 0.27132293581962585,
244
- "eval_runtime": 355.1486,
245
- "eval_samples_per_second": 61.997,
246
- "eval_steps_per_second": 7.752,
247
- "eval_wer": 0.5531179382330786,
248
- "step": 4500
249
- },
250
- {
251
- "epoch": 4.65,
252
- "learning_rate": 0.00010757546251217138,
253
- "loss": 0.4358,
254
- "step": 4800
255
- },
256
- {
257
- "epoch": 4.65,
258
- "eval_cer": 0.12472261426555206,
259
- "eval_loss": 0.268148809671402,
260
- "eval_runtime": 357.7053,
261
- "eval_samples_per_second": 61.553,
262
- "eval_steps_per_second": 7.696,
263
- "eval_wer": 0.5454239750349725,
264
- "step": 4800
265
- },
266
- {
267
- "epoch": 4.94,
268
- "learning_rate": 0.0001017332035053554,
269
- "loss": 0.4271,
270
- "step": 5100
271
- },
272
- {
273
- "epoch": 4.94,
274
- "eval_cer": 0.12406470524373032,
275
- "eval_loss": 0.25914254784584045,
276
- "eval_runtime": 355.8253,
277
- "eval_samples_per_second": 61.879,
278
- "eval_steps_per_second": 7.737,
279
- "eval_wer": 0.5402318949747121,
280
- "step": 5100
281
- },
282
- {
283
- "epoch": 5.23,
284
- "learning_rate": 9.589094449853944e-05,
285
- "loss": 0.407,
286
- "step": 5400
287
- },
288
- {
289
- "epoch": 5.23,
290
- "eval_cer": 0.12141786993811747,
291
- "eval_loss": 0.25540652871131897,
292
- "eval_runtime": 358.4634,
293
- "eval_samples_per_second": 61.423,
294
- "eval_steps_per_second": 7.68,
295
- "eval_wer": 0.5377838157753148,
296
- "step": 5400
297
- },
298
- {
299
- "epoch": 5.52,
300
- "learning_rate": 9.004868549172347e-05,
301
- "loss": 0.3931,
302
- "step": 5700
303
- },
304
- {
305
- "epoch": 5.52,
306
- "eval_cer": 0.12145695364238411,
307
- "eval_loss": 0.25973308086395264,
308
- "eval_runtime": 356.1836,
309
- "eval_samples_per_second": 61.816,
310
- "eval_steps_per_second": 7.729,
311
- "eval_wer": 0.5346900893145379,
312
- "step": 5700
313
- },
314
- {
315
- "epoch": 5.81,
316
- "learning_rate": 8.42064264849075e-05,
317
- "loss": 0.3945,
318
- "step": 6000
319
- },
320
- {
321
- "epoch": 5.81,
322
- "eval_cer": 0.11911627402019324,
323
- "eval_loss": 0.2458212673664093,
324
- "eval_runtime": 357.5762,
325
- "eval_samples_per_second": 61.576,
326
- "eval_steps_per_second": 7.699,
327
- "eval_wer": 0.5292962444850963,
328
- "step": 6000
329
- },
330
- {
331
- "epoch": 6.1,
332
- "learning_rate": 7.836416747809153e-05,
333
- "loss": 0.3818,
334
- "step": 6300
335
- },
336
- {
337
- "epoch": 6.1,
338
- "eval_cer": 0.12016936271848877,
339
- "eval_loss": 0.24545489251613617,
340
- "eval_runtime": 358.2815,
341
- "eval_samples_per_second": 61.454,
342
- "eval_steps_per_second": 7.684,
343
- "eval_wer": 0.5279645970085011,
344
- "step": 6300
345
- },
346
- {
347
- "epoch": 6.4,
348
- "learning_rate": 7.252190847127556e-05,
349
- "loss": 0.3687,
350
- "step": 6600
351
- },
352
- {
353
- "epoch": 6.4,
354
- "eval_cer": 0.1174161328845945,
355
- "eval_loss": 0.24405910074710846,
356
- "eval_runtime": 358.5525,
357
- "eval_samples_per_second": 61.408,
358
- "eval_steps_per_second": 7.678,
359
- "eval_wer": 0.5244269880555257,
360
- "step": 6600
361
- },
362
- {
363
- "epoch": 6.69,
364
- "learning_rate": 6.667964946445959e-05,
365
- "loss": 0.3627,
366
- "step": 6900
367
- },
368
- {
369
- "epoch": 6.69,
370
- "eval_cer": 0.1159483226576919,
371
- "eval_loss": 0.23746837675571442,
372
- "eval_runtime": 360.1336,
373
- "eval_samples_per_second": 61.138,
374
- "eval_steps_per_second": 7.644,
375
- "eval_wer": 0.5213198106101367,
376
- "step": 6900
377
- },
378
- {
379
- "epoch": 6.98,
380
- "learning_rate": 6.083739045764363e-05,
381
- "loss": 0.3536,
382
- "step": 7200
383
- },
384
- {
385
- "epoch": 6.98,
386
- "eval_cer": 0.11431114971230051,
387
- "eval_loss": 0.23441533744335175,
388
- "eval_runtime": 357.6113,
389
- "eval_samples_per_second": 61.57,
390
- "eval_steps_per_second": 7.698,
391
- "eval_wer": 0.5191542020875928,
392
- "step": 7200
393
- },
394
- {
395
- "epoch": 7.27,
396
- "learning_rate": 5.499513145082765e-05,
397
- "loss": 0.3406,
398
- "step": 7500
399
- },
400
- {
401
- "epoch": 7.27,
402
- "eval_cer": 0.11381391814135274,
403
- "eval_loss": 0.23369348049163818,
404
- "eval_runtime": 355.2971,
405
- "eval_samples_per_second": 61.971,
406
- "eval_steps_per_second": 7.748,
407
- "eval_wer": 0.5165447110728505,
408
- "step": 7500
409
- },
410
- {
411
- "epoch": 7.56,
412
- "learning_rate": 4.915287244401169e-05,
413
- "loss": 0.3407,
414
- "step": 7800
415
- },
416
- {
417
- "epoch": 7.56,
418
- "eval_cer": 0.11317989360547172,
419
- "eval_loss": 0.23301520943641663,
420
- "eval_runtime": 355.7639,
421
- "eval_samples_per_second": 61.889,
422
- "eval_steps_per_second": 7.738,
423
- "eval_wer": 0.5148902399655655,
424
- "step": 7800
425
- },
426
- {
427
- "epoch": 7.85,
428
- "learning_rate": 4.33300876338851e-05,
429
- "loss": 0.3376,
430
- "step": 8100
431
- },
432
- {
433
- "epoch": 7.85,
434
- "eval_cer": 0.11235913581587233,
435
- "eval_loss": 0.22977174818515778,
436
- "eval_runtime": 358.6159,
437
- "eval_samples_per_second": 61.397,
438
- "eval_steps_per_second": 7.677,
439
- "eval_wer": 0.5103976111051329,
440
- "step": 8100
441
- },
442
- {
443
- "epoch": 8.14,
444
- "learning_rate": 3.748782862706913e-05,
445
- "loss": 0.3283,
446
- "step": 8400
447
- },
448
- {
449
- "epoch": 8.14,
450
- "eval_cer": 0.11278905656280534,
451
- "eval_loss": 0.22892726957798004,
452
- "eval_runtime": 358.772,
453
- "eval_samples_per_second": 61.37,
454
- "eval_steps_per_second": 7.673,
455
- "eval_wer": 0.5095501990745722,
456
- "step": 8400
457
- },
458
- {
459
- "epoch": 8.43,
460
- "learning_rate": 3.1645569620253167e-05,
461
- "loss": 0.3192,
462
- "step": 8700
463
- },
464
- {
465
- "epoch": 8.43,
466
- "eval_cer": 0.11249375746390186,
467
- "eval_loss": 0.22378800809383392,
468
- "eval_runtime": 356.1078,
469
- "eval_samples_per_second": 61.83,
470
- "eval_steps_per_second": 7.731,
471
- "eval_wer": 0.5087431399978478,
472
- "step": 8700
473
- },
474
- {
475
- "epoch": 8.72,
476
- "learning_rate": 2.58033106134372e-05,
477
- "loss": 0.3113,
478
- "step": 9000
479
- },
480
- {
481
- "epoch": 8.72,
482
- "eval_cer": 0.11138638584301379,
483
- "eval_loss": 0.2272792011499405,
484
- "eval_runtime": 355.3786,
485
- "eval_samples_per_second": 61.956,
486
- "eval_steps_per_second": 7.747,
487
- "eval_wer": 0.5062950607984504,
488
- "step": 9000
489
- },
490
- {
491
- "epoch": 9.01,
492
- "learning_rate": 1.996105160662123e-05,
493
- "loss": 0.314,
494
- "step": 9300
495
- },
496
- {
497
- "epoch": 9.01,
498
- "eval_cer": 0.1103636955813701,
499
- "eval_loss": 0.223616361618042,
500
- "eval_runtime": 358.619,
501
- "eval_samples_per_second": 61.397,
502
- "eval_steps_per_second": 7.677,
503
- "eval_wer": 0.5051786290756484,
504
- "step": 9300
505
- },
506
- {
507
- "epoch": 9.3,
508
- "learning_rate": 1.411879259980526e-05,
509
- "loss": 0.3049,
510
- "step": 9600
511
- },
512
- {
513
- "epoch": 9.3,
514
- "eval_cer": 0.11000108565845185,
515
- "eval_loss": 0.2234642207622528,
516
- "eval_runtime": 358.9948,
517
- "eval_samples_per_second": 61.332,
518
- "eval_steps_per_second": 7.669,
519
- "eval_wer": 0.5038738835682772,
520
- "step": 9600
521
- },
522
- {
523
- "epoch": 9.59,
524
- "learning_rate": 8.27653359298929e-06,
525
- "loss": 0.3028,
526
- "step": 9900
527
- },
528
- {
529
- "epoch": 9.59,
530
- "eval_cer": 0.10983606557377049,
531
- "eval_loss": 0.22289590537548065,
532
- "eval_runtime": 356.6815,
533
- "eval_samples_per_second": 61.73,
534
- "eval_steps_per_second": 7.718,
535
- "eval_wer": 0.5035510599375874,
536
- "step": 9900
537
- },
538
- {
539
- "epoch": 9.88,
540
- "learning_rate": 2.4342745861733204e-06,
541
- "loss": 0.3026,
542
- "step": 10200
543
- },
544
- {
545
- "epoch": 9.88,
546
- "eval_cer": 0.10945825643252632,
547
- "eval_loss": 0.22115875780582428,
548
- "eval_runtime": 355.7805,
549
- "eval_samples_per_second": 61.886,
550
- "eval_steps_per_second": 7.738,
551
- "eval_wer": 0.503295491229958,
552
- "step": 10200
553
- }
554
- ],
555
- "max_steps": 10320,
556
- "num_train_epochs": 10,
557
- "total_flos": 7.282334872342427e+19,
558
- "trial_name": null,
559
- "trial_params": null
560
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-10200/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9ecf6e57275cb7acc55b94084f6c9d6d092c6f2fffe8083bf55c3c6f45893cd
3
- size 2991