kiritan commited on
Commit
effeebb
·
verified ·
1 Parent(s): 66026a3

Training in progress, step 13000, checkpoint

Browse files
last-checkpoint/global_step13000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60c1976b9749dd1ad8141542585466ba658edc3a7f62269542e003ae2aeb36ba
3
+ size 5117197489
last-checkpoint/global_step13000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa193a4fee965a9d1f6e272e88f494d12dddce81bc90c031f850c8285378f5d1
3
+ size 859127933
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step11000
 
1
+ global_step13000
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55de6622ea2c12f2865659952fae3e7645ab102a38297690cb4fdbaeb6a9d78f
3
  size 962205216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcf44af2fa113c91fad9fddc778811862704ae01a3e9b4a00d3f5dafecefce33
3
  size 962205216
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a42b77849766d934d44019f3aaacdcb7addb89613853b8085a0f3dbdc6ec32df
3
  size 14709
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b43eb4b8d99ea61b9872ece72aa8086e069fc0c1aa3eba317a3f22a8741d1801
3
  size 14709
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:493d0f530ff7fc5bb7b7e09a1475f8ed1e6010e09c7b8eee02f261c6c00502eb
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c94d26cf1abc007265682f393879be534350558ed7250bc2fea3a5b1499b77e9
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 83.86610089580387,
3
- "best_model_checkpoint": "./iteboshi_student_model_temp/checkpoint-11000",
4
- "epoch": 12.114537444933921,
5
  "eval_steps": 1000,
6
- "global_step": 11000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3197,6 +3197,586 @@
3197
  "eval_steps_per_second": 2.024,
3198
  "eval_wer": 83.86610089580387,
3199
  "step": 11000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3200
  }
3201
  ],
3202
  "logging_steps": 25,
@@ -3216,7 +3796,7 @@
3216
  "attributes": {}
3217
  }
3218
  },
3219
- "total_flos": 1.8899575051391074e+20,
3220
  "train_batch_size": 4,
3221
  "trial_name": null,
3222
  "trial_params": null
 
1
  {
2
+ "best_metric": 83.48892032060348,
3
+ "best_model_checkpoint": "./iteboshi_student_model_temp/checkpoint-12000",
4
+ "epoch": 14.317180616740089,
5
  "eval_steps": 1000,
6
+ "global_step": 13000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3197
  "eval_steps_per_second": 2.024,
3198
  "eval_wer": 83.86610089580387,
3199
  "step": 11000
3200
+ },
3201
+ {
3202
+ "epoch": 12.142070484581497,
3203
+ "grad_norm": 0.24581079185009003,
3204
+ "learning_rate": 9.205128205128206e-06,
3205
+ "loss": 0.0142,
3206
+ "step": 11025
3207
+ },
3208
+ {
3209
+ "epoch": 12.169603524229075,
3210
+ "grad_norm": 0.1644822657108307,
3211
+ "learning_rate": 9.17948717948718e-06,
3212
+ "loss": 0.0115,
3213
+ "step": 11050
3214
+ },
3215
+ {
3216
+ "epoch": 12.197136563876652,
3217
+ "grad_norm": 0.0707065761089325,
3218
+ "learning_rate": 9.153846153846155e-06,
3219
+ "loss": 0.0146,
3220
+ "step": 11075
3221
+ },
3222
+ {
3223
+ "epoch": 12.224669603524228,
3224
+ "grad_norm": 0.20994262397289276,
3225
+ "learning_rate": 9.128205128205129e-06,
3226
+ "loss": 0.0118,
3227
+ "step": 11100
3228
+ },
3229
+ {
3230
+ "epoch": 12.252202643171806,
3231
+ "grad_norm": 0.04189275577664375,
3232
+ "learning_rate": 9.102564102564104e-06,
3233
+ "loss": 0.0098,
3234
+ "step": 11125
3235
+ },
3236
+ {
3237
+ "epoch": 12.279735682819384,
3238
+ "grad_norm": 0.2779877781867981,
3239
+ "learning_rate": 9.076923076923078e-06,
3240
+ "loss": 0.0132,
3241
+ "step": 11150
3242
+ },
3243
+ {
3244
+ "epoch": 12.30726872246696,
3245
+ "grad_norm": 0.04877633601427078,
3246
+ "learning_rate": 9.051282051282051e-06,
3247
+ "loss": 0.0125,
3248
+ "step": 11175
3249
+ },
3250
+ {
3251
+ "epoch": 12.334801762114537,
3252
+ "grad_norm": 0.15319667756557465,
3253
+ "learning_rate": 9.025641025641027e-06,
3254
+ "loss": 0.0111,
3255
+ "step": 11200
3256
+ },
3257
+ {
3258
+ "epoch": 12.362334801762115,
3259
+ "grad_norm": 0.5198453664779663,
3260
+ "learning_rate": 9e-06,
3261
+ "loss": 0.0119,
3262
+ "step": 11225
3263
+ },
3264
+ {
3265
+ "epoch": 12.389867841409691,
3266
+ "grad_norm": 0.1989358365535736,
3267
+ "learning_rate": 8.974358974358976e-06,
3268
+ "loss": 0.0112,
3269
+ "step": 11250
3270
+ },
3271
+ {
3272
+ "epoch": 12.417400881057269,
3273
+ "grad_norm": 0.5313608646392822,
3274
+ "learning_rate": 8.94871794871795e-06,
3275
+ "loss": 0.0094,
3276
+ "step": 11275
3277
+ },
3278
+ {
3279
+ "epoch": 12.444933920704846,
3280
+ "grad_norm": 0.17706328630447388,
3281
+ "learning_rate": 8.923076923076925e-06,
3282
+ "loss": 0.0128,
3283
+ "step": 11300
3284
+ },
3285
+ {
3286
+ "epoch": 12.472466960352422,
3287
+ "grad_norm": 0.45485684275627136,
3288
+ "learning_rate": 8.897435897435898e-06,
3289
+ "loss": 0.0136,
3290
+ "step": 11325
3291
+ },
3292
+ {
3293
+ "epoch": 12.5,
3294
+ "grad_norm": 0.14598797261714935,
3295
+ "learning_rate": 8.871794871794872e-06,
3296
+ "loss": 0.0129,
3297
+ "step": 11350
3298
+ },
3299
+ {
3300
+ "epoch": 12.527533039647578,
3301
+ "grad_norm": 0.20234304666519165,
3302
+ "learning_rate": 8.846153846153847e-06,
3303
+ "loss": 0.0133,
3304
+ "step": 11375
3305
+ },
3306
+ {
3307
+ "epoch": 12.555066079295154,
3308
+ "grad_norm": 0.15167205035686493,
3309
+ "learning_rate": 8.820512820512821e-06,
3310
+ "loss": 0.013,
3311
+ "step": 11400
3312
+ },
3313
+ {
3314
+ "epoch": 12.582599118942731,
3315
+ "grad_norm": 0.10332977026700974,
3316
+ "learning_rate": 8.794871794871796e-06,
3317
+ "loss": 0.012,
3318
+ "step": 11425
3319
+ },
3320
+ {
3321
+ "epoch": 12.610132158590309,
3322
+ "grad_norm": 0.06746497750282288,
3323
+ "learning_rate": 8.76923076923077e-06,
3324
+ "loss": 0.0102,
3325
+ "step": 11450
3326
+ },
3327
+ {
3328
+ "epoch": 12.637665198237885,
3329
+ "grad_norm": 0.21585692465305328,
3330
+ "learning_rate": 8.743589743589743e-06,
3331
+ "loss": 0.0131,
3332
+ "step": 11475
3333
+ },
3334
+ {
3335
+ "epoch": 12.665198237885463,
3336
+ "grad_norm": 0.11972617357969284,
3337
+ "learning_rate": 8.717948717948719e-06,
3338
+ "loss": 0.0124,
3339
+ "step": 11500
3340
+ },
3341
+ {
3342
+ "epoch": 12.69273127753304,
3343
+ "grad_norm": 0.16483718156814575,
3344
+ "learning_rate": 8.692307692307692e-06,
3345
+ "loss": 0.0136,
3346
+ "step": 11525
3347
+ },
3348
+ {
3349
+ "epoch": 12.720264317180616,
3350
+ "grad_norm": 0.04187220335006714,
3351
+ "learning_rate": 8.666666666666668e-06,
3352
+ "loss": 0.0122,
3353
+ "step": 11550
3354
+ },
3355
+ {
3356
+ "epoch": 12.747797356828194,
3357
+ "grad_norm": 0.37281060218811035,
3358
+ "learning_rate": 8.641025641025641e-06,
3359
+ "loss": 0.0117,
3360
+ "step": 11575
3361
+ },
3362
+ {
3363
+ "epoch": 12.775330396475772,
3364
+ "grad_norm": 0.42664897441864014,
3365
+ "learning_rate": 8.615384615384617e-06,
3366
+ "loss": 0.0132,
3367
+ "step": 11600
3368
+ },
3369
+ {
3370
+ "epoch": 12.802863436123348,
3371
+ "grad_norm": 0.08013510704040527,
3372
+ "learning_rate": 8.58974358974359e-06,
3373
+ "loss": 0.0129,
3374
+ "step": 11625
3375
+ },
3376
+ {
3377
+ "epoch": 12.830396475770925,
3378
+ "grad_norm": 0.24972140789031982,
3379
+ "learning_rate": 8.564102564102564e-06,
3380
+ "loss": 0.0132,
3381
+ "step": 11650
3382
+ },
3383
+ {
3384
+ "epoch": 12.857929515418503,
3385
+ "grad_norm": 1.2854641675949097,
3386
+ "learning_rate": 8.53846153846154e-06,
3387
+ "loss": 0.0129,
3388
+ "step": 11675
3389
+ },
3390
+ {
3391
+ "epoch": 12.885462555066079,
3392
+ "grad_norm": 0.5620148777961731,
3393
+ "learning_rate": 8.512820512820513e-06,
3394
+ "loss": 0.0165,
3395
+ "step": 11700
3396
+ },
3397
+ {
3398
+ "epoch": 12.912995594713657,
3399
+ "grad_norm": 0.1447388082742691,
3400
+ "learning_rate": 8.487179487179488e-06,
3401
+ "loss": 0.0152,
3402
+ "step": 11725
3403
+ },
3404
+ {
3405
+ "epoch": 12.940528634361234,
3406
+ "grad_norm": 0.17876878380775452,
3407
+ "learning_rate": 8.461538461538462e-06,
3408
+ "loss": 0.015,
3409
+ "step": 11750
3410
+ },
3411
+ {
3412
+ "epoch": 12.96806167400881,
3413
+ "grad_norm": 0.29610171914100647,
3414
+ "learning_rate": 8.435897435897436e-06,
3415
+ "loss": 0.0153,
3416
+ "step": 11775
3417
+ },
3418
+ {
3419
+ "epoch": 12.995594713656388,
3420
+ "grad_norm": 0.1498490869998932,
3421
+ "learning_rate": 8.410256410256411e-06,
3422
+ "loss": 0.0144,
3423
+ "step": 11800
3424
+ },
3425
+ {
3426
+ "epoch": 13.023127753303966,
3427
+ "grad_norm": 0.15733398497104645,
3428
+ "learning_rate": 8.384615384615385e-06,
3429
+ "loss": 0.0099,
3430
+ "step": 11825
3431
+ },
3432
+ {
3433
+ "epoch": 13.050660792951541,
3434
+ "grad_norm": 0.13376560807228088,
3435
+ "learning_rate": 8.35897435897436e-06,
3436
+ "loss": 0.0093,
3437
+ "step": 11850
3438
+ },
3439
+ {
3440
+ "epoch": 13.07819383259912,
3441
+ "grad_norm": 0.30212774872779846,
3442
+ "learning_rate": 8.333333333333334e-06,
3443
+ "loss": 0.0117,
3444
+ "step": 11875
3445
+ },
3446
+ {
3447
+ "epoch": 13.105726872246697,
3448
+ "grad_norm": 0.6191049814224243,
3449
+ "learning_rate": 8.307692307692309e-06,
3450
+ "loss": 0.0088,
3451
+ "step": 11900
3452
+ },
3453
+ {
3454
+ "epoch": 13.133259911894273,
3455
+ "grad_norm": 0.12833338975906372,
3456
+ "learning_rate": 8.282051282051283e-06,
3457
+ "loss": 0.0081,
3458
+ "step": 11925
3459
+ },
3460
+ {
3461
+ "epoch": 13.16079295154185,
3462
+ "grad_norm": 0.02582469768822193,
3463
+ "learning_rate": 8.256410256410256e-06,
3464
+ "loss": 0.0075,
3465
+ "step": 11950
3466
+ },
3467
+ {
3468
+ "epoch": 13.188325991189428,
3469
+ "grad_norm": 0.40551483631134033,
3470
+ "learning_rate": 8.230769230769232e-06,
3471
+ "loss": 0.0106,
3472
+ "step": 11975
3473
+ },
3474
+ {
3475
+ "epoch": 13.215859030837004,
3476
+ "grad_norm": 0.178267240524292,
3477
+ "learning_rate": 8.205128205128205e-06,
3478
+ "loss": 0.0105,
3479
+ "step": 12000
3480
+ },
3481
+ {
3482
+ "epoch": 13.215859030837004,
3483
+ "eval_cer": 23.92385909002327,
3484
+ "eval_loss": 0.8548922538757324,
3485
+ "eval_runtime": 1301.0663,
3486
+ "eval_samples_per_second": 8.133,
3487
+ "eval_steps_per_second": 2.034,
3488
+ "eval_wer": 83.48892032060348,
3489
+ "step": 12000
3490
+ },
3491
+ {
3492
+ "epoch": 13.243392070484582,
3493
+ "grad_norm": 0.25860708951950073,
3494
+ "learning_rate": 8.17948717948718e-06,
3495
+ "loss": 0.0107,
3496
+ "step": 12025
3497
+ },
3498
+ {
3499
+ "epoch": 13.270925110132158,
3500
+ "grad_norm": 0.0770430937409401,
3501
+ "learning_rate": 8.153846153846154e-06,
3502
+ "loss": 0.011,
3503
+ "step": 12050
3504
+ },
3505
+ {
3506
+ "epoch": 13.298458149779735,
3507
+ "grad_norm": 0.2660870850086212,
3508
+ "learning_rate": 8.12820512820513e-06,
3509
+ "loss": 0.0105,
3510
+ "step": 12075
3511
+ },
3512
+ {
3513
+ "epoch": 13.325991189427313,
3514
+ "grad_norm": 0.0884290263056755,
3515
+ "learning_rate": 8.102564102564103e-06,
3516
+ "loss": 0.0116,
3517
+ "step": 12100
3518
+ },
3519
+ {
3520
+ "epoch": 13.353524229074889,
3521
+ "grad_norm": 0.10695531964302063,
3522
+ "learning_rate": 8.076923076923077e-06,
3523
+ "loss": 0.0107,
3524
+ "step": 12125
3525
+ },
3526
+ {
3527
+ "epoch": 13.381057268722467,
3528
+ "grad_norm": 0.03739326447248459,
3529
+ "learning_rate": 8.051282051282052e-06,
3530
+ "loss": 0.0157,
3531
+ "step": 12150
3532
+ },
3533
+ {
3534
+ "epoch": 13.408590308370044,
3535
+ "grad_norm": 0.4565247595310211,
3536
+ "learning_rate": 8.025641025641026e-06,
3537
+ "loss": 0.0126,
3538
+ "step": 12175
3539
+ },
3540
+ {
3541
+ "epoch": 13.43612334801762,
3542
+ "grad_norm": 0.06541293859481812,
3543
+ "learning_rate": 8.000000000000001e-06,
3544
+ "loss": 0.0131,
3545
+ "step": 12200
3546
+ },
3547
+ {
3548
+ "epoch": 13.463656387665198,
3549
+ "grad_norm": 0.10597793757915497,
3550
+ "learning_rate": 7.974358974358975e-06,
3551
+ "loss": 0.0107,
3552
+ "step": 12225
3553
+ },
3554
+ {
3555
+ "epoch": 13.491189427312776,
3556
+ "grad_norm": 0.2990114390850067,
3557
+ "learning_rate": 7.948717948717949e-06,
3558
+ "loss": 0.0107,
3559
+ "step": 12250
3560
+ },
3561
+ {
3562
+ "epoch": 13.518722466960352,
3563
+ "grad_norm": 0.1924736499786377,
3564
+ "learning_rate": 7.923076923076924e-06,
3565
+ "loss": 0.0126,
3566
+ "step": 12275
3567
+ },
3568
+ {
3569
+ "epoch": 13.54625550660793,
3570
+ "grad_norm": 0.13384470343589783,
3571
+ "learning_rate": 7.897435897435898e-06,
3572
+ "loss": 0.0144,
3573
+ "step": 12300
3574
+ },
3575
+ {
3576
+ "epoch": 13.573788546255507,
3577
+ "grad_norm": 0.195987269282341,
3578
+ "learning_rate": 7.871794871794873e-06,
3579
+ "loss": 0.011,
3580
+ "step": 12325
3581
+ },
3582
+ {
3583
+ "epoch": 13.601321585903083,
3584
+ "grad_norm": 0.6262577772140503,
3585
+ "learning_rate": 7.846153846153847e-06,
3586
+ "loss": 0.0102,
3587
+ "step": 12350
3588
+ },
3589
+ {
3590
+ "epoch": 13.62885462555066,
3591
+ "grad_norm": 0.09502260386943817,
3592
+ "learning_rate": 7.820512820512822e-06,
3593
+ "loss": 0.0129,
3594
+ "step": 12375
3595
+ },
3596
+ {
3597
+ "epoch": 13.656387665198238,
3598
+ "grad_norm": 0.14882908761501312,
3599
+ "learning_rate": 7.794871794871796e-06,
3600
+ "loss": 0.0103,
3601
+ "step": 12400
3602
+ },
3603
+ {
3604
+ "epoch": 13.683920704845814,
3605
+ "grad_norm": 0.421539306640625,
3606
+ "learning_rate": 7.76923076923077e-06,
3607
+ "loss": 0.0092,
3608
+ "step": 12425
3609
+ },
3610
+ {
3611
+ "epoch": 13.711453744493392,
3612
+ "grad_norm": 0.04390239343047142,
3613
+ "learning_rate": 7.743589743589745e-06,
3614
+ "loss": 0.0141,
3615
+ "step": 12450
3616
+ },
3617
+ {
3618
+ "epoch": 13.73898678414097,
3619
+ "grad_norm": 0.14362525939941406,
3620
+ "learning_rate": 7.717948717948718e-06,
3621
+ "loss": 0.0108,
3622
+ "step": 12475
3623
+ },
3624
+ {
3625
+ "epoch": 13.766519823788546,
3626
+ "grad_norm": 0.25303810834884644,
3627
+ "learning_rate": 7.692307692307694e-06,
3628
+ "loss": 0.0151,
3629
+ "step": 12500
3630
+ },
3631
+ {
3632
+ "epoch": 13.794052863436123,
3633
+ "grad_norm": 0.37157806754112244,
3634
+ "learning_rate": 7.666666666666667e-06,
3635
+ "loss": 0.0136,
3636
+ "step": 12525
3637
+ },
3638
+ {
3639
+ "epoch": 13.821585903083701,
3640
+ "grad_norm": 0.36168116331100464,
3641
+ "learning_rate": 7.641025641025641e-06,
3642
+ "loss": 0.0139,
3643
+ "step": 12550
3644
+ },
3645
+ {
3646
+ "epoch": 13.849118942731277,
3647
+ "grad_norm": 0.32778996229171753,
3648
+ "learning_rate": 7.615384615384615e-06,
3649
+ "loss": 0.0111,
3650
+ "step": 12575
3651
+ },
3652
+ {
3653
+ "epoch": 13.876651982378855,
3654
+ "grad_norm": 0.16989374160766602,
3655
+ "learning_rate": 7.58974358974359e-06,
3656
+ "loss": 0.0111,
3657
+ "step": 12600
3658
+ },
3659
+ {
3660
+ "epoch": 13.904185022026432,
3661
+ "grad_norm": 0.05724957212805748,
3662
+ "learning_rate": 7.564102564102564e-06,
3663
+ "loss": 0.0125,
3664
+ "step": 12625
3665
+ },
3666
+ {
3667
+ "epoch": 13.931718061674008,
3668
+ "grad_norm": 0.35101988911628723,
3669
+ "learning_rate": 7.538461538461539e-06,
3670
+ "loss": 0.0147,
3671
+ "step": 12650
3672
+ },
3673
+ {
3674
+ "epoch": 13.959251101321586,
3675
+ "grad_norm": 0.0790194496512413,
3676
+ "learning_rate": 7.512820512820513e-06,
3677
+ "loss": 0.013,
3678
+ "step": 12675
3679
+ },
3680
+ {
3681
+ "epoch": 13.986784140969164,
3682
+ "grad_norm": 0.2358320951461792,
3683
+ "learning_rate": 7.487179487179488e-06,
3684
+ "loss": 0.0147,
3685
+ "step": 12700
3686
+ },
3687
+ {
3688
+ "epoch": 14.01431718061674,
3689
+ "grad_norm": 0.06501065939664841,
3690
+ "learning_rate": 7.461538461538462e-06,
3691
+ "loss": 0.0094,
3692
+ "step": 12725
3693
+ },
3694
+ {
3695
+ "epoch": 14.041850220264317,
3696
+ "grad_norm": 0.028322290629148483,
3697
+ "learning_rate": 7.435897435897437e-06,
3698
+ "loss": 0.01,
3699
+ "step": 12750
3700
+ },
3701
+ {
3702
+ "epoch": 14.069383259911895,
3703
+ "grad_norm": 0.04854853078722954,
3704
+ "learning_rate": 7.410256410256411e-06,
3705
+ "loss": 0.0109,
3706
+ "step": 12775
3707
+ },
3708
+ {
3709
+ "epoch": 14.09691629955947,
3710
+ "grad_norm": 0.042353082448244095,
3711
+ "learning_rate": 7.384615384615386e-06,
3712
+ "loss": 0.0069,
3713
+ "step": 12800
3714
+ },
3715
+ {
3716
+ "epoch": 14.124449339207048,
3717
+ "grad_norm": 0.24730762839317322,
3718
+ "learning_rate": 7.35897435897436e-06,
3719
+ "loss": 0.009,
3720
+ "step": 12825
3721
+ },
3722
+ {
3723
+ "epoch": 14.151982378854626,
3724
+ "grad_norm": 0.13546280562877655,
3725
+ "learning_rate": 7.333333333333333e-06,
3726
+ "loss": 0.0082,
3727
+ "step": 12850
3728
+ },
3729
+ {
3730
+ "epoch": 14.179515418502202,
3731
+ "grad_norm": 0.1111743226647377,
3732
+ "learning_rate": 7.307692307692308e-06,
3733
+ "loss": 0.0083,
3734
+ "step": 12875
3735
+ },
3736
+ {
3737
+ "epoch": 14.20704845814978,
3738
+ "grad_norm": 0.0262732096016407,
3739
+ "learning_rate": 7.282051282051282e-06,
3740
+ "loss": 0.007,
3741
+ "step": 12900
3742
+ },
3743
+ {
3744
+ "epoch": 14.234581497797357,
3745
+ "grad_norm": 0.3866499960422516,
3746
+ "learning_rate": 7.256410256410257e-06,
3747
+ "loss": 0.008,
3748
+ "step": 12925
3749
+ },
3750
+ {
3751
+ "epoch": 14.262114537444933,
3752
+ "grad_norm": 0.07846901565790176,
3753
+ "learning_rate": 7.230769230769231e-06,
3754
+ "loss": 0.0069,
3755
+ "step": 12950
3756
+ },
3757
+ {
3758
+ "epoch": 14.289647577092511,
3759
+ "grad_norm": 0.044436316937208176,
3760
+ "learning_rate": 7.205128205128206e-06,
3761
+ "loss": 0.0081,
3762
+ "step": 12975
3763
+ },
3764
+ {
3765
+ "epoch": 14.317180616740089,
3766
+ "grad_norm": 0.18789108097553253,
3767
+ "learning_rate": 7.17948717948718e-06,
3768
+ "loss": 0.0074,
3769
+ "step": 13000
3770
+ },
3771
+ {
3772
+ "epoch": 14.317180616740089,
3773
+ "eval_cer": 24.13134937466326,
3774
+ "eval_loss": 0.8608656525611877,
3775
+ "eval_runtime": 1301.5926,
3776
+ "eval_samples_per_second": 8.129,
3777
+ "eval_steps_per_second": 2.033,
3778
+ "eval_wer": 83.63979255068365,
3779
+ "step": 13000
3780
  }
3781
  ],
3782
  "logging_steps": 25,
 
3796
  "attributes": {}
3797
  }
3798
  },
3799
+ "total_flos": 2.233586142437127e+20,
3800
  "train_batch_size": 4,
3801
  "trial_name": null,
3802
  "trial_params": null