kiritan commited on
Commit
f27e0a8
·
verified ·
1 Parent(s): d82d94a

Training in progress, step 14000, checkpoint

Browse files
last-checkpoint/global_step14000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86a2f554ce1ec3d33d02f54c080f37b2dec7e2cc450cf17190781c61293c3d8e
3
+ size 761059696
last-checkpoint/global_step14000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d5093ee98781439808af99abf1b4a69c796044bd8d29fcf1271fdffc29bb205
3
+ size 129965712
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step12000
 
1
+ global_step14000
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cfdaaad7311cc963a64154915312068b27d408ccb4dbe0d7a849a3298fcab86
3
  size 181508256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b168574268210f92c9e3c34fcaf08a2e263a9da68774e57a2622b5bb1eb076e
3
  size 181508256
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92fc487271575fb16a5dab13203a006ee4b441af267eba2302c24332dddb9db6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ec63f9e79d0c1ca51e7a6a550657e26243aaf847e676e55fcadb7e9922d4621
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5884115119708c269ef22db040f254e6fc9b5f2bf91c9b4125193ea4ee22e90a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a15f6bb9490f63e2fdfa164e81ea571822fffda0ba3283b70da7aa9b56f23b1b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 96.82225365393681,
3
- "best_model_checkpoint": "./iteboshi_temp/checkpoint-12000",
4
- "epoch": 13.215859030837004,
5
  "eval_steps": 1000,
6
- "global_step": 12000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3487,6 +3487,586 @@
3487
  "eval_steps_per_second": 3.181,
3488
  "eval_wer": 96.82225365393681,
3489
  "step": 12000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3490
  }
3491
  ],
3492
  "logging_steps": 25,
@@ -3506,7 +4086,7 @@
3506
  "attributes": {}
3507
  }
3508
  },
3509
- "total_flos": 2.025804888042111e+19,
3510
  "train_batch_size": 4,
3511
  "trial_name": null,
3512
  "trial_params": null
 
1
  {
2
+ "best_metric": 96.67138142385667,
3
+ "best_model_checkpoint": "./iteboshi_temp/checkpoint-14000",
4
+ "epoch": 15.418502202643172,
5
  "eval_steps": 1000,
6
+ "global_step": 14000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3487
  "eval_steps_per_second": 3.181,
3488
  "eval_wer": 96.82225365393681,
3489
  "step": 12000
3490
+ },
3491
+ {
3492
+ "epoch": 13.243392070484582,
3493
+ "grad_norm": 0.23276664316654205,
3494
+ "learning_rate": 8.17948717948718e-06,
3495
+ "loss": 0.0127,
3496
+ "step": 12025
3497
+ },
3498
+ {
3499
+ "epoch": 13.270925110132158,
3500
+ "grad_norm": 0.11635535210371017,
3501
+ "learning_rate": 8.153846153846154e-06,
3502
+ "loss": 0.0095,
3503
+ "step": 12050
3504
+ },
3505
+ {
3506
+ "epoch": 13.298458149779735,
3507
+ "grad_norm": 0.16364231705665588,
3508
+ "learning_rate": 8.12820512820513e-06,
3509
+ "loss": 0.0114,
3510
+ "step": 12075
3511
+ },
3512
+ {
3513
+ "epoch": 13.325991189427313,
3514
+ "grad_norm": 0.13409483432769775,
3515
+ "learning_rate": 8.102564102564103e-06,
3516
+ "loss": 0.0122,
3517
+ "step": 12100
3518
+ },
3519
+ {
3520
+ "epoch": 13.353524229074889,
3521
+ "grad_norm": 0.10241974890232086,
3522
+ "learning_rate": 8.076923076923077e-06,
3523
+ "loss": 0.0121,
3524
+ "step": 12125
3525
+ },
3526
+ {
3527
+ "epoch": 13.381057268722467,
3528
+ "grad_norm": 0.07747479528188705,
3529
+ "learning_rate": 8.051282051282052e-06,
3530
+ "loss": 0.0125,
3531
+ "step": 12150
3532
+ },
3533
+ {
3534
+ "epoch": 13.408590308370044,
3535
+ "grad_norm": 0.08573091775178909,
3536
+ "learning_rate": 8.025641025641026e-06,
3537
+ "loss": 0.0125,
3538
+ "step": 12175
3539
+ },
3540
+ {
3541
+ "epoch": 13.43612334801762,
3542
+ "grad_norm": 0.15942828357219696,
3543
+ "learning_rate": 8.000000000000001e-06,
3544
+ "loss": 0.0127,
3545
+ "step": 12200
3546
+ },
3547
+ {
3548
+ "epoch": 13.463656387665198,
3549
+ "grad_norm": 0.22725528478622437,
3550
+ "learning_rate": 7.974358974358975e-06,
3551
+ "loss": 0.0135,
3552
+ "step": 12225
3553
+ },
3554
+ {
3555
+ "epoch": 13.491189427312776,
3556
+ "grad_norm": 0.07710346579551697,
3557
+ "learning_rate": 7.948717948717949e-06,
3558
+ "loss": 0.01,
3559
+ "step": 12250
3560
+ },
3561
+ {
3562
+ "epoch": 13.518722466960352,
3563
+ "grad_norm": 0.06472910940647125,
3564
+ "learning_rate": 7.923076923076924e-06,
3565
+ "loss": 0.0126,
3566
+ "step": 12275
3567
+ },
3568
+ {
3569
+ "epoch": 13.54625550660793,
3570
+ "grad_norm": 0.20756784081459045,
3571
+ "learning_rate": 7.897435897435898e-06,
3572
+ "loss": 0.0135,
3573
+ "step": 12300
3574
+ },
3575
+ {
3576
+ "epoch": 13.573788546255507,
3577
+ "grad_norm": 0.1374279409646988,
3578
+ "learning_rate": 7.871794871794873e-06,
3579
+ "loss": 0.0128,
3580
+ "step": 12325
3581
+ },
3582
+ {
3583
+ "epoch": 13.601321585903083,
3584
+ "grad_norm": 0.08940647542476654,
3585
+ "learning_rate": 7.846153846153847e-06,
3586
+ "loss": 0.0122,
3587
+ "step": 12350
3588
+ },
3589
+ {
3590
+ "epoch": 13.62885462555066,
3591
+ "grad_norm": 0.174547016620636,
3592
+ "learning_rate": 7.820512820512822e-06,
3593
+ "loss": 0.0131,
3594
+ "step": 12375
3595
+ },
3596
+ {
3597
+ "epoch": 13.656387665198238,
3598
+ "grad_norm": 0.08119652420282364,
3599
+ "learning_rate": 7.794871794871796e-06,
3600
+ "loss": 0.0128,
3601
+ "step": 12400
3602
+ },
3603
+ {
3604
+ "epoch": 13.683920704845814,
3605
+ "grad_norm": 0.08605458587408066,
3606
+ "learning_rate": 7.76923076923077e-06,
3607
+ "loss": 0.0121,
3608
+ "step": 12425
3609
+ },
3610
+ {
3611
+ "epoch": 13.711453744493392,
3612
+ "grad_norm": 0.06772664934396744,
3613
+ "learning_rate": 7.743589743589745e-06,
3614
+ "loss": 0.0124,
3615
+ "step": 12450
3616
+ },
3617
+ {
3618
+ "epoch": 13.73898678414097,
3619
+ "grad_norm": 0.1438221037387848,
3620
+ "learning_rate": 7.717948717948718e-06,
3621
+ "loss": 0.0118,
3622
+ "step": 12475
3623
+ },
3624
+ {
3625
+ "epoch": 13.766519823788546,
3626
+ "grad_norm": 0.24703101813793182,
3627
+ "learning_rate": 7.692307692307694e-06,
3628
+ "loss": 0.013,
3629
+ "step": 12500
3630
+ },
3631
+ {
3632
+ "epoch": 13.794052863436123,
3633
+ "grad_norm": 0.06869100034236908,
3634
+ "learning_rate": 7.666666666666667e-06,
3635
+ "loss": 0.0105,
3636
+ "step": 12525
3637
+ },
3638
+ {
3639
+ "epoch": 13.821585903083701,
3640
+ "grad_norm": 0.2140737622976303,
3641
+ "learning_rate": 7.641025641025641e-06,
3642
+ "loss": 0.0153,
3643
+ "step": 12550
3644
+ },
3645
+ {
3646
+ "epoch": 13.849118942731277,
3647
+ "grad_norm": 0.14644251763820648,
3648
+ "learning_rate": 7.615384615384615e-06,
3649
+ "loss": 0.0113,
3650
+ "step": 12575
3651
+ },
3652
+ {
3653
+ "epoch": 13.876651982378855,
3654
+ "grad_norm": 0.0690101683139801,
3655
+ "learning_rate": 7.58974358974359e-06,
3656
+ "loss": 0.0114,
3657
+ "step": 12600
3658
+ },
3659
+ {
3660
+ "epoch": 13.904185022026432,
3661
+ "grad_norm": 0.07625989615917206,
3662
+ "learning_rate": 7.564102564102564e-06,
3663
+ "loss": 0.0117,
3664
+ "step": 12625
3665
+ },
3666
+ {
3667
+ "epoch": 13.931718061674008,
3668
+ "grad_norm": 0.06748715043067932,
3669
+ "learning_rate": 7.538461538461539e-06,
3670
+ "loss": 0.0126,
3671
+ "step": 12650
3672
+ },
3673
+ {
3674
+ "epoch": 13.959251101321586,
3675
+ "grad_norm": 0.11845114827156067,
3676
+ "learning_rate": 7.512820512820513e-06,
3677
+ "loss": 0.0121,
3678
+ "step": 12675
3679
+ },
3680
+ {
3681
+ "epoch": 13.986784140969164,
3682
+ "grad_norm": 0.16305984556674957,
3683
+ "learning_rate": 7.487179487179488e-06,
3684
+ "loss": 0.0142,
3685
+ "step": 12700
3686
+ },
3687
+ {
3688
+ "epoch": 14.01431718061674,
3689
+ "grad_norm": 0.04868720471858978,
3690
+ "learning_rate": 7.461538461538462e-06,
3691
+ "loss": 0.0101,
3692
+ "step": 12725
3693
+ },
3694
+ {
3695
+ "epoch": 14.041850220264317,
3696
+ "grad_norm": 0.19972330331802368,
3697
+ "learning_rate": 7.435897435897437e-06,
3698
+ "loss": 0.0078,
3699
+ "step": 12750
3700
+ },
3701
+ {
3702
+ "epoch": 14.069383259911895,
3703
+ "grad_norm": 0.06579900532960892,
3704
+ "learning_rate": 7.410256410256411e-06,
3705
+ "loss": 0.0097,
3706
+ "step": 12775
3707
+ },
3708
+ {
3709
+ "epoch": 14.09691629955947,
3710
+ "grad_norm": 0.07141165435314178,
3711
+ "learning_rate": 7.384615384615386e-06,
3712
+ "loss": 0.0075,
3713
+ "step": 12800
3714
+ },
3715
+ {
3716
+ "epoch": 14.124449339207048,
3717
+ "grad_norm": 0.0851076990365982,
3718
+ "learning_rate": 7.35897435897436e-06,
3719
+ "loss": 0.0087,
3720
+ "step": 12825
3721
+ },
3722
+ {
3723
+ "epoch": 14.151982378854626,
3724
+ "grad_norm": 0.08377552032470703,
3725
+ "learning_rate": 7.333333333333333e-06,
3726
+ "loss": 0.0097,
3727
+ "step": 12850
3728
+ },
3729
+ {
3730
+ "epoch": 14.179515418502202,
3731
+ "grad_norm": 0.05744962766766548,
3732
+ "learning_rate": 7.307692307692308e-06,
3733
+ "loss": 0.007,
3734
+ "step": 12875
3735
+ },
3736
+ {
3737
+ "epoch": 14.20704845814978,
3738
+ "grad_norm": 0.04978534206748009,
3739
+ "learning_rate": 7.282051282051282e-06,
3740
+ "loss": 0.0079,
3741
+ "step": 12900
3742
+ },
3743
+ {
3744
+ "epoch": 14.234581497797357,
3745
+ "grad_norm": 0.07835716754198074,
3746
+ "learning_rate": 7.256410256410257e-06,
3747
+ "loss": 0.0082,
3748
+ "step": 12925
3749
+ },
3750
+ {
3751
+ "epoch": 14.262114537444933,
3752
+ "grad_norm": 0.1253698170185089,
3753
+ "learning_rate": 7.230769230769231e-06,
3754
+ "loss": 0.0073,
3755
+ "step": 12950
3756
+ },
3757
+ {
3758
+ "epoch": 14.289647577092511,
3759
+ "grad_norm": 0.06475073099136353,
3760
+ "learning_rate": 7.205128205128206e-06,
3761
+ "loss": 0.0076,
3762
+ "step": 12975
3763
+ },
3764
+ {
3765
+ "epoch": 14.317180616740089,
3766
+ "grad_norm": 0.05178418755531311,
3767
+ "learning_rate": 7.17948717948718e-06,
3768
+ "loss": 0.008,
3769
+ "step": 13000
3770
+ },
3771
+ {
3772
+ "epoch": 14.317180616740089,
3773
+ "eval_cer": 57.12803640824,
3774
+ "eval_loss": 0.979995608329773,
3775
+ "eval_runtime": 852.1277,
3776
+ "eval_samples_per_second": 12.417,
3777
+ "eval_steps_per_second": 3.105,
3778
+ "eval_wer": 96.71852899575671,
3779
+ "step": 13000
3780
+ },
3781
+ {
3782
+ "epoch": 14.344713656387665,
3783
+ "grad_norm": 0.07015621662139893,
3784
+ "learning_rate": 7.153846153846155e-06,
3785
+ "loss": 0.0081,
3786
+ "step": 13025
3787
+ },
3788
+ {
3789
+ "epoch": 14.372246696035242,
3790
+ "grad_norm": 0.061675600707530975,
3791
+ "learning_rate": 7.128205128205129e-06,
3792
+ "loss": 0.0076,
3793
+ "step": 13050
3794
+ },
3795
+ {
3796
+ "epoch": 14.39977973568282,
3797
+ "grad_norm": 0.1807292252779007,
3798
+ "learning_rate": 7.102564102564104e-06,
3799
+ "loss": 0.009,
3800
+ "step": 13075
3801
+ },
3802
+ {
3803
+ "epoch": 14.427312775330396,
3804
+ "grad_norm": 0.05348524823784828,
3805
+ "learning_rate": 7.076923076923078e-06,
3806
+ "loss": 0.0085,
3807
+ "step": 13100
3808
+ },
3809
+ {
3810
+ "epoch": 14.454845814977974,
3811
+ "grad_norm": 0.040988489985466,
3812
+ "learning_rate": 7.051282051282053e-06,
3813
+ "loss": 0.0082,
3814
+ "step": 13125
3815
+ },
3816
+ {
3817
+ "epoch": 14.482378854625551,
3818
+ "grad_norm": 0.06997233629226685,
3819
+ "learning_rate": 7.025641025641025e-06,
3820
+ "loss": 0.0081,
3821
+ "step": 13150
3822
+ },
3823
+ {
3824
+ "epoch": 14.509911894273127,
3825
+ "grad_norm": 0.08544085174798965,
3826
+ "learning_rate": 7e-06,
3827
+ "loss": 0.0074,
3828
+ "step": 13175
3829
+ },
3830
+ {
3831
+ "epoch": 14.537444933920705,
3832
+ "grad_norm": 0.16296857595443726,
3833
+ "learning_rate": 6.974358974358974e-06,
3834
+ "loss": 0.0086,
3835
+ "step": 13200
3836
+ },
3837
+ {
3838
+ "epoch": 14.564977973568283,
3839
+ "grad_norm": 0.04533977061510086,
3840
+ "learning_rate": 6.948717948717949e-06,
3841
+ "loss": 0.0074,
3842
+ "step": 13225
3843
+ },
3844
+ {
3845
+ "epoch": 14.592511013215859,
3846
+ "grad_norm": 0.11487758159637451,
3847
+ "learning_rate": 6.923076923076923e-06,
3848
+ "loss": 0.0088,
3849
+ "step": 13250
3850
+ },
3851
+ {
3852
+ "epoch": 14.620044052863436,
3853
+ "grad_norm": 0.05602938309311867,
3854
+ "learning_rate": 6.897435897435898e-06,
3855
+ "loss": 0.0078,
3856
+ "step": 13275
3857
+ },
3858
+ {
3859
+ "epoch": 14.647577092511014,
3860
+ "grad_norm": 0.06876658648252487,
3861
+ "learning_rate": 6.871794871794872e-06,
3862
+ "loss": 0.009,
3863
+ "step": 13300
3864
+ },
3865
+ {
3866
+ "epoch": 14.67511013215859,
3867
+ "grad_norm": 0.21141541004180908,
3868
+ "learning_rate": 6.846153846153847e-06,
3869
+ "loss": 0.008,
3870
+ "step": 13325
3871
+ },
3872
+ {
3873
+ "epoch": 14.702643171806168,
3874
+ "grad_norm": 0.25995275378227234,
3875
+ "learning_rate": 6.820512820512821e-06,
3876
+ "loss": 0.0083,
3877
+ "step": 13350
3878
+ },
3879
+ {
3880
+ "epoch": 14.730176211453745,
3881
+ "grad_norm": 0.07600809633731842,
3882
+ "learning_rate": 6.794871794871796e-06,
3883
+ "loss": 0.0088,
3884
+ "step": 13375
3885
+ },
3886
+ {
3887
+ "epoch": 14.757709251101321,
3888
+ "grad_norm": 0.08052767813205719,
3889
+ "learning_rate": 6.76923076923077e-06,
3890
+ "loss": 0.0086,
3891
+ "step": 13400
3892
+ },
3893
+ {
3894
+ "epoch": 14.785242290748899,
3895
+ "grad_norm": 0.0512065626680851,
3896
+ "learning_rate": 6.743589743589745e-06,
3897
+ "loss": 0.0077,
3898
+ "step": 13425
3899
+ },
3900
+ {
3901
+ "epoch": 14.812775330396477,
3902
+ "grad_norm": 0.06173788756132126,
3903
+ "learning_rate": 6.717948717948718e-06,
3904
+ "loss": 0.0082,
3905
+ "step": 13450
3906
+ },
3907
+ {
3908
+ "epoch": 14.840308370044053,
3909
+ "grad_norm": 0.05340331420302391,
3910
+ "learning_rate": 6.692307692307692e-06,
3911
+ "loss": 0.0082,
3912
+ "step": 13475
3913
+ },
3914
+ {
3915
+ "epoch": 14.86784140969163,
3916
+ "grad_norm": 0.05996181070804596,
3917
+ "learning_rate": 6.666666666666667e-06,
3918
+ "loss": 0.0087,
3919
+ "step": 13500
3920
+ },
3921
+ {
3922
+ "epoch": 14.895374449339208,
3923
+ "grad_norm": 0.05534656345844269,
3924
+ "learning_rate": 6.641025641025641e-06,
3925
+ "loss": 0.0092,
3926
+ "step": 13525
3927
+ },
3928
+ {
3929
+ "epoch": 14.922907488986784,
3930
+ "grad_norm": 0.0450417622923851,
3931
+ "learning_rate": 6.615384615384616e-06,
3932
+ "loss": 0.0072,
3933
+ "step": 13550
3934
+ },
3935
+ {
3936
+ "epoch": 14.950440528634362,
3937
+ "grad_norm": 0.07858394831418991,
3938
+ "learning_rate": 6.58974358974359e-06,
3939
+ "loss": 0.0081,
3940
+ "step": 13575
3941
+ },
3942
+ {
3943
+ "epoch": 14.97797356828194,
3944
+ "grad_norm": 0.12585800886154175,
3945
+ "learning_rate": 6.564102564102565e-06,
3946
+ "loss": 0.0097,
3947
+ "step": 13600
3948
+ },
3949
+ {
3950
+ "epoch": 15.005506607929515,
3951
+ "grad_norm": 0.08572439104318619,
3952
+ "learning_rate": 6.538461538461539e-06,
3953
+ "loss": 0.0096,
3954
+ "step": 13625
3955
+ },
3956
+ {
3957
+ "epoch": 15.033039647577093,
3958
+ "grad_norm": 0.051955390721559525,
3959
+ "learning_rate": 6.512820512820514e-06,
3960
+ "loss": 0.0057,
3961
+ "step": 13650
3962
+ },
3963
+ {
3964
+ "epoch": 15.060572687224669,
3965
+ "grad_norm": 0.04480992630124092,
3966
+ "learning_rate": 6.487179487179488e-06,
3967
+ "loss": 0.0052,
3968
+ "step": 13675
3969
+ },
3970
+ {
3971
+ "epoch": 15.088105726872246,
3972
+ "grad_norm": 0.03293057531118393,
3973
+ "learning_rate": 6.461538461538463e-06,
3974
+ "loss": 0.0057,
3975
+ "step": 13700
3976
+ },
3977
+ {
3978
+ "epoch": 15.115638766519824,
3979
+ "grad_norm": 0.0531000941991806,
3980
+ "learning_rate": 6.435897435897437e-06,
3981
+ "loss": 0.0059,
3982
+ "step": 13725
3983
+ },
3984
+ {
3985
+ "epoch": 15.1431718061674,
3986
+ "grad_norm": 0.030901802703738213,
3987
+ "learning_rate": 6.410256410256412e-06,
3988
+ "loss": 0.0058,
3989
+ "step": 13750
3990
+ },
3991
+ {
3992
+ "epoch": 15.170704845814978,
3993
+ "grad_norm": 0.04868703335523605,
3994
+ "learning_rate": 6.384615384615384e-06,
3995
+ "loss": 0.0056,
3996
+ "step": 13775
3997
+ },
3998
+ {
3999
+ "epoch": 15.198237885462555,
4000
+ "grad_norm": 0.045157793909311295,
4001
+ "learning_rate": 6.358974358974359e-06,
4002
+ "loss": 0.0054,
4003
+ "step": 13800
4004
+ },
4005
+ {
4006
+ "epoch": 15.225770925110131,
4007
+ "grad_norm": 0.07468298077583313,
4008
+ "learning_rate": 6.333333333333333e-06,
4009
+ "loss": 0.0071,
4010
+ "step": 13825
4011
+ },
4012
+ {
4013
+ "epoch": 15.253303964757709,
4014
+ "grad_norm": 0.06386591494083405,
4015
+ "learning_rate": 6.307692307692308e-06,
4016
+ "loss": 0.006,
4017
+ "step": 13850
4018
+ },
4019
+ {
4020
+ "epoch": 15.280837004405287,
4021
+ "grad_norm": 0.04140784963965416,
4022
+ "learning_rate": 6.282051282051282e-06,
4023
+ "loss": 0.006,
4024
+ "step": 13875
4025
+ },
4026
+ {
4027
+ "epoch": 15.308370044052863,
4028
+ "grad_norm": 0.09238462150096893,
4029
+ "learning_rate": 6.256410256410257e-06,
4030
+ "loss": 0.0061,
4031
+ "step": 13900
4032
+ },
4033
+ {
4034
+ "epoch": 15.33590308370044,
4035
+ "grad_norm": 0.2956899106502533,
4036
+ "learning_rate": 6.230769230769231e-06,
4037
+ "loss": 0.006,
4038
+ "step": 13925
4039
+ },
4040
+ {
4041
+ "epoch": 15.363436123348018,
4042
+ "grad_norm": 0.048573561012744904,
4043
+ "learning_rate": 6.205128205128206e-06,
4044
+ "loss": 0.0069,
4045
+ "step": 13950
4046
+ },
4047
+ {
4048
+ "epoch": 15.390969162995594,
4049
+ "grad_norm": 0.17084334790706635,
4050
+ "learning_rate": 6.17948717948718e-06,
4051
+ "loss": 0.0068,
4052
+ "step": 13975
4053
+ },
4054
+ {
4055
+ "epoch": 15.418502202643172,
4056
+ "grad_norm": 0.06327039748430252,
4057
+ "learning_rate": 6.153846153846155e-06,
4058
+ "loss": 0.0062,
4059
+ "step": 14000
4060
+ },
4061
+ {
4062
+ "epoch": 15.418502202643172,
4063
+ "eval_cer": 55.30074627721161,
4064
+ "eval_loss": 0.9948021769523621,
4065
+ "eval_runtime": 827.2524,
4066
+ "eval_samples_per_second": 12.791,
4067
+ "eval_steps_per_second": 3.199,
4068
+ "eval_wer": 96.67138142385667,
4069
+ "step": 14000
4070
  }
4071
  ],
4072
  "logging_steps": 25,
 
4086
  "attributes": {}
4087
  }
4088
  },
4089
+ "total_flos": 2.3634390360491295e+19,
4090
  "train_batch_size": 4,
4091
  "trial_name": null,
4092
  "trial_params": null