mohammadmahdinouri commited on
Commit
ca05e04
·
verified ·
1 Parent(s): 88e26ef

Training in progress, step 11000, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8529ab26bf1cb621752bfdcf39645b857532d7119197ba48911192305222533
3
  size 244223098
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f8dddf6de75a2669e45bc92f6a4ca08a65509177a3732a367cbfa1c80daacbe
3
  size 244223098
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb1396f9891f526d75f646a5f9f0af98ffb3ccba3b7c95bab2b3cfb0d0873dcf
3
  size 381944306
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30dee1c1faa1fa9cca0bbdc3497512922f4906f4ab49d60e46fb24c934bb150d
3
  size 381944306
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce9029627a508f9b85fc87ef7d6b828a2c09a14ff0ca8cde1de843bdd1497dca
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfe43fa1be8fc23eebf6d0265c9e86d27dbe1a7183ee9ff8d290496f67f7920b
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f14204032a244f46d27ef9476a586602eebf2284e673a724ccecf10784c3b30
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02838e3dd99a981aed96c1e46abb129b6636bb9bdc4bb3b9d32692ead8821881
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:468952d6cc10cfee225ed76168c326b4807a4cfb6b22f7910877aeca614c9cad
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aed6a0b83373d2ca2f6ea1f1ac78752c4b8eb48d2f34a0bffe9748140ee5f947
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7238505da0e2ba8c785fd2f6d8ef0414b8ad9ceebe196b76819975bc121ba9d
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0de41f811c47a09044e5ad93b32d48fbc2e808eb9859cb07a66f7923677574e
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1943118567ffe73158246550d682bf973f1c84396959cfa696de01aefce43288
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8fd8d6850b7427eafc7ded0e60d1d7d6419f9660dea8de7c7cbb8cd0dbd9818
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.014813146964193662,
6
  "eval_steps": 500,
7
- "global_step": 10000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3508,6 +3508,356 @@
3508
  "learning_rate": 0.0004976542528181563,
3509
  "loss": 24.1171,
3510
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3511
  }
3512
  ],
3513
  "logging_steps": 20,
@@ -3527,7 +3877,7 @@
3527
  "attributes": {}
3528
  }
3529
  },
3530
- "total_flos": 6.87992380367831e+18,
3531
  "train_batch_size": 48,
3532
  "trial_name": null,
3533
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.016294461660613026,
6
  "eval_steps": 500,
7
+ "global_step": 11000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3508
  "learning_rate": 0.0004976542528181563,
3509
  "loss": 24.1171,
3510
  "step": 10000
3511
+ },
3512
+ {
3513
+ "epoch": 0.014842773258122048,
3514
+ "grad_norm": 19.625,
3515
+ "learning_rate": 0.0004976493138831488,
3516
+ "loss": 24.1736,
3517
+ "step": 10020
3518
+ },
3519
+ {
3520
+ "epoch": 0.014872399552050436,
3521
+ "grad_norm": 18.625,
3522
+ "learning_rate": 0.0004976443749481412,
3523
+ "loss": 24.2905,
3524
+ "step": 10040
3525
+ },
3526
+ {
3527
+ "epoch": 0.014902025845978823,
3528
+ "grad_norm": 18.375,
3529
+ "learning_rate": 0.0004976394360131336,
3530
+ "loss": 24.1324,
3531
+ "step": 10060
3532
+ },
3533
+ {
3534
+ "epoch": 0.014931652139907211,
3535
+ "grad_norm": 20.375,
3536
+ "learning_rate": 0.0004976344970781261,
3537
+ "loss": 24.1785,
3538
+ "step": 10080
3539
+ },
3540
+ {
3541
+ "epoch": 0.014961278433835597,
3542
+ "grad_norm": 17.625,
3543
+ "learning_rate": 0.0004976295581431185,
3544
+ "loss": 24.1872,
3545
+ "step": 10100
3546
+ },
3547
+ {
3548
+ "epoch": 0.014990904727763986,
3549
+ "grad_norm": 18.0,
3550
+ "learning_rate": 0.0004976246192081109,
3551
+ "loss": 24.1813,
3552
+ "step": 10120
3553
+ },
3554
+ {
3555
+ "epoch": 0.015020531021692372,
3556
+ "grad_norm": 21.75,
3557
+ "learning_rate": 0.0004976196802731033,
3558
+ "loss": 24.1438,
3559
+ "step": 10140
3560
+ },
3561
+ {
3562
+ "epoch": 0.01505015731562076,
3563
+ "grad_norm": 22.375,
3564
+ "learning_rate": 0.0004976147413380958,
3565
+ "loss": 24.1436,
3566
+ "step": 10160
3567
+ },
3568
+ {
3569
+ "epoch": 0.015079783609549147,
3570
+ "grad_norm": 19.5,
3571
+ "learning_rate": 0.0004976098024030882,
3572
+ "loss": 24.1394,
3573
+ "step": 10180
3574
+ },
3575
+ {
3576
+ "epoch": 0.015109409903477535,
3577
+ "grad_norm": 18.25,
3578
+ "learning_rate": 0.0004976048634680807,
3579
+ "loss": 24.0992,
3580
+ "step": 10200
3581
+ },
3582
+ {
3583
+ "epoch": 0.015139036197405921,
3584
+ "grad_norm": 15.6875,
3585
+ "learning_rate": 0.0004975999245330731,
3586
+ "loss": 24.0464,
3587
+ "step": 10220
3588
+ },
3589
+ {
3590
+ "epoch": 0.01516866249133431,
3591
+ "grad_norm": 17.875,
3592
+ "learning_rate": 0.0004975949855980656,
3593
+ "loss": 24.0805,
3594
+ "step": 10240
3595
+ },
3596
+ {
3597
+ "epoch": 0.015198288785262696,
3598
+ "grad_norm": 16.875,
3599
+ "learning_rate": 0.000497590046663058,
3600
+ "loss": 24.0985,
3601
+ "step": 10260
3602
+ },
3603
+ {
3604
+ "epoch": 0.015227915079191084,
3605
+ "grad_norm": 23.25,
3606
+ "learning_rate": 0.0004975851077280504,
3607
+ "loss": 24.042,
3608
+ "step": 10280
3609
+ },
3610
+ {
3611
+ "epoch": 0.01525754137311947,
3612
+ "grad_norm": 19.375,
3613
+ "learning_rate": 0.0004975801687930429,
3614
+ "loss": 24.0048,
3615
+ "step": 10300
3616
+ },
3617
+ {
3618
+ "epoch": 0.015287167667047859,
3619
+ "grad_norm": 19.75,
3620
+ "learning_rate": 0.0004975752298580353,
3621
+ "loss": 24.0158,
3622
+ "step": 10320
3623
+ },
3624
+ {
3625
+ "epoch": 0.015316793960976245,
3626
+ "grad_norm": 20.875,
3627
+ "learning_rate": 0.0004975702909230277,
3628
+ "loss": 23.9875,
3629
+ "step": 10340
3630
+ },
3631
+ {
3632
+ "epoch": 0.015346420254904633,
3633
+ "grad_norm": 20.75,
3634
+ "learning_rate": 0.0004975653519880202,
3635
+ "loss": 24.055,
3636
+ "step": 10360
3637
+ },
3638
+ {
3639
+ "epoch": 0.01537604654883302,
3640
+ "grad_norm": 16.125,
3641
+ "learning_rate": 0.0004975604130530126,
3642
+ "loss": 23.9887,
3643
+ "step": 10380
3644
+ },
3645
+ {
3646
+ "epoch": 0.015405672842761408,
3647
+ "grad_norm": 17.25,
3648
+ "learning_rate": 0.000497555474118005,
3649
+ "loss": 24.0268,
3650
+ "step": 10400
3651
+ },
3652
+ {
3653
+ "epoch": 0.015435299136689794,
3654
+ "grad_norm": 18.125,
3655
+ "learning_rate": 0.0004975505351829975,
3656
+ "loss": 24.0453,
3657
+ "step": 10420
3658
+ },
3659
+ {
3660
+ "epoch": 0.015464925430618183,
3661
+ "grad_norm": 21.625,
3662
+ "learning_rate": 0.0004975455962479898,
3663
+ "loss": 24.0189,
3664
+ "step": 10440
3665
+ },
3666
+ {
3667
+ "epoch": 0.015494551724546569,
3668
+ "grad_norm": 15.625,
3669
+ "learning_rate": 0.0004975406573129823,
3670
+ "loss": 23.9409,
3671
+ "step": 10460
3672
+ },
3673
+ {
3674
+ "epoch": 0.015524178018474957,
3675
+ "grad_norm": 18.0,
3676
+ "learning_rate": 0.0004975357183779747,
3677
+ "loss": 23.8996,
3678
+ "step": 10480
3679
+ },
3680
+ {
3681
+ "epoch": 0.015553804312403344,
3682
+ "grad_norm": 21.875,
3683
+ "learning_rate": 0.0004975307794429671,
3684
+ "loss": 24.0183,
3685
+ "step": 10500
3686
+ },
3687
+ {
3688
+ "epoch": 0.015583430606331732,
3689
+ "grad_norm": 19.0,
3690
+ "learning_rate": 0.0004975258405079595,
3691
+ "loss": 23.9392,
3692
+ "step": 10520
3693
+ },
3694
+ {
3695
+ "epoch": 0.015613056900260118,
3696
+ "grad_norm": 16.375,
3697
+ "learning_rate": 0.000497520901572952,
3698
+ "loss": 23.9131,
3699
+ "step": 10540
3700
+ },
3701
+ {
3702
+ "epoch": 0.015642683194188507,
3703
+ "grad_norm": 17.5,
3704
+ "learning_rate": 0.0004975159626379444,
3705
+ "loss": 23.8963,
3706
+ "step": 10560
3707
+ },
3708
+ {
3709
+ "epoch": 0.015672309488116893,
3710
+ "grad_norm": 16.625,
3711
+ "learning_rate": 0.0004975110237029369,
3712
+ "loss": 23.9361,
3713
+ "step": 10580
3714
+ },
3715
+ {
3716
+ "epoch": 0.01570193578204528,
3717
+ "grad_norm": 18.25,
3718
+ "learning_rate": 0.0004975060847679293,
3719
+ "loss": 23.9129,
3720
+ "step": 10600
3721
+ },
3722
+ {
3723
+ "epoch": 0.01573156207597367,
3724
+ "grad_norm": 19.75,
3725
+ "learning_rate": 0.0004975011458329218,
3726
+ "loss": 23.8795,
3727
+ "step": 10620
3728
+ },
3729
+ {
3730
+ "epoch": 0.015761188369902056,
3731
+ "grad_norm": 14.8125,
3732
+ "learning_rate": 0.0004974962068979142,
3733
+ "loss": 23.8412,
3734
+ "step": 10640
3735
+ },
3736
+ {
3737
+ "epoch": 0.015790814663830442,
3738
+ "grad_norm": 19.625,
3739
+ "learning_rate": 0.0004974912679629066,
3740
+ "loss": 23.8545,
3741
+ "step": 10660
3742
+ },
3743
+ {
3744
+ "epoch": 0.01582044095775883,
3745
+ "grad_norm": 17.875,
3746
+ "learning_rate": 0.0004974863290278991,
3747
+ "loss": 23.8848,
3748
+ "step": 10680
3749
+ },
3750
+ {
3751
+ "epoch": 0.01585006725168722,
3752
+ "grad_norm": 18.125,
3753
+ "learning_rate": 0.0004974813900928915,
3754
+ "loss": 23.7463,
3755
+ "step": 10700
3756
+ },
3757
+ {
3758
+ "epoch": 0.015879693545615605,
3759
+ "grad_norm": 17.25,
3760
+ "learning_rate": 0.0004974764511578839,
3761
+ "loss": 23.8657,
3762
+ "step": 10720
3763
+ },
3764
+ {
3765
+ "epoch": 0.01590931983954399,
3766
+ "grad_norm": 17.875,
3767
+ "learning_rate": 0.0004974715122228763,
3768
+ "loss": 23.7865,
3769
+ "step": 10740
3770
+ },
3771
+ {
3772
+ "epoch": 0.015938946133472378,
3773
+ "grad_norm": 18.875,
3774
+ "learning_rate": 0.0004974665732878688,
3775
+ "loss": 23.7971,
3776
+ "step": 10760
3777
+ },
3778
+ {
3779
+ "epoch": 0.015968572427400768,
3780
+ "grad_norm": 19.125,
3781
+ "learning_rate": 0.0004974616343528612,
3782
+ "loss": 23.8342,
3783
+ "step": 10780
3784
+ },
3785
+ {
3786
+ "epoch": 0.015998198721329154,
3787
+ "grad_norm": 16.75,
3788
+ "learning_rate": 0.0004974566954178537,
3789
+ "loss": 23.7571,
3790
+ "step": 10800
3791
+ },
3792
+ {
3793
+ "epoch": 0.01602782501525754,
3794
+ "grad_norm": 16.75,
3795
+ "learning_rate": 0.0004974517564828461,
3796
+ "loss": 23.8034,
3797
+ "step": 10820
3798
+ },
3799
+ {
3800
+ "epoch": 0.016057451309185927,
3801
+ "grad_norm": 17.25,
3802
+ "learning_rate": 0.0004974468175478386,
3803
+ "loss": 23.7763,
3804
+ "step": 10840
3805
+ },
3806
+ {
3807
+ "epoch": 0.016087077603114317,
3808
+ "grad_norm": 18.25,
3809
+ "learning_rate": 0.000497441878612831,
3810
+ "loss": 23.803,
3811
+ "step": 10860
3812
+ },
3813
+ {
3814
+ "epoch": 0.016116703897042704,
3815
+ "grad_norm": 20.875,
3816
+ "learning_rate": 0.0004974369396778234,
3817
+ "loss": 23.7222,
3818
+ "step": 10880
3819
+ },
3820
+ {
3821
+ "epoch": 0.01614633019097109,
3822
+ "grad_norm": 20.5,
3823
+ "learning_rate": 0.0004974320007428159,
3824
+ "loss": 23.6994,
3825
+ "step": 10900
3826
+ },
3827
+ {
3828
+ "epoch": 0.016175956484899476,
3829
+ "grad_norm": 15.3125,
3830
+ "learning_rate": 0.0004974270618078083,
3831
+ "loss": 23.6471,
3832
+ "step": 10920
3833
+ },
3834
+ {
3835
+ "epoch": 0.016205582778827866,
3836
+ "grad_norm": 15.5,
3837
+ "learning_rate": 0.0004974221228728007,
3838
+ "loss": 23.7271,
3839
+ "step": 10940
3840
+ },
3841
+ {
3842
+ "epoch": 0.016235209072756253,
3843
+ "grad_norm": 17.5,
3844
+ "learning_rate": 0.0004974171839377932,
3845
+ "loss": 23.6869,
3846
+ "step": 10960
3847
+ },
3848
+ {
3849
+ "epoch": 0.01626483536668464,
3850
+ "grad_norm": 16.75,
3851
+ "learning_rate": 0.0004974122450027856,
3852
+ "loss": 23.6976,
3853
+ "step": 10980
3854
+ },
3855
+ {
3856
+ "epoch": 0.016294461660613026,
3857
+ "grad_norm": 19.0,
3858
+ "learning_rate": 0.0004974073060677781,
3859
+ "loss": 23.6657,
3860
+ "step": 11000
3861
  }
3862
  ],
3863
  "logging_steps": 20,
 
3877
  "attributes": {}
3878
  }
3879
  },
3880
+ "total_flos": 7.567919072411648e+18,
3881
  "train_batch_size": 48,
3882
  "trial_name": null,
3883
  "trial_params": null