Billyyy commited on
Commit
2ca9fb7
·
verified ·
1 Parent(s): 1261a2b

Training in progress, step 5493, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4ce5b1352e184a82241e863626c4a62ae31746f2d8177ffeaa8982c170f6186
3
  size 2718107304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2807387c3f7c038eca212dca41a58ecfff1755585862e7e2318b6286dd29cb8f
3
  size 2718107304
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a213c86e42b52fb36eb4043f5ff5fb85cd6c856634144ab014cad96b1d38a5f1
3
  size 145486330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71dd57ecc32f710a2531c2b41f6cbb162801c9ad0f9bb31b277daffb7fe2f9b4
3
  size 145486330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f8c6a28f5372544493b855a8a168867b253fd6506322bb138a55ab7d729a0e7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ad39c2fd71a09f27709f37c0a489f4c2b0a997a89343f75cb61234192319689
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7b957bbd71ba88b1567b81dfc55fdf0d0c49eef2d758e0863ecbda488001df2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2455594c5b90eff022ef3ec1c714caddd8dcf4c8dacec82303fc8c5605b9f1d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9102494083378846,
5
  "eval_steps": 1000,
6
- "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3547,6 +3547,349 @@
3547
  "eval_samples_per_second": 9.646,
3548
  "eval_steps_per_second": 1.206,
3549
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3550
  }
3551
  ],
3552
  "logging_steps": 10,
@@ -3561,12 +3904,12 @@
3561
  "should_evaluate": false,
3562
  "should_log": false,
3563
  "should_save": true,
3564
- "should_training_stop": false
3565
  },
3566
  "attributes": {}
3567
  }
3568
  },
3569
- "total_flos": 1.16907232985088e+18,
3570
  "train_batch_size": 4,
3571
  "trial_name": null,
3572
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 1000,
6
+ "global_step": 5493,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3547
  "eval_samples_per_second": 9.646,
3548
  "eval_steps_per_second": 1.206,
3549
  "step": 5000
3550
+ },
3551
+ {
3552
+ "epoch": 0.9120699071545604,
3553
+ "grad_norm": 4.849244117736816,
3554
+ "learning_rate": 2.291214703757982e-06,
3555
+ "loss": 2.3958,
3556
+ "step": 5010
3557
+ },
3558
+ {
3559
+ "epoch": 0.9138904059712362,
3560
+ "grad_norm": 4.8128204345703125,
3561
+ "learning_rate": 2.1980167035280163e-06,
3562
+ "loss": 2.4288,
3563
+ "step": 5020
3564
+ },
3565
+ {
3566
+ "epoch": 0.9157109047879118,
3567
+ "grad_norm": 5.573403835296631,
3568
+ "learning_rate": 2.1067111388414163e-06,
3569
+ "loss": 2.4134,
3570
+ "step": 5030
3571
+ },
3572
+ {
3573
+ "epoch": 0.9175314036045876,
3574
+ "grad_norm": 4.9487504959106445,
3575
+ "learning_rate": 2.0173016243995866e-06,
3576
+ "loss": 2.4095,
3577
+ "step": 5040
3578
+ },
3579
+ {
3580
+ "epoch": 0.9193519024212634,
3581
+ "grad_norm": 4.933927536010742,
3582
+ "learning_rate": 1.929791699841066e-06,
3583
+ "loss": 2.4014,
3584
+ "step": 5050
3585
+ },
3586
+ {
3587
+ "epoch": 0.9211724012379392,
3588
+ "grad_norm": 5.116062641143799,
3589
+ "learning_rate": 1.844184829601453e-06,
3590
+ "loss": 2.4196,
3591
+ "step": 5060
3592
+ },
3593
+ {
3594
+ "epoch": 0.922992900054615,
3595
+ "grad_norm": 4.888516902923584,
3596
+ "learning_rate": 1.7604844027761802e-06,
3597
+ "loss": 2.4418,
3598
+ "step": 5070
3599
+ },
3600
+ {
3601
+ "epoch": 0.9248133988712908,
3602
+ "grad_norm": 4.990447998046875,
3603
+ "learning_rate": 1.6786937329864027e-06,
3604
+ "loss": 2.4049,
3605
+ "step": 5080
3606
+ },
3607
+ {
3608
+ "epoch": 0.9266338976879666,
3609
+ "grad_norm": 4.672518253326416,
3610
+ "learning_rate": 1.5988160582477818e-06,
3611
+ "loss": 2.3873,
3612
+ "step": 5090
3613
+ },
3614
+ {
3615
+ "epoch": 0.9284543965046422,
3616
+ "grad_norm": 5.029353618621826,
3617
+ "learning_rate": 1.5208545408423092e-06,
3618
+ "loss": 2.4754,
3619
+ "step": 5100
3620
+ },
3621
+ {
3622
+ "epoch": 0.930274895321318,
3623
+ "grad_norm": 4.660059928894043,
3624
+ "learning_rate": 1.444812267193102e-06,
3625
+ "loss": 2.4081,
3626
+ "step": 5110
3627
+ },
3628
+ {
3629
+ "epoch": 0.9320953941379938,
3630
+ "grad_norm": 5.001034259796143,
3631
+ "learning_rate": 1.3706922477422336e-06,
3632
+ "loss": 2.4014,
3633
+ "step": 5120
3634
+ },
3635
+ {
3636
+ "epoch": 0.9339158929546696,
3637
+ "grad_norm": 5.1275858879089355,
3638
+ "learning_rate": 1.2984974168315234e-06,
3639
+ "loss": 2.4251,
3640
+ "step": 5130
3641
+ },
3642
+ {
3643
+ "epoch": 0.9357363917713454,
3644
+ "grad_norm": 4.893324375152588,
3645
+ "learning_rate": 1.2282306325864135e-06,
3646
+ "loss": 2.4196,
3647
+ "step": 5140
3648
+ },
3649
+ {
3650
+ "epoch": 0.9375568905880212,
3651
+ "grad_norm": 4.734968662261963,
3652
+ "learning_rate": 1.1598946768027863e-06,
3653
+ "loss": 2.401,
3654
+ "step": 5150
3655
+ },
3656
+ {
3657
+ "epoch": 0.9393773894046968,
3658
+ "grad_norm": 4.66255521774292,
3659
+ "learning_rate": 1.0934922548368254e-06,
3660
+ "loss": 2.3846,
3661
+ "step": 5160
3662
+ },
3663
+ {
3664
+ "epoch": 0.9411978882213726,
3665
+ "grad_norm": 4.771427631378174,
3666
+ "learning_rate": 1.0290259954979397e-06,
3667
+ "loss": 2.3953,
3668
+ "step": 5170
3669
+ },
3670
+ {
3671
+ "epoch": 0.9430183870380484,
3672
+ "grad_norm": 4.673166275024414,
3673
+ "learning_rate": 9.664984509446917e-07,
3674
+ "loss": 2.3694,
3675
+ "step": 5180
3676
+ },
3677
+ {
3678
+ "epoch": 0.9448388858547242,
3679
+ "grad_norm": 4.778134346008301,
3680
+ "learning_rate": 9.059120965837331e-07,
3681
+ "loss": 2.3948,
3682
+ "step": 5190
3683
+ },
3684
+ {
3685
+ "epoch": 0.9466593846714,
3686
+ "grad_norm": 4.706231594085693,
3687
+ "learning_rate": 8.472693309718283e-07,
3688
+ "loss": 2.4153,
3689
+ "step": 5200
3690
+ },
3691
+ {
3692
+ "epoch": 0.9484798834880758,
3693
+ "grad_norm": 4.645259380340576,
3694
+ "learning_rate": 7.905724757208965e-07,
3695
+ "loss": 2.3806,
3696
+ "step": 5210
3697
+ },
3698
+ {
3699
+ "epoch": 0.9503003823047516,
3700
+ "grad_norm": 5.04796838760376,
3701
+ "learning_rate": 7.358237754060915e-07,
3702
+ "loss": 2.454,
3703
+ "step": 5220
3704
+ },
3705
+ {
3706
+ "epoch": 0.9521208811214272,
3707
+ "grad_norm": 4.7881646156311035,
3708
+ "learning_rate": 6.830253974769496e-07,
3709
+ "loss": 2.4161,
3710
+ "step": 5230
3711
+ },
3712
+ {
3713
+ "epoch": 0.953941379938103,
3714
+ "grad_norm": 4.7254743576049805,
3715
+ "learning_rate": 6.321794321715757e-07,
3716
+ "loss": 2.4715,
3717
+ "step": 5240
3718
+ },
3719
+ {
3720
+ "epoch": 0.9557618787547788,
3721
+ "grad_norm": 5.13754415512085,
3722
+ "learning_rate": 5.832878924338869e-07,
3723
+ "loss": 2.4191,
3724
+ "step": 5250
3725
+ },
3726
+ {
3727
+ "epoch": 0.9575823775714546,
3728
+ "grad_norm": 4.781599998474121,
3729
+ "learning_rate": 5.363527138339597e-07,
3730
+ "loss": 2.4127,
3731
+ "step": 5260
3732
+ },
3733
+ {
3734
+ "epoch": 0.9594028763881304,
3735
+ "grad_norm": 4.541421413421631,
3736
+ "learning_rate": 4.913757544913355e-07,
3737
+ "loss": 2.3908,
3738
+ "step": 5270
3739
+ },
3740
+ {
3741
+ "epoch": 0.9612233752048062,
3742
+ "grad_norm": 5.078845500946045,
3743
+ "learning_rate": 4.4835879500153556e-07,
3744
+ "loss": 2.4303,
3745
+ "step": 5280
3746
+ },
3747
+ {
3748
+ "epoch": 0.9630438740214818,
3749
+ "grad_norm": 4.745322227478027,
3750
+ "learning_rate": 4.0730353836549993e-07,
3751
+ "loss": 2.4046,
3752
+ "step": 5290
3753
+ },
3754
+ {
3755
+ "epoch": 0.9648643728381576,
3756
+ "grad_norm": 4.688536643981934,
3757
+ "learning_rate": 3.6821160992221993e-07,
3758
+ "loss": 2.4456,
3759
+ "step": 5300
3760
+ },
3761
+ {
3762
+ "epoch": 0.9666848716548334,
3763
+ "grad_norm": 4.9088592529296875,
3764
+ "learning_rate": 3.310845572843557e-07,
3765
+ "loss": 2.3846,
3766
+ "step": 5310
3767
+ },
3768
+ {
3769
+ "epoch": 0.9685053704715092,
3770
+ "grad_norm": 5.126766681671143,
3771
+ "learning_rate": 2.959238502769912e-07,
3772
+ "loss": 2.4093,
3773
+ "step": 5320
3774
+ },
3775
+ {
3776
+ "epoch": 0.970325869288185,
3777
+ "grad_norm": 4.49152946472168,
3778
+ "learning_rate": 2.6273088087943597e-07,
3779
+ "loss": 2.3837,
3780
+ "step": 5330
3781
+ },
3782
+ {
3783
+ "epoch": 0.9721463681048608,
3784
+ "grad_norm": 4.944559097290039,
3785
+ "learning_rate": 2.315069631701139e-07,
3786
+ "loss": 2.3791,
3787
+ "step": 5340
3788
+ },
3789
+ {
3790
+ "epoch": 0.9739668669215366,
3791
+ "grad_norm": 4.91040563583374,
3792
+ "learning_rate": 2.022533332745602e-07,
3793
+ "loss": 2.4035,
3794
+ "step": 5350
3795
+ },
3796
+ {
3797
+ "epoch": 0.9757873657382122,
3798
+ "grad_norm": 4.91538143157959,
3799
+ "learning_rate": 1.7497114931644965e-07,
3800
+ "loss": 2.4057,
3801
+ "step": 5360
3802
+ },
3803
+ {
3804
+ "epoch": 0.977607864554888,
3805
+ "grad_norm": 5.63076114654541,
3806
+ "learning_rate": 1.496614913717831e-07,
3807
+ "loss": 2.3627,
3808
+ "step": 5370
3809
+ },
3810
+ {
3811
+ "epoch": 0.9794283633715638,
3812
+ "grad_norm": 4.944591045379639,
3813
+ "learning_rate": 1.2632536142609397e-07,
3814
+ "loss": 2.3662,
3815
+ "step": 5380
3816
+ },
3817
+ {
3818
+ "epoch": 0.9812488621882396,
3819
+ "grad_norm": 4.864638328552246,
3820
+ "learning_rate": 1.0496368333482442e-07,
3821
+ "loss": 2.3704,
3822
+ "step": 5390
3823
+ },
3824
+ {
3825
+ "epoch": 0.9830693610049154,
3826
+ "grad_norm": 4.991931438446045,
3827
+ "learning_rate": 8.557730278669906e-08,
3828
+ "loss": 2.3767,
3829
+ "step": 5400
3830
+ },
3831
+ {
3832
+ "epoch": 0.9848898598215912,
3833
+ "grad_norm": 4.382468223571777,
3834
+ "learning_rate": 6.816698727029614e-08,
3835
+ "loss": 2.4112,
3836
+ "step": 5410
3837
+ },
3838
+ {
3839
+ "epoch": 0.9867103586382668,
3840
+ "grad_norm": 44.841453552246094,
3841
+ "learning_rate": 5.273342604361631e-08,
3842
+ "loss": 2.4092,
3843
+ "step": 5420
3844
+ },
3845
+ {
3846
+ "epoch": 0.9885308574549426,
3847
+ "grad_norm": 4.815988063812256,
3848
+ "learning_rate": 3.9277230106832264e-08,
3849
+ "loss": 2.4256,
3850
+ "step": 5430
3851
+ },
3852
+ {
3853
+ "epoch": 0.9903513562716184,
3854
+ "grad_norm": 4.87392520904541,
3855
+ "learning_rate": 2.7798932178080274e-08,
3856
+ "loss": 2.3936,
3857
+ "step": 5440
3858
+ },
3859
+ {
3860
+ "epoch": 0.9921718550882942,
3861
+ "grad_norm": 5.1465559005737305,
3862
+ "learning_rate": 1.829898667237151e-08,
3863
+ "loss": 2.3805,
3864
+ "step": 5450
3865
+ },
3866
+ {
3867
+ "epoch": 0.99399235390497,
3868
+ "grad_norm": 4.486802101135254,
3869
+ "learning_rate": 1.0777769683617544e-08,
3870
+ "loss": 2.3492,
3871
+ "step": 5460
3872
+ },
3873
+ {
3874
+ "epoch": 0.9958128527216458,
3875
+ "grad_norm": 5.0049614906311035,
3876
+ "learning_rate": 5.2355789697144945e-09,
3877
+ "loss": 2.4414,
3878
+ "step": 5470
3879
+ },
3880
+ {
3881
+ "epoch": 0.9976333515383216,
3882
+ "grad_norm": 4.7070441246032715,
3883
+ "learning_rate": 1.6726339407857616e-09,
3884
+ "loss": 2.4294,
3885
+ "step": 5480
3886
+ },
3887
+ {
3888
+ "epoch": 0.9994538503549972,
3889
+ "grad_norm": 4.9832539558410645,
3890
+ "learning_rate": 8.907565046678557e-11,
3891
+ "loss": 2.3724,
3892
+ "step": 5490
3893
  }
3894
  ],
3895
  "logging_steps": 10,
 
3904
  "should_evaluate": false,
3905
  "should_log": false,
3906
  "should_save": true,
3907
+ "should_training_stop": true
3908
  },
3909
  "attributes": {}
3910
  }
3911
  },
3912
+ "total_flos": 1.2843428615741768e+18,
3913
  "train_batch_size": 4,
3914
  "trial_name": null,
3915
  "trial_params": null