chancharikm commited on
Commit
bf2a4fa
·
verified ·
1 Parent(s): 848eea8

Training in progress, step 600, checkpoint

Browse files
Files changed (23) hide show
  1. last-checkpoint/global_step600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step600/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step600/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step600/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step600/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step600/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step600/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step600/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step600/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step600/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step600/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step600/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step600/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00004.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00004.safetensors +1 -1
  20. last-checkpoint/model-00003-of-00004.safetensors +1 -1
  21. last-checkpoint/model-00004-of-00004.safetensors +1 -1
  22. last-checkpoint/scheduler.pt +1 -1
  23. last-checkpoint/trainer_state.json +353 -3
last-checkpoint/global_step600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:575afa6a1f625837deb97d36829cf83613db5bb28dc430891e46d774f162da21
3
+ size 1558836997
last-checkpoint/global_step600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c4881d23fee62fd5b4a6cf936f0ebdefef57ec07e3dfa31282f2284359fa2aa
3
+ size 1558836997
last-checkpoint/global_step600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f07e487580b850d30a35017e718a7c6a89f993a051d12afec2e13a1a44e9f50
3
+ size 1558836997
last-checkpoint/global_step600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d510aed0ef34d764fd1ba01feea798f08fedba50778bdfde1270df4ea16faa5b
3
+ size 1558836997
last-checkpoint/global_step600/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:466f59c3f29bcd334552bfe6f82c293e1067254ddcbed8bfee74bf81ccb7726e
3
+ size 1558836997
last-checkpoint/global_step600/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4fa80af3fe7a16b69b321622bfdedc367ec98e6eb6f5e2e5846332cfb261ac5
3
+ size 1558836997
last-checkpoint/global_step600/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27d08a0327bd85ec61184f8d12fc0a2b6c18681f3604a1e1f2de2f66fa528c17
3
+ size 1558836997
last-checkpoint/global_step600/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b7e88fb8044af0641fdcc8aad302a9ba1cbc6ff04f364abf943bbd55c5815f1
3
+ size 1558836997
last-checkpoint/global_step600/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf1bef5b13beed9d51facb4d5f618f12d61db66d3498b160498e3ac414cd8606
3
+ size 14663005
last-checkpoint/global_step600/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed405ce2ac5336d8b940632dc024427513763d5cc0808a368d1c00ebe343a75e
3
+ size 14663005
last-checkpoint/global_step600/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74ab8baecfc881c3d04100f693bbb730b1956715b7553999d2f9cc9bee7a154a
3
+ size 14663005
last-checkpoint/global_step600/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ce78c768cfc550ddb1e0e89d7c609e056e6b7f680fbd3ccbd7eb175772f37ce
3
+ size 14663005
last-checkpoint/global_step600/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdbcbdd3965c37a0136af46a2e2bc6e39f344de6d142fcd228e91217ca7d8555
3
+ size 14663005
last-checkpoint/global_step600/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75a428081055ad9e47b3a7c5e927d063ff25362a3ac32b3ab5410bcf438a7f3e
3
+ size 14663005
last-checkpoint/global_step600/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8619b0c57795776e245f2f2a334b994f3566ee67a81fbc2a1621d70d5dbaebaf
3
+ size 14663005
last-checkpoint/global_step600/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5e440df922365762744af5fde272e85434cc5189d153e06413c4cce034bd488
3
+ size 14663005
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step550
 
1
+ global_step600
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eeffacdc935efb322f1c9780ab304b2632dec0ff6538a73ca8eaea9b017aabeb
3
  size 4998056552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03c7328d9e1f45351312f0574ab9b50cc0739c85c9a860a7ef8ec98e51a13393
3
  size 4998056552
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d950f49cb1d1231086ec05e5758fae16a233257120f5fc67af9bf590bdc768f
3
  size 4915962464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bfb52488190ea3cf4ded5e45301f704ebd5bf10eb7b5b465f8d553e5090d505
3
  size 4915962464
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b16f671b09b713bd2bce1808ec381833c0574ba6161b3e138386c2fd590ee36f
3
  size 4915962496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca3d879bfaf76c6064fd3dabe79ff3a9f374c38f77f4a00d18e34cedfbabfd55
3
  size 4915962496
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e87f1b1b8452e84386b7bf7b159bc2dbb1e6436187751d6cf74c9dce93938556
3
  size 2704357976
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76fed592470092909399a550e3bf05f250cb2e02fe17c96e925b503becd41b31
3
  size 2704357976
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f416bb4a919630b2c32cc5413580af9cacf31b056e07273d56a5521be468e0e2
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e80278f55e8d70153299e2706453208b1be7c51ade602e3812c4d61736a1757b
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.227180527383367,
6
  "eval_steps": 500,
7
- "global_step": 550,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3858,6 +3858,356 @@
3858
  "learning_rate": 5.190912599873818e-06,
3859
  "loss": 0.0126,
3860
  "step": 550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3861
  }
3862
  ],
3863
  "logging_steps": 1,
@@ -3877,7 +4227,7 @@
3877
  "attributes": {}
3878
  }
3879
  },
3880
- "total_flos": 3721971103170560.0,
3881
  "train_batch_size": 10,
3882
  "trial_name": null,
3883
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.4300202839756593,
6
  "eval_steps": 500,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3858
  "learning_rate": 5.190912599873818e-06,
3859
  "loss": 0.0126,
3860
  "step": 550
3861
+ },
3862
+ {
3863
+ "epoch": 2.231237322515213,
3864
+ "grad_norm": 0.24802505785116624,
3865
+ "learning_rate": 5.14029742304441e-06,
3866
+ "loss": 0.0108,
3867
+ "step": 551
3868
+ },
3869
+ {
3870
+ "epoch": 2.235294117647059,
3871
+ "grad_norm": 0.2595006809811974,
3872
+ "learning_rate": 5.089879149148781e-06,
3873
+ "loss": 0.0117,
3874
+ "step": 552
3875
+ },
3876
+ {
3877
+ "epoch": 2.239350912778905,
3878
+ "grad_norm": 0.25411628526886854,
3879
+ "learning_rate": 5.0396587850637554e-06,
3880
+ "loss": 0.0091,
3881
+ "step": 553
3882
+ },
3883
+ {
3884
+ "epoch": 2.2434077079107504,
3885
+ "grad_norm": 0.2612915952356164,
3886
+ "learning_rate": 4.989637333713814e-06,
3887
+ "loss": 0.01,
3888
+ "step": 554
3889
+ },
3890
+ {
3891
+ "epoch": 2.2474645030425964,
3892
+ "grad_norm": 0.26843182159434964,
3893
+ "learning_rate": 4.93981579405105e-06,
3894
+ "loss": 0.0111,
3895
+ "step": 555
3896
+ },
3897
+ {
3898
+ "epoch": 2.2515212981744424,
3899
+ "grad_norm": 0.21565821566578344,
3900
+ "learning_rate": 4.89019516103522e-06,
3901
+ "loss": 0.0093,
3902
+ "step": 556
3903
+ },
3904
+ {
3905
+ "epoch": 2.255578093306288,
3906
+ "grad_norm": 0.257015977515393,
3907
+ "learning_rate": 4.840776425613887e-06,
3908
+ "loss": 0.014,
3909
+ "step": 557
3910
+ },
3911
+ {
3912
+ "epoch": 2.259634888438134,
3913
+ "grad_norm": 0.20799379451848776,
3914
+ "learning_rate": 4.791560574702614e-06,
3915
+ "loss": 0.0082,
3916
+ "step": 558
3917
+ },
3918
+ {
3919
+ "epoch": 2.26369168356998,
3920
+ "grad_norm": 0.31771401066766947,
3921
+ "learning_rate": 4.742548591165289e-06,
3922
+ "loss": 0.0145,
3923
+ "step": 559
3924
+ },
3925
+ {
3926
+ "epoch": 2.2677484787018254,
3927
+ "grad_norm": 0.2951694121169533,
3928
+ "learning_rate": 4.693741453794433e-06,
3929
+ "loss": 0.0133,
3930
+ "step": 560
3931
+ },
3932
+ {
3933
+ "epoch": 2.2718052738336714,
3934
+ "grad_norm": 0.2628861243112249,
3935
+ "learning_rate": 4.6451401372917275e-06,
3936
+ "loss": 0.0099,
3937
+ "step": 561
3938
+ },
3939
+ {
3940
+ "epoch": 2.2758620689655173,
3941
+ "grad_norm": 0.22269435944973373,
3942
+ "learning_rate": 4.596745612248488e-06,
3943
+ "loss": 0.01,
3944
+ "step": 562
3945
+ },
3946
+ {
3947
+ "epoch": 2.279918864097363,
3948
+ "grad_norm": 0.28704577522446245,
3949
+ "learning_rate": 4.548558845126334e-06,
3950
+ "loss": 0.0133,
3951
+ "step": 563
3952
+ },
3953
+ {
3954
+ "epoch": 2.283975659229209,
3955
+ "grad_norm": 0.26309253454993753,
3956
+ "learning_rate": 4.500580798237831e-06,
3957
+ "loss": 0.0112,
3958
+ "step": 564
3959
+ },
3960
+ {
3961
+ "epoch": 2.288032454361055,
3962
+ "grad_norm": 0.36204770339604975,
3963
+ "learning_rate": 4.452812429727313e-06,
3964
+ "loss": 0.0102,
3965
+ "step": 565
3966
+ },
3967
+ {
3968
+ "epoch": 2.292089249492901,
3969
+ "grad_norm": 0.27129867464793533,
3970
+ "learning_rate": 4.405254693551754e-06,
3971
+ "loss": 0.0129,
3972
+ "step": 566
3973
+ },
3974
+ {
3975
+ "epoch": 2.2961460446247464,
3976
+ "grad_norm": 0.22000281062189,
3977
+ "learning_rate": 4.357908539461679e-06,
3978
+ "loss": 0.008,
3979
+ "step": 567
3980
+ },
3981
+ {
3982
+ "epoch": 2.3002028397565923,
3983
+ "grad_norm": 0.29624602171723224,
3984
+ "learning_rate": 4.310774912982227e-06,
3985
+ "loss": 0.0144,
3986
+ "step": 568
3987
+ },
3988
+ {
3989
+ "epoch": 2.3042596348884383,
3990
+ "grad_norm": 0.22414337252422092,
3991
+ "learning_rate": 4.263854755394256e-06,
3992
+ "loss": 0.0089,
3993
+ "step": 569
3994
+ },
3995
+ {
3996
+ "epoch": 2.308316430020284,
3997
+ "grad_norm": 0.28474418070215113,
3998
+ "learning_rate": 4.21714900371556e-06,
3999
+ "loss": 0.012,
4000
+ "step": 570
4001
+ },
4002
+ {
4003
+ "epoch": 2.31237322515213,
4004
+ "grad_norm": 0.210326196860165,
4005
+ "learning_rate": 4.170658590682134e-06,
4006
+ "loss": 0.0082,
4007
+ "step": 571
4008
+ },
4009
+ {
4010
+ "epoch": 2.316430020283976,
4011
+ "grad_norm": 0.25084743918519153,
4012
+ "learning_rate": 4.124384444729561e-06,
4013
+ "loss": 0.0089,
4014
+ "step": 572
4015
+ },
4016
+ {
4017
+ "epoch": 2.3204868154158214,
4018
+ "grad_norm": 0.28537259628253975,
4019
+ "learning_rate": 4.078327489974466e-06,
4020
+ "loss": 0.0113,
4021
+ "step": 573
4022
+ },
4023
+ {
4024
+ "epoch": 2.3245436105476673,
4025
+ "grad_norm": 0.22908276842060815,
4026
+ "learning_rate": 4.032488646196077e-06,
4027
+ "loss": 0.0101,
4028
+ "step": 574
4029
+ },
4030
+ {
4031
+ "epoch": 2.3286004056795133,
4032
+ "grad_norm": 0.23845424830115683,
4033
+ "learning_rate": 3.986868828817818e-06,
4034
+ "loss": 0.0082,
4035
+ "step": 575
4036
+ },
4037
+ {
4038
+ "epoch": 2.332657200811359,
4039
+ "grad_norm": 0.184143153098664,
4040
+ "learning_rate": 3.941468948889067e-06,
4041
+ "loss": 0.0068,
4042
+ "step": 576
4043
+ },
4044
+ {
4045
+ "epoch": 2.336713995943205,
4046
+ "grad_norm": 0.6122313601455306,
4047
+ "learning_rate": 3.8962899130669525e-06,
4048
+ "loss": 0.0108,
4049
+ "step": 577
4050
+ },
4051
+ {
4052
+ "epoch": 2.340770791075051,
4053
+ "grad_norm": 0.28166906523725416,
4054
+ "learning_rate": 3.851332623598227e-06,
4055
+ "loss": 0.0123,
4056
+ "step": 578
4057
+ },
4058
+ {
4059
+ "epoch": 2.344827586206897,
4060
+ "grad_norm": 0.25383591722677934,
4061
+ "learning_rate": 3.8065979783012746e-06,
4062
+ "loss": 0.0112,
4063
+ "step": 579
4064
+ },
4065
+ {
4066
+ "epoch": 2.3488843813387423,
4067
+ "grad_norm": 0.19603119074642442,
4068
+ "learning_rate": 3.7620868705481586e-06,
4069
+ "loss": 0.0079,
4070
+ "step": 580
4071
+ },
4072
+ {
4073
+ "epoch": 2.3529411764705883,
4074
+ "grad_norm": 0.6430341184106714,
4075
+ "learning_rate": 3.717800189246807e-06,
4076
+ "loss": 0.0079,
4077
+ "step": 581
4078
+ },
4079
+ {
4080
+ "epoch": 2.356997971602434,
4081
+ "grad_norm": 0.18811787242514347,
4082
+ "learning_rate": 3.6737388188232305e-06,
4083
+ "loss": 0.0083,
4084
+ "step": 582
4085
+ },
4086
+ {
4087
+ "epoch": 2.36105476673428,
4088
+ "grad_norm": 0.20181404498766184,
4089
+ "learning_rate": 3.629903639203884e-06,
4090
+ "loss": 0.0083,
4091
+ "step": 583
4092
+ },
4093
+ {
4094
+ "epoch": 2.365111561866126,
4095
+ "grad_norm": 0.277342849633144,
4096
+ "learning_rate": 3.5862955257980813e-06,
4097
+ "loss": 0.0117,
4098
+ "step": 584
4099
+ },
4100
+ {
4101
+ "epoch": 2.369168356997972,
4102
+ "grad_norm": 0.33333350892821556,
4103
+ "learning_rate": 3.5429153494805087e-06,
4104
+ "loss": 0.0123,
4105
+ "step": 585
4106
+ },
4107
+ {
4108
+ "epoch": 2.3732251521298173,
4109
+ "grad_norm": 0.23513638472728224,
4110
+ "learning_rate": 3.499763976573866e-06,
4111
+ "loss": 0.0108,
4112
+ "step": 586
4113
+ },
4114
+ {
4115
+ "epoch": 2.3772819472616633,
4116
+ "grad_norm": 0.9093270963617072,
4117
+ "learning_rate": 3.4568422688315027e-06,
4118
+ "loss": 0.0142,
4119
+ "step": 587
4120
+ },
4121
+ {
4122
+ "epoch": 2.3813387423935093,
4123
+ "grad_norm": 0.34746001689384526,
4124
+ "learning_rate": 3.41415108342028e-06,
4125
+ "loss": 0.0091,
4126
+ "step": 588
4127
+ },
4128
+ {
4129
+ "epoch": 2.385395537525355,
4130
+ "grad_norm": 0.2173220084157316,
4131
+ "learning_rate": 3.371691272903398e-06,
4132
+ "loss": 0.0072,
4133
+ "step": 589
4134
+ },
4135
+ {
4136
+ "epoch": 2.389452332657201,
4137
+ "grad_norm": 0.24334697959441584,
4138
+ "learning_rate": 3.329463685223411e-06,
4139
+ "loss": 0.01,
4140
+ "step": 590
4141
+ },
4142
+ {
4143
+ "epoch": 2.393509127789047,
4144
+ "grad_norm": 0.17003427758882395,
4145
+ "learning_rate": 3.287469163685241e-06,
4146
+ "loss": 0.0078,
4147
+ "step": 591
4148
+ },
4149
+ {
4150
+ "epoch": 2.3975659229208923,
4151
+ "grad_norm": 0.275983232519286,
4152
+ "learning_rate": 3.2457085469394015e-06,
4153
+ "loss": 0.0098,
4154
+ "step": 592
4155
+ },
4156
+ {
4157
+ "epoch": 2.4016227180527383,
4158
+ "grad_norm": 0.2634629301055357,
4159
+ "learning_rate": 3.204182668965198e-06,
4160
+ "loss": 0.0093,
4161
+ "step": 593
4162
+ },
4163
+ {
4164
+ "epoch": 2.4056795131845843,
4165
+ "grad_norm": 0.22325919432645464,
4166
+ "learning_rate": 3.162892359054098e-06,
4167
+ "loss": 0.0102,
4168
+ "step": 594
4169
+ },
4170
+ {
4171
+ "epoch": 2.40973630831643,
4172
+ "grad_norm": 0.18144358371062383,
4173
+ "learning_rate": 3.12183844179316e-06,
4174
+ "loss": 0.0072,
4175
+ "step": 595
4176
+ },
4177
+ {
4178
+ "epoch": 2.413793103448276,
4179
+ "grad_norm": 0.19726131763241822,
4180
+ "learning_rate": 3.081021737048565e-06,
4181
+ "loss": 0.0079,
4182
+ "step": 596
4183
+ },
4184
+ {
4185
+ "epoch": 2.417849898580122,
4186
+ "grad_norm": 0.2681563404805776,
4187
+ "learning_rate": 3.040443059949264e-06,
4188
+ "loss": 0.0124,
4189
+ "step": 597
4190
+ },
4191
+ {
4192
+ "epoch": 2.4219066937119678,
4193
+ "grad_norm": 2.0846202190030847,
4194
+ "learning_rate": 3.0001032208706653e-06,
4195
+ "loss": 0.0108,
4196
+ "step": 598
4197
+ },
4198
+ {
4199
+ "epoch": 2.4259634888438133,
4200
+ "grad_norm": 0.20806969883961424,
4201
+ "learning_rate": 2.960003025418478e-06,
4202
+ "loss": 0.0082,
4203
+ "step": 599
4204
+ },
4205
+ {
4206
+ "epoch": 2.4300202839756593,
4207
+ "grad_norm": 0.21726326567282914,
4208
+ "learning_rate": 2.9201432744126074e-06,
4209
+ "loss": 0.0114,
4210
+ "step": 600
4211
  }
4212
  ],
4213
  "logging_steps": 1,
 
4227
  "attributes": {}
4228
  }
4229
  },
4230
+ "total_flos": 4063291632517120.0,
4231
  "train_batch_size": 10,
4232
  "trial_name": null,
4233
  "trial_params": null