chancharikm commited on
Commit
45863cd
·
verified ·
1 Parent(s): 6267cd7

Training in progress, step 741, checkpoint

Browse files
Files changed (23) hide show
  1. last-checkpoint/global_step741/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step741/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step741/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step741/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step741/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step741/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step741/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step741/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step741/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step741/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step741/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step741/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step741/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step741/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step741/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step741/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00004.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00004.safetensors +1 -1
  20. last-checkpoint/model-00003-of-00004.safetensors +1 -1
  21. last-checkpoint/model-00004-of-00004.safetensors +1 -1
  22. last-checkpoint/scheduler.pt +1 -1
  23. last-checkpoint/trainer_state.json +291 -4
last-checkpoint/global_step741/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fda13d029ab0f9c8edee071d87b09a7de5b5804e4fecda73db1e5afd454bc4d
3
+ size 1558836997
last-checkpoint/global_step741/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f31fd81fedd8a51b114d1ce307479f12286b843b8d0bf0218078fa38aa1e597
3
+ size 1558836997
last-checkpoint/global_step741/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3747894a963f5a7a402f64de90807dfb74ad2a9948ee1630297df5e5fa704a50
3
+ size 1558836997
last-checkpoint/global_step741/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6b1612c1e037b02a903d5c8f8d9c1bd8b50e9ee640fedd013404bb340f4732e
3
+ size 1558836997
last-checkpoint/global_step741/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcfea27c535ab3cd36d881ec247de2c72d6bdfb73796b1bffbb08730c519d24a
3
+ size 1558836997
last-checkpoint/global_step741/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25677e0605fc00cf4f9c0c667e34e22187c0b3909186e1864d3e602cb910d56f
3
+ size 1558836997
last-checkpoint/global_step741/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae0f271ecdbb8b5030de0b834c7942d9253857873739457f97d1961852a28333
3
+ size 1558836997
last-checkpoint/global_step741/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ec54bcd226b366ac40312b32230f8b9e70a90020625973a8b07f38510726399
3
+ size 1558836997
last-checkpoint/global_step741/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd56729d92ecff4d18c51f3585cdffc2fadb7e4e3094dd9bef8e99ce72e29f45
3
+ size 14663005
last-checkpoint/global_step741/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9b430189b114033f0425c7954abfe335a78443ea8e21a184f882ed3399fd3ca
3
+ size 14663005
last-checkpoint/global_step741/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46a10535d49418aa0458dd81a5114cdcccbd9b9b640eafb4f37de100b0eb5023
3
+ size 14663005
last-checkpoint/global_step741/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:017dc1f9478fc98b96271b64c504487112c7ced098cd5d62d4f5fe6cc6076e8d
3
+ size 14663005
last-checkpoint/global_step741/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22ab0b6d2d7b6c850a9c225498fb3c402921a4d40a72ae452f0c7671b61688d6
3
+ size 14663005
last-checkpoint/global_step741/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89c5225ce5da36546c551d76404ba07edd5fa906dfab5b6e486b9b93a3f239ca
3
+ size 14663005
last-checkpoint/global_step741/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2205bc88c456ff61beb21bc4ad6a3e8627bb9ad1d2875dd2dd3a0bb3fda4727
3
+ size 14663005
last-checkpoint/global_step741/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37a2b835829b0e26aa8937b8c52f028b4e899def3bbf86088709661d1a727c74
3
+ size 14663005
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step700
 
1
+ global_step741
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4801dfb1de5603d9fd86e66233f71fd69b7514a405be533d0560a10ac2232701
3
  size 4998056552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49e7042302ff6a729c45385680fa4cb4a81f634dd19c11c9b11146b0c5077077
3
  size 4998056552
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae2e98b4e0719324774a26fb6680623e8fc7671467315c0748ef1e7d0b4119ba
3
  size 4915962464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea7e84e8433c1602ab31b03c77c594a11a03c5b96dbe716e1ea3be92cd66bf8e
3
  size 4915962464
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65839ff8d08fd3e1453b2209252309b7ea1b5d1d54c3d0005a4d9ff668c67d99
3
  size 4915962496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:519c9fc303c91360165e09254c2a98449fa60ad64e0c0b5a61775dfbabe1ac1c
3
  size 4915962496
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e606573eedf3be0faded56cd50dd9d1206aada29ef5973f67584c3bbf371fc0d
3
  size 2704357976
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f963c93178755b9faccbda482f1d1bb31346e09b0a583ae7ea0cf2dcc99034d2
3
  size 2704357976
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52c60310c79aedc98d1b85ec51f9670b286af151f72286d16d7d60189e2aebd3
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33f1b5fcc0e2b43e77e522f70fde811afa6c0c979f0ac4bc190eb32c0e39007
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.8356997971602436,
6
  "eval_steps": 500,
7
- "global_step": 700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4908,6 +4908,293 @@
4908
  "learning_rate": 2.6343508932521243e-07,
4909
  "loss": 0.0086,
4910
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4911
  }
4912
  ],
4913
  "logging_steps": 1,
@@ -4922,12 +5209,12 @@
4922
  "should_evaluate": false,
4923
  "should_log": false,
4924
  "should_save": true,
4925
- "should_training_stop": false
4926
  },
4927
  "attributes": {}
4928
  }
4929
  },
4930
- "total_flos": 4740780390875136.0,
4931
  "train_batch_size": 10,
4932
  "trial_name": null,
4933
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
  "eval_steps": 500,
7
+ "global_step": 741,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4908
  "learning_rate": 2.6343508932521243e-07,
4909
  "loss": 0.0086,
4910
  "step": 700
4911
+ },
4912
+ {
4913
+ "epoch": 2.839756592292089,
4914
+ "grad_norm": 0.1847332447135413,
4915
+ "learning_rate": 2.510745991525992e-07,
4916
+ "loss": 0.0069,
4917
+ "step": 701
4918
+ },
4919
+ {
4920
+ "epoch": 2.843813387423935,
4921
+ "grad_norm": 0.21938966536504567,
4922
+ "learning_rate": 2.39008652010963e-07,
4923
+ "loss": 0.0086,
4924
+ "step": 702
4925
+ },
4926
+ {
4927
+ "epoch": 2.847870182555781,
4928
+ "grad_norm": 0.23134459803966625,
4929
+ "learning_rate": 2.2723748886298523e-07,
4930
+ "loss": 0.009,
4931
+ "step": 703
4932
+ },
4933
+ {
4934
+ "epoch": 2.8519269776876266,
4935
+ "grad_norm": 0.26561602719544714,
4936
+ "learning_rate": 2.1576134478437315e-07,
4937
+ "loss": 0.0102,
4938
+ "step": 704
4939
+ },
4940
+ {
4941
+ "epoch": 2.8559837728194726,
4942
+ "grad_norm": 1.1071488735425152,
4943
+ "learning_rate": 2.0458044895916516e-07,
4944
+ "loss": 0.0084,
4945
+ "step": 705
4946
+ },
4947
+ {
4948
+ "epoch": 2.8600405679513186,
4949
+ "grad_norm": 0.27265875970319914,
4950
+ "learning_rate": 1.9369502467514788e-07,
4951
+ "loss": 0.0111,
4952
+ "step": 706
4953
+ },
4954
+ {
4955
+ "epoch": 2.8640973630831645,
4956
+ "grad_norm": 0.22724233458925192,
4957
+ "learning_rate": 1.831052893194063e-07,
4958
+ "loss": 0.0089,
4959
+ "step": 707
4960
+ },
4961
+ {
4962
+ "epoch": 2.86815415821501,
4963
+ "grad_norm": 0.26572957456388885,
4964
+ "learning_rate": 1.7281145437397394e-07,
4965
+ "loss": 0.0102,
4966
+ "step": 708
4967
+ },
4968
+ {
4969
+ "epoch": 2.872210953346856,
4970
+ "grad_norm": 0.21131153496136418,
4971
+ "learning_rate": 1.62813725411613e-07,
4972
+ "loss": 0.0081,
4973
+ "step": 709
4974
+ },
4975
+ {
4976
+ "epoch": 2.8762677484787016,
4977
+ "grad_norm": 0.2164022001956463,
4978
+ "learning_rate": 1.5311230209171078e-07,
4979
+ "loss": 0.0115,
4980
+ "step": 710
4981
+ },
4982
+ {
4983
+ "epoch": 2.8803245436105476,
4984
+ "grad_norm": 0.18362776733451797,
4985
+ "learning_rate": 1.4370737815628809e-07,
4986
+ "loss": 0.0068,
4987
+ "step": 711
4988
+ },
4989
+ {
4990
+ "epoch": 2.8843813387423936,
4991
+ "grad_norm": 0.22201535643027853,
4992
+ "learning_rate": 1.3459914142613384e-07,
4993
+ "loss": 0.0093,
4994
+ "step": 712
4995
+ },
4996
+ {
4997
+ "epoch": 2.8884381338742395,
4998
+ "grad_norm": 0.23084858527126895,
4999
+ "learning_rate": 1.2578777379705476e-07,
5000
+ "loss": 0.0084,
5001
+ "step": 713
5002
+ },
5003
+ {
5004
+ "epoch": 2.892494929006085,
5005
+ "grad_norm": 0.33392265560180945,
5006
+ "learning_rate": 1.1727345123623667e-07,
5007
+ "loss": 0.0108,
5008
+ "step": 714
5009
+ },
5010
+ {
5011
+ "epoch": 2.896551724137931,
5012
+ "grad_norm": 0.2883283240758792,
5013
+ "learning_rate": 1.0905634377873563e-07,
5014
+ "loss": 0.0092,
5015
+ "step": 715
5016
+ },
5017
+ {
5018
+ "epoch": 2.900608519269777,
5019
+ "grad_norm": 0.2686572168243743,
5020
+ "learning_rate": 1.011366155240856e-07,
5021
+ "loss": 0.0131,
5022
+ "step": 716
5023
+ },
5024
+ {
5025
+ "epoch": 2.9046653144016226,
5026
+ "grad_norm": 0.2101145436417912,
5027
+ "learning_rate": 9.351442463301274e-08,
5028
+ "loss": 0.0073,
5029
+ "step": 717
5030
+ },
5031
+ {
5032
+ "epoch": 2.9087221095334685,
5033
+ "grad_norm": 0.18153151909382642,
5034
+ "learning_rate": 8.618992332427966e-08,
5035
+ "loss": 0.0054,
5036
+ "step": 718
5037
+ },
5038
+ {
5039
+ "epoch": 2.9127789046653145,
5040
+ "grad_norm": 0.1970262952306368,
5041
+ "learning_rate": 7.916325787164947e-08,
5042
+ "loss": 0.0073,
5043
+ "step": 719
5044
+ },
5045
+ {
5046
+ "epoch": 2.9168356997971605,
5047
+ "grad_norm": 0.23074904705177698,
5048
+ "learning_rate": 7.243456860096476e-08,
5049
+ "loss": 0.0079,
5050
+ "step": 720
5051
+ },
5052
+ {
5053
+ "epoch": 2.920892494929006,
5054
+ "grad_norm": 0.2628324630369001,
5055
+ "learning_rate": 6.600398988733824e-08,
5056
+ "loss": 0.0085,
5057
+ "step": 721
5058
+ },
5059
+ {
5060
+ "epoch": 2.924949290060852,
5061
+ "grad_norm": 0.28909539190720107,
5062
+ "learning_rate": 5.98716501524732e-08,
5063
+ "loss": 0.0126,
5064
+ "step": 722
5065
+ },
5066
+ {
5067
+ "epoch": 2.9290060851926976,
5068
+ "grad_norm": 0.27286922538855823,
5069
+ "learning_rate": 5.403767186210218e-08,
5070
+ "loss": 0.0093,
5071
+ "step": 723
5072
+ },
5073
+ {
5074
+ "epoch": 2.9330628803245435,
5075
+ "grad_norm": 0.2568428715793184,
5076
+ "learning_rate": 4.850217152353731e-08,
5077
+ "loss": 0.0074,
5078
+ "step": 724
5079
+ },
5080
+ {
5081
+ "epoch": 2.9371196754563895,
5082
+ "grad_norm": 0.4731785556263277,
5083
+ "learning_rate": 4.326525968334216e-08,
5084
+ "loss": 0.0076,
5085
+ "step": 725
5086
+ },
5087
+ {
5088
+ "epoch": 2.9411764705882355,
5089
+ "grad_norm": 0.2031164746992896,
5090
+ "learning_rate": 3.8327040925130175e-08,
5091
+ "loss": 0.0074,
5092
+ "step": 726
5093
+ },
5094
+ {
5095
+ "epoch": 2.945233265720081,
5096
+ "grad_norm": 0.4807857991510011,
5097
+ "learning_rate": 3.368761386746966e-08,
5098
+ "loss": 0.0072,
5099
+ "step": 727
5100
+ },
5101
+ {
5102
+ "epoch": 2.949290060851927,
5103
+ "grad_norm": 0.66364903613515,
5104
+ "learning_rate": 2.9347071161918703e-08,
5105
+ "loss": 0.0097,
5106
+ "step": 728
5107
+ },
5108
+ {
5109
+ "epoch": 2.9533468559837726,
5110
+ "grad_norm": 0.3120023778508287,
5111
+ "learning_rate": 2.530549949117167e-08,
5112
+ "loss": 0.0103,
5113
+ "step": 729
5114
+ },
5115
+ {
5116
+ "epoch": 2.9574036511156185,
5117
+ "grad_norm": 0.20907205106461102,
5118
+ "learning_rate": 2.1562979567330554e-08,
5119
+ "loss": 0.0069,
5120
+ "step": 730
5121
+ },
5122
+ {
5123
+ "epoch": 2.9614604462474645,
5124
+ "grad_norm": 0.23184300955403422,
5125
+ "learning_rate": 1.8119586130292964e-08,
5126
+ "loss": 0.0062,
5127
+ "step": 731
5128
+ },
5129
+ {
5130
+ "epoch": 2.9655172413793105,
5131
+ "grad_norm": 0.2240122101684402,
5132
+ "learning_rate": 1.4975387946256634e-08,
5133
+ "loss": 0.0083,
5134
+ "step": 732
5135
+ },
5136
+ {
5137
+ "epoch": 2.969574036511156,
5138
+ "grad_norm": 0.23638952694105592,
5139
+ "learning_rate": 1.213044780635053e-08,
5140
+ "loss": 0.0078,
5141
+ "step": 733
5142
+ },
5143
+ {
5144
+ "epoch": 2.973630831643002,
5145
+ "grad_norm": 0.23626023739586408,
5146
+ "learning_rate": 9.584822525377512e-09,
5147
+ "loss": 0.0081,
5148
+ "step": 734
5149
+ },
5150
+ {
5151
+ "epoch": 2.977687626774848,
5152
+ "grad_norm": 0.19999524138260236,
5153
+ "learning_rate": 7.338562940680249e-09,
5154
+ "loss": 0.0074,
5155
+ "step": 735
5156
+ },
5157
+ {
5158
+ "epoch": 2.9817444219066935,
5159
+ "grad_norm": 0.21364040585606983,
5160
+ "learning_rate": 5.391713911128693e-09,
5161
+ "loss": 0.0069,
5162
+ "step": 736
5163
+ },
5164
+ {
5165
+ "epoch": 2.9858012170385395,
5166
+ "grad_norm": 0.20996179998709147,
5167
+ "learning_rate": 3.744314316220798e-09,
5168
+ "loss": 0.0072,
5169
+ "step": 737
5170
+ },
5171
+ {
5172
+ "epoch": 2.9898580121703855,
5173
+ "grad_norm": 0.19358339065466926,
5174
+ "learning_rate": 2.396397055306476e-09,
5175
+ "loss": 0.0061,
5176
+ "step": 738
5177
+ },
5178
+ {
5179
+ "epoch": 2.9939148073022315,
5180
+ "grad_norm": 0.18667044909228617,
5181
+ "learning_rate": 1.3479890469314527e-09,
5182
+ "loss": 0.0075,
5183
+ "step": 739
5184
+ },
5185
+ {
5186
+ "epoch": 2.997971602434077,
5187
+ "grad_norm": 0.1920159799469308,
5188
+ "learning_rate": 5.991112283026956e-10,
5189
+ "loss": 0.0074,
5190
+ "step": 740
5191
+ },
5192
+ {
5193
+ "epoch": 3.0,
5194
+ "grad_norm": 0.1960028788559094,
5195
+ "learning_rate": 1.4977855486209002e-10,
5196
+ "loss": 0.0037,
5197
+ "step": 741
5198
  }
5199
  ],
5200
  "logging_steps": 1,
 
5209
  "should_evaluate": false,
5210
  "should_log": false,
5211
  "should_save": true,
5212
+ "should_training_stop": true
5213
  },
5214
  "attributes": {}
5215
  }
5216
  },
5217
+ "total_flos": 5016006509985792.0,
5218
  "train_batch_size": 10,
5219
  "trial_name": null,
5220
  "trial_params": null