PurplelinkPL commited on
Commit
5ac64ff
·
verified ·
1 Parent(s): cbd96e3

Upload 10 files

Browse files
Files changed (6) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +1095 -3
  6. training_args.bin +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a710773cfd7f93749b548b4dc475790d75538b97475d047166dceb50704eb746
3
  size 598635032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f341e7c0d50547a5d48a2244cc30330ab7ed2ceaff5186455a531e8c69a77105
3
  size 598635032
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fedf1b4c8a508947f08f4a98315b58cd6a43e2a1adda4f18d9617c092f6a8844
3
  size 1197359627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b47ad41e6a351695e914e54e9b102721f48985ba894d88bae93aad1de73672f
3
  size 1197359627
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0412622810efe6fde95b3cfeff4557f637e942d79ee2fa68f136e7ee99e430b1
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0065f5fa67d21a3e3251b9235347d2a9d93494140e986cefd3a276ca1160a3e0
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5967d2fde5e8af8b726d755ee2aea2a1a3996cd4db019463bea602f6a5c353f
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ad1df73ab0092710b52025da1ad2250f73bf46d66d45d561b7da8dfce44525e
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.002796,
6
  "eval_steps": 1000,
7
- "global_step": 368000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -28719,6 +28719,1098 @@
28719
  "eval_samples_per_second": 196.867,
28720
  "eval_steps_per_second": 1.545,
28721
  "step": 368000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28722
  }
28723
  ],
28724
  "logging_steps": 100,
@@ -28738,7 +29830,7 @@
28738
  "attributes": {}
28739
  }
28740
  },
28741
- "total_flos": 3.211620496844764e+19,
28742
  "train_batch_size": 128,
28743
  "trial_name": null,
28744
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 117.000154,
6
  "eval_steps": 1000,
7
+ "global_step": 382000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
28719
  "eval_samples_per_second": 196.867,
28720
  "eval_steps_per_second": 1.545,
28721
  "step": 368000
28722
+ },
28723
+ {
28724
+ "epoch": 0.0002,
28725
+ "grad_norm": 1.7583304643630981,
28726
+ "learning_rate": 8.378246507831702e-06,
28727
+ "loss": 2.0821,
28728
+ "step": 368100
28729
+ },
28730
+ {
28731
+ "epoch": 1.000162,
28732
+ "grad_norm": 1.6928149461746216,
28733
+ "learning_rate": 8.366398135029847e-06,
28734
+ "loss": 1.9175,
28735
+ "step": 368200
28736
+ },
28737
+ {
28738
+ "epoch": 2.000124,
28739
+ "grad_norm": 1.6941829919815063,
28740
+ "learning_rate": 8.354556462240829e-06,
28741
+ "loss": 1.8645,
28742
+ "step": 368300
28743
+ },
28744
+ {
28745
+ "epoch": 3.000086,
28746
+ "grad_norm": 1.6606141328811646,
28747
+ "learning_rate": 8.342721494234487e-06,
28748
+ "loss": 1.8296,
28749
+ "step": 368400
28750
+ },
28751
+ {
28752
+ "epoch": 4.000048,
28753
+ "grad_norm": 1.508047342300415,
28754
+ "learning_rate": 8.330893235777929e-06,
28755
+ "loss": 1.7982,
28756
+ "step": 368500
28757
+ },
28758
+ {
28759
+ "epoch": 5.00001,
28760
+ "grad_norm": 1.6567221879959106,
28761
+ "learning_rate": 8.31907169163558e-06,
28762
+ "loss": 1.776,
28763
+ "step": 368600
28764
+ },
28765
+ {
28766
+ "epoch": 5.00021,
28767
+ "grad_norm": 1.5388526916503906,
28768
+ "learning_rate": 8.30725686656916e-06,
28769
+ "loss": 1.7492,
28770
+ "step": 368700
28771
+ },
28772
+ {
28773
+ "epoch": 6.000172,
28774
+ "grad_norm": 1.6148278713226318,
28775
+ "learning_rate": 8.295448765337685e-06,
28776
+ "loss": 1.7284,
28777
+ "step": 368800
28778
+ },
28779
+ {
28780
+ "epoch": 7.000134,
28781
+ "grad_norm": 1.5249569416046143,
28782
+ "learning_rate": 8.28364739269744e-06,
28783
+ "loss": 1.7221,
28784
+ "step": 368900
28785
+ },
28786
+ {
28787
+ "epoch": 8.000096,
28788
+ "grad_norm": 1.5550845861434937,
28789
+ "learning_rate": 8.271852753402028e-06,
28790
+ "loss": 1.7079,
28791
+ "step": 369000
28792
+ },
28793
+ {
28794
+ "epoch": 8.000096,
28795
+ "eval_loss": 1.9369168281555176,
28796
+ "eval_runtime": 55.0617,
28797
+ "eval_samples_per_second": 185.138,
28798
+ "eval_steps_per_second": 1.453,
28799
+ "step": 369000
28800
+ },
28801
+ {
28802
+ "epoch": 9.000058,
28803
+ "grad_norm": 2.09401273727417,
28804
+ "learning_rate": 8.260064852202329e-06,
28805
+ "loss": 3.9424,
28806
+ "step": 369100
28807
+ },
28808
+ {
28809
+ "epoch": 10.00002,
28810
+ "grad_norm": 1.9706476926803589,
28811
+ "learning_rate": 8.248283693846509e-06,
28812
+ "loss": 2.7687,
28813
+ "step": 369200
28814
+ },
28815
+ {
28816
+ "epoch": 10.00022,
28817
+ "grad_norm": 2.0509135723114014,
28818
+ "learning_rate": 8.23650928308001e-06,
28819
+ "loss": 2.546,
28820
+ "step": 369300
28821
+ },
28822
+ {
28823
+ "epoch": 11.000182,
28824
+ "grad_norm": 1.9125868082046509,
28825
+ "learning_rate": 8.224741624645565e-06,
28826
+ "loss": 2.4164,
28827
+ "step": 369400
28828
+ },
28829
+ {
28830
+ "epoch": 12.000144,
28831
+ "grad_norm": 2.175070285797119,
28832
+ "learning_rate": 8.212980723283186e-06,
28833
+ "loss": 2.3405,
28834
+ "step": 369500
28835
+ },
28836
+ {
28837
+ "epoch": 13.000106,
28838
+ "grad_norm": 1.9154648780822754,
28839
+ "learning_rate": 8.201226583730175e-06,
28840
+ "loss": 2.2729,
28841
+ "step": 369600
28842
+ },
28843
+ {
28844
+ "epoch": 14.000068,
28845
+ "grad_norm": 2.021451711654663,
28846
+ "learning_rate": 8.189479210721076e-06,
28847
+ "loss": 2.2268,
28848
+ "step": 369700
28849
+ },
28850
+ {
28851
+ "epoch": 15.00003,
28852
+ "grad_norm": 2.0009710788726807,
28853
+ "learning_rate": 8.177738608987745e-06,
28854
+ "loss": 2.1859,
28855
+ "step": 369800
28856
+ },
28857
+ {
28858
+ "epoch": 15.00023,
28859
+ "grad_norm": 1.9311867952346802,
28860
+ "learning_rate": 8.166004783259295e-06,
28861
+ "loss": 2.1494,
28862
+ "step": 369900
28863
+ },
28864
+ {
28865
+ "epoch": 16.000192,
28866
+ "grad_norm": 1.967115044593811,
28867
+ "learning_rate": 8.154277738262097e-06,
28868
+ "loss": 2.1181,
28869
+ "step": 370000
28870
+ },
28871
+ {
28872
+ "epoch": 16.000192,
28873
+ "eval_loss": 2.407406806945801,
28874
+ "eval_runtime": 54.9275,
28875
+ "eval_samples_per_second": 185.59,
28876
+ "eval_steps_per_second": 1.456,
28877
+ "step": 370000
28878
+ },
28879
+ {
28880
+ "epoch": 17.000154,
28881
+ "grad_norm": 2.050703525543213,
28882
+ "learning_rate": 8.142557478719814e-06,
28883
+ "loss": 2.496,
28884
+ "step": 370100
28885
+ },
28886
+ {
28887
+ "epoch": 18.000116,
28888
+ "grad_norm": 2.053346872329712,
28889
+ "learning_rate": 8.130844009353362e-06,
28890
+ "loss": 2.3323,
28891
+ "step": 370200
28892
+ },
28893
+ {
28894
+ "epoch": 19.000078,
28895
+ "grad_norm": 1.9913196563720703,
28896
+ "learning_rate": 8.119137334880933e-06,
28897
+ "loss": 2.2625,
28898
+ "step": 370300
28899
+ },
28900
+ {
28901
+ "epoch": 20.00004,
28902
+ "grad_norm": 2.018827438354492,
28903
+ "learning_rate": 8.107437460017958e-06,
28904
+ "loss": 2.2166,
28905
+ "step": 370400
28906
+ },
28907
+ {
28908
+ "epoch": 21.000002,
28909
+ "grad_norm": 2.2157461643218994,
28910
+ "learning_rate": 8.095744389477155e-06,
28911
+ "loss": 2.1759,
28912
+ "step": 370500
28913
+ },
28914
+ {
28915
+ "epoch": 21.000202,
28916
+ "grad_norm": 1.975722312927246,
28917
+ "learning_rate": 8.084058127968497e-06,
28918
+ "loss": 2.1349,
28919
+ "step": 370600
28920
+ },
28921
+ {
28922
+ "epoch": 22.000164,
28923
+ "grad_norm": 2.118351459503174,
28924
+ "learning_rate": 8.072378680199197e-06,
28925
+ "loss": 2.1051,
28926
+ "step": 370700
28927
+ },
28928
+ {
28929
+ "epoch": 23.000126,
28930
+ "grad_norm": 1.9632095098495483,
28931
+ "learning_rate": 8.060706050873746e-06,
28932
+ "loss": 2.0781,
28933
+ "step": 370800
28934
+ },
28935
+ {
28936
+ "epoch": 24.000088,
28937
+ "grad_norm": 2.0141265392303467,
28938
+ "learning_rate": 8.049040244693864e-06,
28939
+ "loss": 2.0583,
28940
+ "step": 370900
28941
+ },
28942
+ {
28943
+ "epoch": 25.00005,
28944
+ "grad_norm": 2.0297467708587646,
28945
+ "learning_rate": 8.037381266358546e-06,
28946
+ "loss": 2.0323,
28947
+ "step": 371000
28948
+ },
28949
+ {
28950
+ "epoch": 25.00005,
28951
+ "eval_loss": 2.3608412742614746,
28952
+ "eval_runtime": 55.107,
28953
+ "eval_samples_per_second": 184.986,
28954
+ "eval_steps_per_second": 1.452,
28955
+ "step": 371000
28956
+ },
28957
+ {
28958
+ "epoch": 26.000012,
28959
+ "grad_norm": 2.0017659664154053,
28960
+ "learning_rate": 8.025729120564025e-06,
28961
+ "loss": 2.2111,
28962
+ "step": 371100
28963
+ },
28964
+ {
28965
+ "epoch": 26.000212,
28966
+ "grad_norm": 2.087977409362793,
28967
+ "learning_rate": 8.01408381200379e-06,
28968
+ "loss": 2.1626,
28969
+ "step": 371200
28970
+ },
28971
+ {
28972
+ "epoch": 27.000174,
28973
+ "grad_norm": 1.9115463495254517,
28974
+ "learning_rate": 8.002445345368556e-06,
28975
+ "loss": 2.1198,
28976
+ "step": 371300
28977
+ },
28978
+ {
28979
+ "epoch": 28.000136,
28980
+ "grad_norm": 2.075347423553467,
28981
+ "learning_rate": 7.990813725346307e-06,
28982
+ "loss": 2.0987,
28983
+ "step": 371400
28984
+ },
28985
+ {
28986
+ "epoch": 29.000098,
28987
+ "grad_norm": 2.004270553588867,
28988
+ "learning_rate": 7.979188956622263e-06,
28989
+ "loss": 2.0634,
28990
+ "step": 371500
28991
+ },
28992
+ {
28993
+ "epoch": 30.00006,
28994
+ "grad_norm": 2.0730834007263184,
28995
+ "learning_rate": 7.967571043878863e-06,
28996
+ "loss": 2.0421,
28997
+ "step": 371600
28998
+ },
28999
+ {
29000
+ "epoch": 31.000022,
29001
+ "grad_norm": 2.0204977989196777,
29002
+ "learning_rate": 7.955959991795809e-06,
29003
+ "loss": 2.0191,
29004
+ "step": 371700
29005
+ },
29006
+ {
29007
+ "epoch": 31.000222,
29008
+ "grad_norm": 1.9809165000915527,
29009
+ "learning_rate": 7.944355805050032e-06,
29010
+ "loss": 1.9979,
29011
+ "step": 371800
29012
+ },
29013
+ {
29014
+ "epoch": 32.000184,
29015
+ "grad_norm": 1.8896480798721313,
29016
+ "learning_rate": 7.932758488315705e-06,
29017
+ "loss": 1.9788,
29018
+ "step": 371900
29019
+ },
29020
+ {
29021
+ "epoch": 33.000146,
29022
+ "grad_norm": 1.8905068635940552,
29023
+ "learning_rate": 7.921168046264213e-06,
29024
+ "loss": 1.9646,
29025
+ "step": 372000
29026
+ },
29027
+ {
29028
+ "epoch": 33.000146,
29029
+ "eval_loss": 2.3312835693359375,
29030
+ "eval_runtime": 55.0336,
29031
+ "eval_samples_per_second": 185.232,
29032
+ "eval_steps_per_second": 1.454,
29033
+ "step": 372000
29034
+ },
29035
+ {
29036
+ "epoch": 34.000108,
29037
+ "grad_norm": 2.0993173122406006,
29038
+ "learning_rate": 7.909584483564187e-06,
29039
+ "loss": 2.0813,
29040
+ "step": 372100
29041
+ },
29042
+ {
29043
+ "epoch": 35.00007,
29044
+ "grad_norm": 2.0958781242370605,
29045
+ "learning_rate": 7.898007804881485e-06,
29046
+ "loss": 2.0596,
29047
+ "step": 372200
29048
+ },
29049
+ {
29050
+ "epoch": 36.000032,
29051
+ "grad_norm": 1.9180951118469238,
29052
+ "learning_rate": 7.886438014879205e-06,
29053
+ "loss": 2.0353,
29054
+ "step": 372300
29055
+ },
29056
+ {
29057
+ "epoch": 36.000232,
29058
+ "grad_norm": 2.0129170417785645,
29059
+ "learning_rate": 7.874875118217639e-06,
29060
+ "loss": 2.007,
29061
+ "step": 372400
29062
+ },
29063
+ {
29064
+ "epoch": 37.000194,
29065
+ "grad_norm": 1.9586989879608154,
29066
+ "learning_rate": 7.863319119554325e-06,
29067
+ "loss": 1.9911,
29068
+ "step": 372500
29069
+ },
29070
+ {
29071
+ "epoch": 38.000156,
29072
+ "grad_norm": 2.0036728382110596,
29073
+ "learning_rate": 7.851770023544022e-06,
29074
+ "loss": 1.97,
29075
+ "step": 372600
29076
+ },
29077
+ {
29078
+ "epoch": 39.000118,
29079
+ "grad_norm": 2.0655548572540283,
29080
+ "learning_rate": 7.840227834838709e-06,
29081
+ "loss": 1.9609,
29082
+ "step": 372700
29083
+ },
29084
+ {
29085
+ "epoch": 40.00008,
29086
+ "grad_norm": 1.8536264896392822,
29087
+ "learning_rate": 7.828692558087566e-06,
29088
+ "loss": 1.9389,
29089
+ "step": 372800
29090
+ },
29091
+ {
29092
+ "epoch": 41.000042,
29093
+ "grad_norm": 2.0123019218444824,
29094
+ "learning_rate": 7.817164197937006e-06,
29095
+ "loss": 1.9311,
29096
+ "step": 372900
29097
+ },
29098
+ {
29099
+ "epoch": 42.000004,
29100
+ "grad_norm": 1.9356095790863037,
29101
+ "learning_rate": 7.80564275903066e-06,
29102
+ "loss": 1.9157,
29103
+ "step": 373000
29104
+ },
29105
+ {
29106
+ "epoch": 42.000004,
29107
+ "eval_loss": 2.2908835411071777,
29108
+ "eval_runtime": 54.8694,
29109
+ "eval_samples_per_second": 185.787,
29110
+ "eval_steps_per_second": 1.458,
29111
+ "step": 373000
29112
+ },
29113
+ {
29114
+ "epoch": 42.000204,
29115
+ "grad_norm": 1.9983534812927246,
29116
+ "learning_rate": 7.794128246009346e-06,
29117
+ "loss": 1.9932,
29118
+ "step": 373100
29119
+ },
29120
+ {
29121
+ "epoch": 43.000166,
29122
+ "grad_norm": 2.0036892890930176,
29123
+ "learning_rate": 7.782620663511117e-06,
29124
+ "loss": 1.9803,
29125
+ "step": 373200
29126
+ },
29127
+ {
29128
+ "epoch": 44.000128,
29129
+ "grad_norm": 1.9349839687347412,
29130
+ "learning_rate": 7.771120016171227e-06,
29131
+ "loss": 1.9687,
29132
+ "step": 373300
29133
+ },
29134
+ {
29135
+ "epoch": 45.00009,
29136
+ "grad_norm": 1.8848403692245483,
29137
+ "learning_rate": 7.759626308622142e-06,
29138
+ "loss": 1.9474,
29139
+ "step": 373400
29140
+ },
29141
+ {
29142
+ "epoch": 46.000052,
29143
+ "grad_norm": 1.9943233728408813,
29144
+ "learning_rate": 7.74813954549351e-06,
29145
+ "loss": 1.9319,
29146
+ "step": 373500
29147
+ },
29148
+ {
29149
+ "epoch": 47.000014,
29150
+ "grad_norm": 1.9002938270568848,
29151
+ "learning_rate": 7.736659731412204e-06,
29152
+ "loss": 1.9217,
29153
+ "step": 373600
29154
+ },
29155
+ {
29156
+ "epoch": 47.000214,
29157
+ "grad_norm": 1.9708117246627808,
29158
+ "learning_rate": 7.725186871002296e-06,
29159
+ "loss": 1.9083,
29160
+ "step": 373700
29161
+ },
29162
+ {
29163
+ "epoch": 48.000176,
29164
+ "grad_norm": 1.9721884727478027,
29165
+ "learning_rate": 7.713720968885057e-06,
29166
+ "loss": 1.8956,
29167
+ "step": 373800
29168
+ },
29169
+ {
29170
+ "epoch": 49.000138,
29171
+ "grad_norm": 1.9223700761795044,
29172
+ "learning_rate": 7.702262029678939e-06,
29173
+ "loss": 1.8808,
29174
+ "step": 373900
29175
+ },
29176
+ {
29177
+ "epoch": 50.0001,
29178
+ "grad_norm": 2.03428316116333,
29179
+ "learning_rate": 7.690810057999607e-06,
29180
+ "loss": 1.868,
29181
+ "step": 374000
29182
+ },
29183
+ {
29184
+ "epoch": 50.0001,
29185
+ "eval_loss": 2.2805299758911133,
29186
+ "eval_runtime": 55.0731,
29187
+ "eval_samples_per_second": 185.099,
29188
+ "eval_steps_per_second": 1.453,
29189
+ "step": 374000
29190
+ },
29191
+ {
29192
+ "epoch": 51.000062,
29193
+ "grad_norm": 1.947739601135254,
29194
+ "learning_rate": 7.67936505845991e-06,
29195
+ "loss": 1.9356,
29196
+ "step": 374100
29197
+ },
29198
+ {
29199
+ "epoch": 52.000024,
29200
+ "grad_norm": 1.939833164215088,
29201
+ "learning_rate": 7.667927035669906e-06,
29202
+ "loss": 1.9287,
29203
+ "step": 374200
29204
+ },
29205
+ {
29206
+ "epoch": 52.000224,
29207
+ "grad_norm": 2.120412588119507,
29208
+ "learning_rate": 7.656495994236813e-06,
29209
+ "loss": 1.9083,
29210
+ "step": 374300
29211
+ },
29212
+ {
29213
+ "epoch": 53.000186,
29214
+ "grad_norm": 1.9514408111572266,
29215
+ "learning_rate": 7.645071938765055e-06,
29216
+ "loss": 1.9005,
29217
+ "step": 374400
29218
+ },
29219
+ {
29220
+ "epoch": 54.000148,
29221
+ "grad_norm": 1.9537405967712402,
29222
+ "learning_rate": 7.633654873856258e-06,
29223
+ "loss": 1.8885,
29224
+ "step": 374500
29225
+ },
29226
+ {
29227
+ "epoch": 55.00011,
29228
+ "grad_norm": 1.9912673234939575,
29229
+ "learning_rate": 7.6222448041091884e-06,
29230
+ "loss": 1.8727,
29231
+ "step": 374600
29232
+ },
29233
+ {
29234
+ "epoch": 56.000072,
29235
+ "grad_norm": 2.0160086154937744,
29236
+ "learning_rate": 7.6108417341198366e-06,
29237
+ "loss": 1.8652,
29238
+ "step": 374700
29239
+ },
29240
+ {
29241
+ "epoch": 57.000034,
29242
+ "grad_norm": 1.962786078453064,
29243
+ "learning_rate": 7.599445668481353e-06,
29244
+ "loss": 1.8495,
29245
+ "step": 374800
29246
+ },
29247
+ {
29248
+ "epoch": 57.000234,
29249
+ "grad_norm": 2.0677285194396973,
29250
+ "learning_rate": 7.588056611784084e-06,
29251
+ "loss": 1.8414,
29252
+ "step": 374900
29253
+ },
29254
+ {
29255
+ "epoch": 58.000196,
29256
+ "grad_norm": 1.923409104347229,
29257
+ "learning_rate": 7.576674568615519e-06,
29258
+ "loss": 1.8278,
29259
+ "step": 375000
29260
+ },
29261
+ {
29262
+ "epoch": 58.000196,
29263
+ "eval_loss": 2.2644314765930176,
29264
+ "eval_runtime": 54.7576,
29265
+ "eval_samples_per_second": 186.166,
29266
+ "eval_steps_per_second": 1.461,
29267
+ "step": 375000
29268
+ },
29269
+ {
29270
+ "epoch": 59.000158,
29271
+ "grad_norm": 2.0004312992095947,
29272
+ "learning_rate": 7.565299543560353e-06,
29273
+ "loss": 1.8848,
29274
+ "step": 375100
29275
+ },
29276
+ {
29277
+ "epoch": 60.00012,
29278
+ "grad_norm": 2.0457980632781982,
29279
+ "learning_rate": 7.553931541200448e-06,
29280
+ "loss": 1.8788,
29281
+ "step": 375200
29282
+ },
29283
+ {
29284
+ "epoch": 61.000082,
29285
+ "grad_norm": 1.9472349882125854,
29286
+ "learning_rate": 7.54257056611484e-06,
29287
+ "loss": 1.8666,
29288
+ "step": 375300
29289
+ },
29290
+ {
29291
+ "epoch": 62.000044,
29292
+ "grad_norm": 2.019150733947754,
29293
+ "learning_rate": 7.531216622879711e-06,
29294
+ "loss": 1.8555,
29295
+ "step": 375400
29296
+ },
29297
+ {
29298
+ "epoch": 63.000006,
29299
+ "grad_norm": 1.9674944877624512,
29300
+ "learning_rate": 7.5198697160684365e-06,
29301
+ "loss": 1.8495,
29302
+ "step": 375500
29303
+ },
29304
+ {
29305
+ "epoch": 63.000206,
29306
+ "grad_norm": 1.959089756011963,
29307
+ "learning_rate": 7.5085298502515525e-06,
29308
+ "loss": 1.8353,
29309
+ "step": 375600
29310
+ },
29311
+ {
29312
+ "epoch": 64.000168,
29313
+ "grad_norm": 1.9350240230560303,
29314
+ "learning_rate": 7.4971970299967605e-06,
29315
+ "loss": 1.8257,
29316
+ "step": 375700
29317
+ },
29318
+ {
29319
+ "epoch": 65.00013,
29320
+ "grad_norm": 1.9134896993637085,
29321
+ "learning_rate": 7.4858712598689014e-06,
29322
+ "loss": 1.8124,
29323
+ "step": 375800
29324
+ },
29325
+ {
29326
+ "epoch": 66.000092,
29327
+ "grad_norm": 2.0086705684661865,
29328
+ "learning_rate": 7.474552544430008e-06,
29329
+ "loss": 1.8052,
29330
+ "step": 375900
29331
+ },
29332
+ {
29333
+ "epoch": 67.000054,
29334
+ "grad_norm": 1.9945427179336548,
29335
+ "learning_rate": 7.4632408882392504e-06,
29336
+ "loss": 1.8005,
29337
+ "step": 376000
29338
+ },
29339
+ {
29340
+ "epoch": 67.000054,
29341
+ "eval_loss": 2.248349189758301,
29342
+ "eval_runtime": 54.5876,
29343
+ "eval_samples_per_second": 186.746,
29344
+ "eval_steps_per_second": 1.466,
29345
+ "step": 376000
29346
+ },
29347
+ {
29348
+ "epoch": 68.000016,
29349
+ "grad_norm": 1.9743598699569702,
29350
+ "learning_rate": 7.451936295852976e-06,
29351
+ "loss": 1.8454,
29352
+ "step": 376100
29353
+ },
29354
+ {
29355
+ "epoch": 68.000216,
29356
+ "grad_norm": 1.898568034172058,
29357
+ "learning_rate": 7.440638771824654e-06,
29358
+ "loss": 1.8431,
29359
+ "step": 376200
29360
+ },
29361
+ {
29362
+ "epoch": 69.000178,
29363
+ "grad_norm": 2.142463445663452,
29364
+ "learning_rate": 7.429348320704935e-06,
29365
+ "loss": 1.8277,
29366
+ "step": 376300
29367
+ },
29368
+ {
29369
+ "epoch": 70.00014,
29370
+ "grad_norm": 1.9892468452453613,
29371
+ "learning_rate": 7.41806494704162e-06,
29372
+ "loss": 1.8119,
29373
+ "step": 376400
29374
+ },
29375
+ {
29376
+ "epoch": 71.000102,
29377
+ "grad_norm": 2.005885601043701,
29378
+ "learning_rate": 7.406788655379634e-06,
29379
+ "loss": 1.8086,
29380
+ "step": 376500
29381
+ },
29382
+ {
29383
+ "epoch": 72.000064,
29384
+ "grad_norm": 1.9385697841644287,
29385
+ "learning_rate": 7.395519450261074e-06,
29386
+ "loss": 1.8024,
29387
+ "step": 376600
29388
+ },
29389
+ {
29390
+ "epoch": 73.000026,
29391
+ "grad_norm": 1.9773157835006714,
29392
+ "learning_rate": 7.384257336225173e-06,
29393
+ "loss": 1.7934,
29394
+ "step": 376700
29395
+ },
29396
+ {
29397
+ "epoch": 73.000226,
29398
+ "grad_norm": 1.8618143796920776,
29399
+ "learning_rate": 7.373002317808317e-06,
29400
+ "loss": 1.7824,
29401
+ "step": 376800
29402
+ },
29403
+ {
29404
+ "epoch": 74.000188,
29405
+ "grad_norm": 1.9531538486480713,
29406
+ "learning_rate": 7.361754399544013e-06,
29407
+ "loss": 1.7727,
29408
+ "step": 376900
29409
+ },
29410
+ {
29411
+ "epoch": 75.00015,
29412
+ "grad_norm": 1.931515097618103,
29413
+ "learning_rate": 7.350513585962926e-06,
29414
+ "loss": 1.764,
29415
+ "step": 377000
29416
+ },
29417
+ {
29418
+ "epoch": 75.00015,
29419
+ "eval_loss": 2.2430500984191895,
29420
+ "eval_runtime": 54.6415,
29421
+ "eval_samples_per_second": 186.561,
29422
+ "eval_steps_per_second": 1.464,
29423
+ "step": 377000
29424
+ },
29425
+ {
29426
+ "epoch": 76.000112,
29427
+ "grad_norm": 1.9521348476409912,
29428
+ "learning_rate": 7.339279881592859e-06,
29429
+ "loss": 1.8087,
29430
+ "step": 377100
29431
+ },
29432
+ {
29433
+ "epoch": 77.000074,
29434
+ "grad_norm": 2.0013513565063477,
29435
+ "learning_rate": 7.32805329095875e-06,
29436
+ "loss": 1.8023,
29437
+ "step": 377200
29438
+ },
29439
+ {
29440
+ "epoch": 78.000036,
29441
+ "grad_norm": 1.8955408334732056,
29442
+ "learning_rate": 7.316833818582652e-06,
29443
+ "loss": 1.7943,
29444
+ "step": 377300
29445
+ },
29446
+ {
29447
+ "epoch": 78.000236,
29448
+ "grad_norm": 2.0025761127471924,
29449
+ "learning_rate": 7.305621468983781e-06,
29450
+ "loss": 1.7903,
29451
+ "step": 377400
29452
+ },
29453
+ {
29454
+ "epoch": 79.000198,
29455
+ "grad_norm": 1.9769165515899658,
29456
+ "learning_rate": 7.294416246678462e-06,
29457
+ "loss": 1.7774,
29458
+ "step": 377500
29459
+ },
29460
+ {
29461
+ "epoch": 80.00016,
29462
+ "grad_norm": 1.8650860786437988,
29463
+ "learning_rate": 7.283218156180174e-06,
29464
+ "loss": 1.7698,
29465
+ "step": 377600
29466
+ },
29467
+ {
29468
+ "epoch": 81.000122,
29469
+ "grad_norm": 1.9133366346359253,
29470
+ "learning_rate": 7.272027201999484e-06,
29471
+ "loss": 1.7658,
29472
+ "step": 377700
29473
+ },
29474
+ {
29475
+ "epoch": 82.000084,
29476
+ "grad_norm": 1.9629889726638794,
29477
+ "learning_rate": 7.260843388644117e-06,
29478
+ "loss": 1.7552,
29479
+ "step": 377800
29480
+ },
29481
+ {
29482
+ "epoch": 83.000046,
29483
+ "grad_norm": 1.9844943284988403,
29484
+ "learning_rate": 7.249666720618919e-06,
29485
+ "loss": 1.7539,
29486
+ "step": 377900
29487
+ },
29488
+ {
29489
+ "epoch": 84.000008,
29490
+ "grad_norm": 1.9470826387405396,
29491
+ "learning_rate": 7.238497202425834e-06,
29492
+ "loss": 1.7404,
29493
+ "step": 378000
29494
+ },
29495
+ {
29496
+ "epoch": 84.000008,
29497
+ "eval_loss": 2.234076499938965,
29498
+ "eval_runtime": 54.5427,
29499
+ "eval_samples_per_second": 186.9,
29500
+ "eval_steps_per_second": 1.467,
29501
+ "step": 378000
29502
+ },
29503
+ {
29504
+ "epoch": 84.000208,
29505
+ "grad_norm": 2.091539144515991,
29506
+ "learning_rate": 7.2273348385639535e-06,
29507
+ "loss": 1.7783,
29508
+ "step": 378100
29509
+ },
29510
+ {
29511
+ "epoch": 85.00017,
29512
+ "grad_norm": 1.9156265258789062,
29513
+ "learning_rate": 7.216179633529477e-06,
29514
+ "loss": 1.7714,
29515
+ "step": 378200
29516
+ },
29517
+ {
29518
+ "epoch": 86.000132,
29519
+ "grad_norm": 2.0570554733276367,
29520
+ "learning_rate": 7.205031591815723e-06,
29521
+ "loss": 1.7658,
29522
+ "step": 378300
29523
+ },
29524
+ {
29525
+ "epoch": 87.000094,
29526
+ "grad_norm": 2.0413947105407715,
29527
+ "learning_rate": 7.193890717913107e-06,
29528
+ "loss": 1.7564,
29529
+ "step": 378400
29530
+ },
29531
+ {
29532
+ "epoch": 88.000056,
29533
+ "grad_norm": 1.91609787940979,
29534
+ "learning_rate": 7.18275701630918e-06,
29535
+ "loss": 1.7538,
29536
+ "step": 378500
29537
+ },
29538
+ {
29539
+ "epoch": 89.000018,
29540
+ "grad_norm": 1.8070498704910278,
29541
+ "learning_rate": 7.171630491488598e-06,
29542
+ "loss": 1.7439,
29543
+ "step": 378600
29544
+ },
29545
+ {
29546
+ "epoch": 89.000218,
29547
+ "grad_norm": 1.9066287279129028,
29548
+ "learning_rate": 7.16051114793313e-06,
29549
+ "loss": 1.7382,
29550
+ "step": 378700
29551
+ },
29552
+ {
29553
+ "epoch": 90.00018,
29554
+ "grad_norm": 1.8805670738220215,
29555
+ "learning_rate": 7.149398990121628e-06,
29556
+ "loss": 1.7322,
29557
+ "step": 378800
29558
+ },
29559
+ {
29560
+ "epoch": 91.000142,
29561
+ "grad_norm": 1.93112313747406,
29562
+ "learning_rate": 7.138294022530081e-06,
29563
+ "loss": 1.7221,
29564
+ "step": 378900
29565
+ },
29566
+ {
29567
+ "epoch": 92.000104,
29568
+ "grad_norm": 1.9273699522018433,
29569
+ "learning_rate": 7.127196249631565e-06,
29570
+ "loss": 1.717,
29571
+ "step": 379000
29572
+ },
29573
+ {
29574
+ "epoch": 92.000104,
29575
+ "eval_loss": 2.222762107849121,
29576
+ "eval_runtime": 54.5793,
29577
+ "eval_samples_per_second": 186.774,
29578
+ "eval_steps_per_second": 1.466,
29579
+ "step": 379000
29580
+ },
29581
+ {
29582
+ "epoch": 93.000066,
29583
+ "grad_norm": 1.9170584678649902,
29584
+ "learning_rate": 7.116105675896276e-06,
29585
+ "loss": 1.7486,
29586
+ "step": 379100
29587
+ },
29588
+ {
29589
+ "epoch": 94.000028,
29590
+ "grad_norm": 1.886796474456787,
29591
+ "learning_rate": 7.105022305791467e-06,
29592
+ "loss": 1.7455,
29593
+ "step": 379200
29594
+ },
29595
+ {
29596
+ "epoch": 94.000228,
29597
+ "grad_norm": 1.9963804483413696,
29598
+ "learning_rate": 7.0939461437815354e-06,
29599
+ "loss": 1.744,
29600
+ "step": 379300
29601
+ },
29602
+ {
29603
+ "epoch": 95.00019,
29604
+ "grad_norm": 1.9092683792114258,
29605
+ "learning_rate": 7.082877194327953e-06,
29606
+ "loss": 1.7332,
29607
+ "step": 379400
29608
+ },
29609
+ {
29610
+ "epoch": 96.000152,
29611
+ "grad_norm": 1.9792388677597046,
29612
+ "learning_rate": 7.071815461889303e-06,
29613
+ "loss": 1.728,
29614
+ "step": 379500
29615
+ },
29616
+ {
29617
+ "epoch": 97.000114,
29618
+ "grad_norm": 1.9630019664764404,
29619
+ "learning_rate": 7.060760950921233e-06,
29620
+ "loss": 1.7224,
29621
+ "step": 379600
29622
+ },
29623
+ {
29624
+ "epoch": 98.000076,
29625
+ "grad_norm": 1.9032080173492432,
29626
+ "learning_rate": 7.049713665876509e-06,
29627
+ "loss": 1.7176,
29628
+ "step": 379700
29629
+ },
29630
+ {
29631
+ "epoch": 99.000038,
29632
+ "grad_norm": 1.9760445356369019,
29633
+ "learning_rate": 7.038673611204971e-06,
29634
+ "loss": 1.7142,
29635
+ "step": 379800
29636
+ },
29637
+ {
29638
+ "epoch": 99.000238,
29639
+ "grad_norm": 2.5537993907928467,
29640
+ "learning_rate": 7.027640791353562e-06,
29641
+ "loss": 1.7043,
29642
+ "step": 379900
29643
+ },
29644
+ {
29645
+ "epoch": 100.0002,
29646
+ "grad_norm": 1.9134443998336792,
29647
+ "learning_rate": 7.016615210766287e-06,
29648
+ "loss": 1.6935,
29649
+ "step": 380000
29650
+ },
29651
+ {
29652
+ "epoch": 100.0002,
29653
+ "eval_loss": 2.2129366397857666,
29654
+ "eval_runtime": 54.6255,
29655
+ "eval_samples_per_second": 186.616,
29656
+ "eval_steps_per_second": 1.465,
29657
+ "step": 380000
29658
+ },
29659
+ {
29660
+ "epoch": 101.000162,
29661
+ "grad_norm": 1.8621317148208618,
29662
+ "learning_rate": 7.005596873884254e-06,
29663
+ "loss": 1.7287,
29664
+ "step": 380100
29665
+ },
29666
+ {
29667
+ "epoch": 102.000124,
29668
+ "grad_norm": 2.0007071495056152,
29669
+ "learning_rate": 6.994585785145647e-06,
29670
+ "loss": 1.7216,
29671
+ "step": 380200
29672
+ },
29673
+ {
29674
+ "epoch": 103.000086,
29675
+ "grad_norm": 1.981418490409851,
29676
+ "learning_rate": 6.98358194898574e-06,
29677
+ "loss": 1.7192,
29678
+ "step": 380300
29679
+ },
29680
+ {
29681
+ "epoch": 104.000048,
29682
+ "grad_norm": 1.7912635803222656,
29683
+ "learning_rate": 6.972585369836865e-06,
29684
+ "loss": 1.7046,
29685
+ "step": 380400
29686
+ },
29687
+ {
29688
+ "epoch": 105.00001,
29689
+ "grad_norm": 1.9558844566345215,
29690
+ "learning_rate": 6.961596052128444e-06,
29691
+ "loss": 1.708,
29692
+ "step": 380500
29693
+ },
29694
+ {
29695
+ "epoch": 105.00021,
29696
+ "grad_norm": 1.9592783451080322,
29697
+ "learning_rate": 6.9506140002869756e-06,
29698
+ "loss": 1.699,
29699
+ "step": 380600
29700
+ },
29701
+ {
29702
+ "epoch": 106.000172,
29703
+ "grad_norm": 1.9580655097961426,
29704
+ "learning_rate": 6.939639218736041e-06,
29705
+ "loss": 1.6912,
29706
+ "step": 380700
29707
+ },
29708
+ {
29709
+ "epoch": 107.000134,
29710
+ "grad_norm": 1.9187573194503784,
29711
+ "learning_rate": 6.928671711896259e-06,
29712
+ "loss": 1.6864,
29713
+ "step": 380800
29714
+ },
29715
+ {
29716
+ "epoch": 108.000096,
29717
+ "grad_norm": 2.0804340839385986,
29718
+ "learning_rate": 6.917711484185349e-06,
29719
+ "loss": 1.6843,
29720
+ "step": 380900
29721
+ },
29722
+ {
29723
+ "epoch": 109.000058,
29724
+ "grad_norm": 1.9156286716461182,
29725
+ "learning_rate": 6.906758540018099e-06,
29726
+ "loss": 1.6788,
29727
+ "step": 381000
29728
+ },
29729
+ {
29730
+ "epoch": 109.000058,
29731
+ "eval_loss": 2.2096140384674072,
29732
+ "eval_runtime": 54.6776,
29733
+ "eval_samples_per_second": 186.438,
29734
+ "eval_steps_per_second": 1.463,
29735
+ "step": 381000
29736
+ },
29737
+ {
29738
+ "epoch": 110.00002,
29739
+ "grad_norm": 1.8327763080596924,
29740
+ "learning_rate": 6.895812883806341e-06,
29741
+ "loss": 1.703,
29742
+ "step": 381100
29743
+ },
29744
+ {
29745
+ "epoch": 110.00022,
29746
+ "grad_norm": 1.9155895709991455,
29747
+ "learning_rate": 6.884874519958984e-06,
29748
+ "loss": 1.6962,
29749
+ "step": 381200
29750
+ },
29751
+ {
29752
+ "epoch": 111.000182,
29753
+ "grad_norm": 1.8222503662109375,
29754
+ "learning_rate": 6.873943452882006e-06,
29755
+ "loss": 1.6917,
29756
+ "step": 381300
29757
+ },
29758
+ {
29759
+ "epoch": 112.000144,
29760
+ "grad_norm": 1.8987947702407837,
29761
+ "learning_rate": 6.863019686978445e-06,
29762
+ "loss": 1.6892,
29763
+ "step": 381400
29764
+ },
29765
+ {
29766
+ "epoch": 113.000106,
29767
+ "grad_norm": 1.8653353452682495,
29768
+ "learning_rate": 6.85210322664838e-06,
29769
+ "loss": 1.6867,
29770
+ "step": 381500
29771
+ },
29772
+ {
29773
+ "epoch": 114.000068,
29774
+ "grad_norm": 1.8713948726654053,
29775
+ "learning_rate": 6.841194076288962e-06,
29776
+ "loss": 1.6777,
29777
+ "step": 381600
29778
+ },
29779
+ {
29780
+ "epoch": 115.00003,
29781
+ "grad_norm": 1.9354687929153442,
29782
+ "learning_rate": 6.830292240294398e-06,
29783
+ "loss": 1.6756,
29784
+ "step": 381700
29785
+ },
29786
+ {
29787
+ "epoch": 115.00023,
29788
+ "grad_norm": 1.8539812564849854,
29789
+ "learning_rate": 6.8193977230559565e-06,
29790
+ "loss": 1.669,
29791
+ "step": 381800
29792
+ },
29793
+ {
29794
+ "epoch": 116.000192,
29795
+ "grad_norm": 1.913901448249817,
29796
+ "learning_rate": 6.808510528961928e-06,
29797
+ "loss": 1.6632,
29798
+ "step": 381900
29799
+ },
29800
+ {
29801
+ "epoch": 117.000154,
29802
+ "grad_norm": 1.8366894721984863,
29803
+ "learning_rate": 6.797630662397683e-06,
29804
+ "loss": 1.6619,
29805
+ "step": 382000
29806
+ },
29807
+ {
29808
+ "epoch": 117.000154,
29809
+ "eval_loss": 2.1981077194213867,
29810
+ "eval_runtime": 54.646,
29811
+ "eval_samples_per_second": 186.546,
29812
+ "eval_steps_per_second": 1.464,
29813
+ "step": 382000
29814
  }
29815
  ],
29816
  "logging_steps": 100,
 
29830
  "attributes": {}
29831
  }
29832
  },
29833
+ "total_flos": 3.333426940465899e+19,
29834
  "train_batch_size": 128,
29835
  "trial_name": null,
29836
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04f252a64f6373afbaec36fc31e345451d91b06580ee09a9823282cc3866516c
3
  size 5777
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a19fa79233fd468fcb689b7b8c5f704161aecb10646540b1133405c7c866d2ff
3
  size 5777