error577 commited on
Commit
8af7e48
·
verified ·
1 Parent(s): 87dc967

Training in progress, step 1900, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c674bf9cdeb23c80b289ac43407765b1e78aedb4884b1385a152cf6a92469ca9
3
  size 528526760
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d06847da158ce168fb93e55cc61bf6bea8dd7967293bd7a3c8e8402b075b8fe4
3
  size 528526760
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:085e11b95cb9886558bd81f7c18c71d642195ef4464bfe7249b85b4efd5f0b49
3
  size 141172038
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2dadc0a09d187a8011aebcd91d84a82f37564642f08657fd83aef7232779b1b
3
  size 141172038
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:206426ce70c7f4de66ff8448310cacccd08ef2e69a6fa5935e72dbe8ba27401e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc0fc3759c3826ef1f53a065691416b3f84d400156909e771c89c5340e089c66
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a80a4389f665fb0206d9e4c906beb4f87034c401ec9e85d64dbadeb28ef38e37
3
  size 2080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69bc8f012b013c017f6532a85f380ecf6f76a1db060525a9d4f033160c892e6c
3
  size 2080
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.5684272050857544,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1600",
4
- "epoch": 0.3378061368114854,
5
  "eval_steps": 100,
6
- "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -12759,6 +12759,714 @@
12759
  "eval_samples_per_second": 3.53,
12760
  "eval_steps_per_second": 1.773,
12761
  "step": 1800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12762
  }
12763
  ],
12764
  "logging_steps": 1,
@@ -12773,7 +13481,7 @@
12773
  "early_stopping_threshold": 0.0
12774
  },
12775
  "attributes": {
12776
- "early_stopping_patience_counter": 2
12777
  }
12778
  },
12779
  "TrainerControl": {
@@ -12787,7 +13495,7 @@
12787
  "attributes": {}
12788
  }
12789
  },
12790
- "total_flos": 3.236871551229235e+17,
12791
  "train_batch_size": 2,
12792
  "trial_name": null,
12793
  "trial_params": null
 
1
  {
2
  "best_metric": 0.5684272050857544,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1600",
4
+ "epoch": 0.3565731444121235,
5
  "eval_steps": 100,
6
+ "global_step": 1900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
12759
  "eval_samples_per_second": 3.53,
12760
  "eval_steps_per_second": 1.773,
12761
  "step": 1800
12762
+ },
12763
+ {
12764
+ "epoch": 0.3379938068874918,
12765
+ "grad_norm": 7.328483581542969,
12766
+ "learning_rate": 0.00019398047879803926,
12767
+ "loss": 1.9055,
12768
+ "step": 1801
12769
+ },
12770
+ {
12771
+ "epoch": 0.33818147696349815,
12772
+ "grad_norm": 6.055893421173096,
12773
+ "learning_rate": 0.00019397375581320375,
12774
+ "loss": 2.4346,
12775
+ "step": 1802
12776
+ },
12777
+ {
12778
+ "epoch": 0.33836914703950455,
12779
+ "grad_norm": 8.04631519317627,
12780
+ "learning_rate": 0.00019396701827645302,
12781
+ "loss": 1.5086,
12782
+ "step": 1803
12783
+ },
12784
+ {
12785
+ "epoch": 0.33855681711551094,
12786
+ "grad_norm": 6.279934406280518,
12787
+ "learning_rate": 0.00019396026618778706,
12788
+ "loss": 2.2011,
12789
+ "step": 1804
12790
+ },
12791
+ {
12792
+ "epoch": 0.3387444871915173,
12793
+ "grad_norm": 7.011415481567383,
12794
+ "learning_rate": 0.00019395352865103632,
12795
+ "loss": 2.5766,
12796
+ "step": 1805
12797
+ },
12798
+ {
12799
+ "epoch": 0.3389321572675237,
12800
+ "grad_norm": 11.007534980773926,
12801
+ "learning_rate": 0.0001939467911142856,
12802
+ "loss": 3.25,
12803
+ "step": 1806
12804
+ },
12805
+ {
12806
+ "epoch": 0.3391198273435301,
12807
+ "grad_norm": 4.763912200927734,
12808
+ "learning_rate": 0.00019394003902561963,
12809
+ "loss": 1.7699,
12810
+ "step": 1807
12811
+ },
12812
+ {
12813
+ "epoch": 0.3393074974195365,
12814
+ "grad_norm": 7.067093849182129,
12815
+ "learning_rate": 0.00019393328693695366,
12816
+ "loss": 2.0153,
12817
+ "step": 1808
12818
+ },
12819
+ {
12820
+ "epoch": 0.3394951674955428,
12821
+ "grad_norm": 6.018016338348389,
12822
+ "learning_rate": 0.0001939265348482877,
12823
+ "loss": 1.0305,
12824
+ "step": 1809
12825
+ },
12826
+ {
12827
+ "epoch": 0.3396828375715492,
12828
+ "grad_norm": 5.589052677154541,
12829
+ "learning_rate": 0.0001939197682077065,
12830
+ "loss": 1.623,
12831
+ "step": 1810
12832
+ },
12833
+ {
12834
+ "epoch": 0.3398705076475556,
12835
+ "grad_norm": 4.106575965881348,
12836
+ "learning_rate": 0.00019391300156712532,
12837
+ "loss": 2.3932,
12838
+ "step": 1811
12839
+ },
12840
+ {
12841
+ "epoch": 0.340058177723562,
12842
+ "grad_norm": 6.3381171226501465,
12843
+ "learning_rate": 0.00019390624947845936,
12844
+ "loss": 1.859,
12845
+ "step": 1812
12846
+ },
12847
+ {
12848
+ "epoch": 0.34024584779956835,
12849
+ "grad_norm": 7.823512554168701,
12850
+ "learning_rate": 0.00019389946828596294,
12851
+ "loss": 2.0446,
12852
+ "step": 1813
12853
+ },
12854
+ {
12855
+ "epoch": 0.34043351787557474,
12856
+ "grad_norm": 4.890605449676514,
12857
+ "learning_rate": 0.00019389268709346652,
12858
+ "loss": 0.8467,
12859
+ "step": 1814
12860
+ },
12861
+ {
12862
+ "epoch": 0.34062118795158114,
12863
+ "grad_norm": 7.945042610168457,
12864
+ "learning_rate": 0.00019388592045288533,
12865
+ "loss": 2.6165,
12866
+ "step": 1815
12867
+ },
12868
+ {
12869
+ "epoch": 0.3408088580275875,
12870
+ "grad_norm": 4.810164451599121,
12871
+ "learning_rate": 0.0001938791392603889,
12872
+ "loss": 1.1764,
12873
+ "step": 1816
12874
+ },
12875
+ {
12876
+ "epoch": 0.3409965281035939,
12877
+ "grad_norm": 6.62808895111084,
12878
+ "learning_rate": 0.0001938723580678925,
12879
+ "loss": 2.3356,
12880
+ "step": 1817
12881
+ },
12882
+ {
12883
+ "epoch": 0.3411841981796003,
12884
+ "grad_norm": 8.076144218444824,
12885
+ "learning_rate": 0.00019386556232348084,
12886
+ "loss": 2.2133,
12887
+ "step": 1818
12888
+ },
12889
+ {
12890
+ "epoch": 0.34137186825560667,
12891
+ "grad_norm": 10.573699951171875,
12892
+ "learning_rate": 0.0001938587665790692,
12893
+ "loss": 2.3251,
12894
+ "step": 1819
12895
+ },
12896
+ {
12897
+ "epoch": 0.341559538331613,
12898
+ "grad_norm": 5.872825622558594,
12899
+ "learning_rate": 0.00019385197083465755,
12900
+ "loss": 1.8493,
12901
+ "step": 1820
12902
+ },
12903
+ {
12904
+ "epoch": 0.3417472084076194,
12905
+ "grad_norm": 3.5163869857788086,
12906
+ "learning_rate": 0.0001938451750902459,
12907
+ "loss": 1.1675,
12908
+ "step": 1821
12909
+ },
12910
+ {
12911
+ "epoch": 0.3419348784836258,
12912
+ "grad_norm": 6.068939685821533,
12913
+ "learning_rate": 0.00019383836479391903,
12914
+ "loss": 1.7621,
12915
+ "step": 1822
12916
+ },
12917
+ {
12918
+ "epoch": 0.34212254855963214,
12919
+ "grad_norm": 5.792971611022949,
12920
+ "learning_rate": 0.00019383155449759215,
12921
+ "loss": 1.8668,
12922
+ "step": 1823
12923
+ },
12924
+ {
12925
+ "epoch": 0.34231021863563854,
12926
+ "grad_norm": 8.612492561340332,
12927
+ "learning_rate": 0.00019382474420126528,
12928
+ "loss": 2.2727,
12929
+ "step": 1824
12930
+ },
12931
+ {
12932
+ "epoch": 0.34249788871164494,
12933
+ "grad_norm": 5.430174827575684,
12934
+ "learning_rate": 0.0001938179339049384,
12935
+ "loss": 1.7228,
12936
+ "step": 1825
12937
+ },
12938
+ {
12939
+ "epoch": 0.34268555878765133,
12940
+ "grad_norm": 6.0315375328063965,
12941
+ "learning_rate": 0.0001938111090566963,
12942
+ "loss": 2.283,
12943
+ "step": 1826
12944
+ },
12945
+ {
12946
+ "epoch": 0.3428732288636577,
12947
+ "grad_norm": 5.934972763061523,
12948
+ "learning_rate": 0.00019380429876036942,
12949
+ "loss": 1.37,
12950
+ "step": 1827
12951
+ },
12952
+ {
12953
+ "epoch": 0.34306089893966407,
12954
+ "grad_norm": 8.644633293151855,
12955
+ "learning_rate": 0.00019379747391212732,
12956
+ "loss": 1.7698,
12957
+ "step": 1828
12958
+ },
12959
+ {
12960
+ "epoch": 0.34324856901567047,
12961
+ "grad_norm": 6.068434238433838,
12962
+ "learning_rate": 0.0001937906490638852,
12963
+ "loss": 2.4035,
12964
+ "step": 1829
12965
+ },
12966
+ {
12967
+ "epoch": 0.3434362390916768,
12968
+ "grad_norm": 5.352113723754883,
12969
+ "learning_rate": 0.00019378380966372788,
12970
+ "loss": 1.4043,
12971
+ "step": 1830
12972
+ },
12973
+ {
12974
+ "epoch": 0.3436239091676832,
12975
+ "grad_norm": 5.741805553436279,
12976
+ "learning_rate": 0.00019377697026357055,
12977
+ "loss": 2.3867,
12978
+ "step": 1831
12979
+ },
12980
+ {
12981
+ "epoch": 0.3438115792436896,
12982
+ "grad_norm": 5.221137523651123,
12983
+ "learning_rate": 0.00019377013086341321,
12984
+ "loss": 2.5909,
12985
+ "step": 1832
12986
+ },
12987
+ {
12988
+ "epoch": 0.343999249319696,
12989
+ "grad_norm": 5.222947597503662,
12990
+ "learning_rate": 0.00019376329146325588,
12991
+ "loss": 1.8876,
12992
+ "step": 1833
12993
+ },
12994
+ {
12995
+ "epoch": 0.34418691939570234,
12996
+ "grad_norm": 4.082976341247559,
12997
+ "learning_rate": 0.00019375643751118332,
12998
+ "loss": 1.4823,
12999
+ "step": 1834
13000
+ },
13001
+ {
13002
+ "epoch": 0.34437458947170874,
13003
+ "grad_norm": 4.595718860626221,
13004
+ "learning_rate": 0.00019374958355911076,
13005
+ "loss": 1.3744,
13006
+ "step": 1835
13007
+ },
13008
+ {
13009
+ "epoch": 0.34456225954771513,
13010
+ "grad_norm": 5.016329765319824,
13011
+ "learning_rate": 0.0001937427296070382,
13012
+ "loss": 2.3715,
13013
+ "step": 1836
13014
+ },
13015
+ {
13016
+ "epoch": 0.3447499296237215,
13017
+ "grad_norm": 4.979654312133789,
13018
+ "learning_rate": 0.00019373587565496564,
13019
+ "loss": 1.7598,
13020
+ "step": 1837
13021
+ },
13022
+ {
13023
+ "epoch": 0.34493759969972787,
13024
+ "grad_norm": 5.692010879516602,
13025
+ "learning_rate": 0.00019372900715097785,
13026
+ "loss": 1.5982,
13027
+ "step": 1838
13028
+ },
13029
+ {
13030
+ "epoch": 0.34512526977573427,
13031
+ "grad_norm": 6.913799285888672,
13032
+ "learning_rate": 0.0001937221531989053,
13033
+ "loss": 1.9036,
13034
+ "step": 1839
13035
+ },
13036
+ {
13037
+ "epoch": 0.34531293985174066,
13038
+ "grad_norm": 5.701316833496094,
13039
+ "learning_rate": 0.0001937152846949175,
13040
+ "loss": 1.9009,
13041
+ "step": 1840
13042
+ },
13043
+ {
13044
+ "epoch": 0.345500609927747,
13045
+ "grad_norm": 19.9186954498291,
13046
+ "learning_rate": 0.0001937084161909297,
13047
+ "loss": 2.4767,
13048
+ "step": 1841
13049
+ },
13050
+ {
13051
+ "epoch": 0.3456882800037534,
13052
+ "grad_norm": 5.613144874572754,
13053
+ "learning_rate": 0.0001937015331350267,
13054
+ "loss": 3.6305,
13055
+ "step": 1842
13056
+ },
13057
+ {
13058
+ "epoch": 0.3458759500797598,
13059
+ "grad_norm": 3.7392101287841797,
13060
+ "learning_rate": 0.0001936946646310389,
13061
+ "loss": 1.1532,
13062
+ "step": 1843
13063
+ },
13064
+ {
13065
+ "epoch": 0.34606362015576614,
13066
+ "grad_norm": 7.338042259216309,
13067
+ "learning_rate": 0.00019368776702322066,
13068
+ "loss": 1.95,
13069
+ "step": 1844
13070
+ },
13071
+ {
13072
+ "epoch": 0.34625129023177253,
13073
+ "grad_norm": 5.003443717956543,
13074
+ "learning_rate": 0.00019368088396731764,
13075
+ "loss": 1.7028,
13076
+ "step": 1845
13077
+ },
13078
+ {
13079
+ "epoch": 0.34643896030777893,
13080
+ "grad_norm": 6.806427001953125,
13081
+ "learning_rate": 0.00019367400091141462,
13082
+ "loss": 2.4638,
13083
+ "step": 1846
13084
+ },
13085
+ {
13086
+ "epoch": 0.34662663038378533,
13087
+ "grad_norm": 5.704411506652832,
13088
+ "learning_rate": 0.00019366710330359638,
13089
+ "loss": 2.7797,
13090
+ "step": 1847
13091
+ },
13092
+ {
13093
+ "epoch": 0.34681430045979167,
13094
+ "grad_norm": 3.765601873397827,
13095
+ "learning_rate": 0.00019366020569577813,
13096
+ "loss": 1.8054,
13097
+ "step": 1848
13098
+ },
13099
+ {
13100
+ "epoch": 0.34700197053579807,
13101
+ "grad_norm": 5.865424156188965,
13102
+ "learning_rate": 0.00019365330808795989,
13103
+ "loss": 2.2234,
13104
+ "step": 1849
13105
+ },
13106
+ {
13107
+ "epoch": 0.34718964061180446,
13108
+ "grad_norm": 9.001687049865723,
13109
+ "learning_rate": 0.0001936463959282264,
13110
+ "loss": 2.0534,
13111
+ "step": 1850
13112
+ },
13113
+ {
13114
+ "epoch": 0.3473773106878108,
13115
+ "grad_norm": 3.3067872524261475,
13116
+ "learning_rate": 0.00019363948376849294,
13117
+ "loss": 1.7267,
13118
+ "step": 1851
13119
+ },
13120
+ {
13121
+ "epoch": 0.3475649807638172,
13122
+ "grad_norm": 6.117793560028076,
13123
+ "learning_rate": 0.00019363257160875946,
13124
+ "loss": 1.721,
13125
+ "step": 1852
13126
+ },
13127
+ {
13128
+ "epoch": 0.3477526508398236,
13129
+ "grad_norm": 9.610057830810547,
13130
+ "learning_rate": 0.000193625659449026,
13131
+ "loss": 4.0058,
13132
+ "step": 1853
13133
+ },
13134
+ {
13135
+ "epoch": 0.34794032091583,
13136
+ "grad_norm": 5.996690273284912,
13137
+ "learning_rate": 0.00019361873273737729,
13138
+ "loss": 1.9939,
13139
+ "step": 1854
13140
+ },
13141
+ {
13142
+ "epoch": 0.34812799099183633,
13143
+ "grad_norm": 4.617966175079346,
13144
+ "learning_rate": 0.00019361180602572858,
13145
+ "loss": 1.1395,
13146
+ "step": 1855
13147
+ },
13148
+ {
13149
+ "epoch": 0.34831566106784273,
13150
+ "grad_norm": 6.157725811004639,
13151
+ "learning_rate": 0.00019360487931407988,
13152
+ "loss": 1.5481,
13153
+ "step": 1856
13154
+ },
13155
+ {
13156
+ "epoch": 0.3485033311438491,
13157
+ "grad_norm": 4.873600482940674,
13158
+ "learning_rate": 0.00019359795260243118,
13159
+ "loss": 1.944,
13160
+ "step": 1857
13161
+ },
13162
+ {
13163
+ "epoch": 0.34869100121985547,
13164
+ "grad_norm": 4.755199432373047,
13165
+ "learning_rate": 0.00019359101133886725,
13166
+ "loss": 1.2533,
13167
+ "step": 1858
13168
+ },
13169
+ {
13170
+ "epoch": 0.34887867129586186,
13171
+ "grad_norm": 6.5496392250061035,
13172
+ "learning_rate": 0.00019358408462721854,
13173
+ "loss": 2.6968,
13174
+ "step": 1859
13175
+ },
13176
+ {
13177
+ "epoch": 0.34906634137186826,
13178
+ "grad_norm": 8.573905944824219,
13179
+ "learning_rate": 0.0001935771433636546,
13180
+ "loss": 2.1588,
13181
+ "step": 1860
13182
+ },
13183
+ {
13184
+ "epoch": 0.34925401144787466,
13185
+ "grad_norm": 10.172856330871582,
13186
+ "learning_rate": 0.00019357018754817545,
13187
+ "loss": 2.2443,
13188
+ "step": 1861
13189
+ },
13190
+ {
13191
+ "epoch": 0.349441681523881,
13192
+ "grad_norm": 9.657448768615723,
13193
+ "learning_rate": 0.00019356324628461152,
13194
+ "loss": 2.5489,
13195
+ "step": 1862
13196
+ },
13197
+ {
13198
+ "epoch": 0.3496293515998874,
13199
+ "grad_norm": 4.509249210357666,
13200
+ "learning_rate": 0.00019355629046913236,
13201
+ "loss": 1.3726,
13202
+ "step": 1863
13203
+ },
13204
+ {
13205
+ "epoch": 0.3498170216758938,
13206
+ "grad_norm": 3.9321093559265137,
13207
+ "learning_rate": 0.00019354934920556843,
13208
+ "loss": 2.2097,
13209
+ "step": 1864
13210
+ },
13211
+ {
13212
+ "epoch": 0.35000469175190013,
13213
+ "grad_norm": 5.736708641052246,
13214
+ "learning_rate": 0.00019354237883817405,
13215
+ "loss": 1.9158,
13216
+ "step": 1865
13217
+ },
13218
+ {
13219
+ "epoch": 0.35019236182790653,
13220
+ "grad_norm": 7.1537089347839355,
13221
+ "learning_rate": 0.00019353542302269489,
13222
+ "loss": 2.3103,
13223
+ "step": 1866
13224
+ },
13225
+ {
13226
+ "epoch": 0.3503800319039129,
13227
+ "grad_norm": 17.57890510559082,
13228
+ "learning_rate": 0.00019352843810338527,
13229
+ "loss": 2.7007,
13230
+ "step": 1867
13231
+ },
13232
+ {
13233
+ "epoch": 0.3505677019799193,
13234
+ "grad_norm": 4.420182228088379,
13235
+ "learning_rate": 0.0001935214822879061,
13236
+ "loss": 1.8044,
13237
+ "step": 1868
13238
+ },
13239
+ {
13240
+ "epoch": 0.35075537205592566,
13241
+ "grad_norm": 5.2922234535217285,
13242
+ "learning_rate": 0.00019351451192051172,
13243
+ "loss": 1.9718,
13244
+ "step": 1869
13245
+ },
13246
+ {
13247
+ "epoch": 0.35094304213193206,
13248
+ "grad_norm": 6.406688690185547,
13249
+ "learning_rate": 0.0001935075270012021,
13250
+ "loss": 2.7322,
13251
+ "step": 1870
13252
+ },
13253
+ {
13254
+ "epoch": 0.35113071220793846,
13255
+ "grad_norm": 5.1606974601745605,
13256
+ "learning_rate": 0.00019350055663380772,
13257
+ "loss": 1.9587,
13258
+ "step": 1871
13259
+ },
13260
+ {
13261
+ "epoch": 0.35131838228394485,
13262
+ "grad_norm": 6.796870708465576,
13263
+ "learning_rate": 0.00019349355716258287,
13264
+ "loss": 2.0894,
13265
+ "step": 1872
13266
+ },
13267
+ {
13268
+ "epoch": 0.3515060523599512,
13269
+ "grad_norm": 6.20542573928833,
13270
+ "learning_rate": 0.00019348657224327326,
13271
+ "loss": 2.7715,
13272
+ "step": 1873
13273
+ },
13274
+ {
13275
+ "epoch": 0.3516937224359576,
13276
+ "grad_norm": 5.50722599029541,
13277
+ "learning_rate": 0.00019347958732396364,
13278
+ "loss": 1.8027,
13279
+ "step": 1874
13280
+ },
13281
+ {
13282
+ "epoch": 0.351881392511964,
13283
+ "grad_norm": 7.180240631103516,
13284
+ "learning_rate": 0.0001934725878527388,
13285
+ "loss": 2.0733,
13286
+ "step": 1875
13287
+ },
13288
+ {
13289
+ "epoch": 0.3520690625879703,
13290
+ "grad_norm": 6.353847980499268,
13291
+ "learning_rate": 0.00019346558838151395,
13292
+ "loss": 2.6648,
13293
+ "step": 1876
13294
+ },
13295
+ {
13296
+ "epoch": 0.3522567326639767,
13297
+ "grad_norm": 5.271063327789307,
13298
+ "learning_rate": 0.00019345857435837388,
13299
+ "loss": 1.5801,
13300
+ "step": 1877
13301
+ },
13302
+ {
13303
+ "epoch": 0.3524444027399831,
13304
+ "grad_norm": 5.9332380294799805,
13305
+ "learning_rate": 0.00019345157488714904,
13306
+ "loss": 2.1235,
13307
+ "step": 1878
13308
+ },
13309
+ {
13310
+ "epoch": 0.3526320728159895,
13311
+ "grad_norm": 8.27168083190918,
13312
+ "learning_rate": 0.00019344456086400896,
13313
+ "loss": 2.6918,
13314
+ "step": 1879
13315
+ },
13316
+ {
13317
+ "epoch": 0.35281974289199586,
13318
+ "grad_norm": 5.08257532119751,
13319
+ "learning_rate": 0.0001934375468408689,
13320
+ "loss": 1.2914,
13321
+ "step": 1880
13322
+ },
13323
+ {
13324
+ "epoch": 0.35300741296800225,
13325
+ "grad_norm": 7.454222679138184,
13326
+ "learning_rate": 0.00019343053281772882,
13327
+ "loss": 2.7986,
13328
+ "step": 1881
13329
+ },
13330
+ {
13331
+ "epoch": 0.35319508304400865,
13332
+ "grad_norm": 3.519970417022705,
13333
+ "learning_rate": 0.00019342350424267352,
13334
+ "loss": 1.4717,
13335
+ "step": 1882
13336
+ },
13337
+ {
13338
+ "epoch": 0.353382753120015,
13339
+ "grad_norm": 7.176670074462891,
13340
+ "learning_rate": 0.00019341649021953344,
13341
+ "loss": 2.6793,
13342
+ "step": 1883
13343
+ },
13344
+ {
13345
+ "epoch": 0.3535704231960214,
13346
+ "grad_norm": 9.557621002197266,
13347
+ "learning_rate": 0.00019340946164447814,
13348
+ "loss": 3.08,
13349
+ "step": 1884
13350
+ },
13351
+ {
13352
+ "epoch": 0.3537580932720278,
13353
+ "grad_norm": 6.917932510375977,
13354
+ "learning_rate": 0.00019340243306942284,
13355
+ "loss": 2.3398,
13356
+ "step": 1885
13357
+ },
13358
+ {
13359
+ "epoch": 0.3539457633480342,
13360
+ "grad_norm": 4.977582931518555,
13361
+ "learning_rate": 0.0001933953899424523,
13362
+ "loss": 1.6738,
13363
+ "step": 1886
13364
+ },
13365
+ {
13366
+ "epoch": 0.3541334334240405,
13367
+ "grad_norm": 5.958339691162109,
13368
+ "learning_rate": 0.00019338834681548178,
13369
+ "loss": 1.9964,
13370
+ "step": 1887
13371
+ },
13372
+ {
13373
+ "epoch": 0.3543211035000469,
13374
+ "grad_norm": 7.204160213470459,
13375
+ "learning_rate": 0.00019338130368851125,
13376
+ "loss": 2.4598,
13377
+ "step": 1888
13378
+ },
13379
+ {
13380
+ "epoch": 0.3545087735760533,
13381
+ "grad_norm": 7.081577777862549,
13382
+ "learning_rate": 0.00019337426056154072,
13383
+ "loss": 2.799,
13384
+ "step": 1889
13385
+ },
13386
+ {
13387
+ "epoch": 0.35469644365205966,
13388
+ "grad_norm": 6.637358665466309,
13389
+ "learning_rate": 0.0001933672174345702,
13390
+ "loss": 2.0836,
13391
+ "step": 1890
13392
+ },
13393
+ {
13394
+ "epoch": 0.35488411372806605,
13395
+ "grad_norm": 10.060577392578125,
13396
+ "learning_rate": 0.00019336015975568444,
13397
+ "loss": 1.6455,
13398
+ "step": 1891
13399
+ },
13400
+ {
13401
+ "epoch": 0.35507178380407245,
13402
+ "grad_norm": 6.906970024108887,
13403
+ "learning_rate": 0.00019335310207679868,
13404
+ "loss": 1.9957,
13405
+ "step": 1892
13406
+ },
13407
+ {
13408
+ "epoch": 0.35525945388007885,
13409
+ "grad_norm": 7.135964870452881,
13410
+ "learning_rate": 0.0001933460298459977,
13411
+ "loss": 1.2429,
13412
+ "step": 1893
13413
+ },
13414
+ {
13415
+ "epoch": 0.3554471239560852,
13416
+ "grad_norm": 7.937412261962891,
13417
+ "learning_rate": 0.00019333897216711193,
13418
+ "loss": 2.0331,
13419
+ "step": 1894
13420
+ },
13421
+ {
13422
+ "epoch": 0.3556347940320916,
13423
+ "grad_norm": 5.553816318511963,
13424
+ "learning_rate": 0.00019333189993631095,
13425
+ "loss": 1.6946,
13426
+ "step": 1895
13427
+ },
13428
+ {
13429
+ "epoch": 0.355822464108098,
13430
+ "grad_norm": 7.168800354003906,
13431
+ "learning_rate": 0.0001933248422574252,
13432
+ "loss": 2.1942,
13433
+ "step": 1896
13434
+ },
13435
+ {
13436
+ "epoch": 0.3560101341841043,
13437
+ "grad_norm": 7.036073684692383,
13438
+ "learning_rate": 0.00019331775547470897,
13439
+ "loss": 2.5773,
13440
+ "step": 1897
13441
+ },
13442
+ {
13443
+ "epoch": 0.3561978042601107,
13444
+ "grad_norm": 12.46209716796875,
13445
+ "learning_rate": 0.00019331066869199276,
13446
+ "loss": 2.8978,
13447
+ "step": 1898
13448
+ },
13449
+ {
13450
+ "epoch": 0.3563854743361171,
13451
+ "grad_norm": 6.11179780960083,
13452
+ "learning_rate": 0.00019330359646119177,
13453
+ "loss": 2.1355,
13454
+ "step": 1899
13455
+ },
13456
+ {
13457
+ "epoch": 0.3565731444121235,
13458
+ "grad_norm": 5.809276580810547,
13459
+ "learning_rate": 0.00019329650967847556,
13460
+ "loss": 1.7289,
13461
+ "step": 1900
13462
+ },
13463
+ {
13464
+ "epoch": 0.3565731444121235,
13465
+ "eval_loss": 0.5747910141944885,
13466
+ "eval_runtime": 60.9369,
13467
+ "eval_samples_per_second": 3.528,
13468
+ "eval_steps_per_second": 1.772,
13469
+ "step": 1900
13470
  }
13471
  ],
13472
  "logging_steps": 1,
 
13481
  "early_stopping_threshold": 0.0
13482
  },
13483
  "attributes": {
13484
+ "early_stopping_patience_counter": 3
13485
  }
13486
  },
13487
  "TrainerControl": {
 
13495
  "attributes": {}
13496
  }
13497
  },
13498
+ "total_flos": 3.420104552257413e+17,
13499
  "train_batch_size": 2,
13500
  "trial_name": null,
13501
  "trial_params": null