staqc-sandboxes-traces-terminus-2 / trainer_state.json
penfever's picture
End of training
7359a5e verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 775,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03231017770597738,
"grad_norm": 6.731328532589346,
"learning_rate": 2.0512820512820513e-06,
"loss": 0.7811,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18013827502727509,
"step": 5,
"valid_targets_mean": 3120.6,
"valid_targets_min": 739
},
{
"epoch": 0.06462035541195477,
"grad_norm": 4.158689615969652,
"learning_rate": 4.615384615384616e-06,
"loss": 0.7582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17159777879714966,
"step": 10,
"valid_targets_mean": 3504.9,
"valid_targets_min": 487
},
{
"epoch": 0.09693053311793215,
"grad_norm": 2.4589720035548734,
"learning_rate": 7.17948717948718e-06,
"loss": 0.701,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14925020933151245,
"step": 15,
"valid_targets_mean": 3731.6,
"valid_targets_min": 1279
},
{
"epoch": 0.12924071082390953,
"grad_norm": 0.9663503382808891,
"learning_rate": 9.743589743589744e-06,
"loss": 0.6601,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17283087968826294,
"step": 20,
"valid_targets_mean": 4027.5,
"valid_targets_min": 501
},
{
"epoch": 0.16155088852988692,
"grad_norm": 0.8054644194990555,
"learning_rate": 1.230769230769231e-05,
"loss": 0.6369,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17246821522712708,
"step": 25,
"valid_targets_mean": 3996.4,
"valid_targets_min": 1977
},
{
"epoch": 0.1938610662358643,
"grad_norm": 0.5216073107230456,
"learning_rate": 1.4871794871794874e-05,
"loss": 0.6002,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14975571632385254,
"step": 30,
"valid_targets_mean": 4030.6,
"valid_targets_min": 399
},
{
"epoch": 0.22617124394184168,
"grad_norm": 0.5196065116767887,
"learning_rate": 1.7435897435897438e-05,
"loss": 0.5885,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14672940969467163,
"step": 35,
"valid_targets_mean": 4130.4,
"valid_targets_min": 984
},
{
"epoch": 0.25848142164781907,
"grad_norm": 0.38054025010847065,
"learning_rate": 2e-05,
"loss": 0.5665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1372590959072113,
"step": 40,
"valid_targets_mean": 4145.8,
"valid_targets_min": 1033
},
{
"epoch": 0.29079159935379645,
"grad_norm": 0.3929948606013641,
"learning_rate": 2.2564102564102566e-05,
"loss": 0.563,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13531070947647095,
"step": 45,
"valid_targets_mean": 3306.5,
"valid_targets_min": 1157
},
{
"epoch": 0.32310177705977383,
"grad_norm": 0.32153200327415404,
"learning_rate": 2.512820512820513e-05,
"loss": 0.5405,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14371386170387268,
"step": 50,
"valid_targets_mean": 4432.1,
"valid_targets_min": 571
},
{
"epoch": 0.3554119547657512,
"grad_norm": 0.30016200515564767,
"learning_rate": 2.7692307692307694e-05,
"loss": 0.5318,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12094619125127792,
"step": 55,
"valid_targets_mean": 3439.8,
"valid_targets_min": 1091
},
{
"epoch": 0.3877221324717286,
"grad_norm": 0.32812658031573905,
"learning_rate": 3.0256410256410257e-05,
"loss": 0.5183,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1525593101978302,
"step": 60,
"valid_targets_mean": 4788.1,
"valid_targets_min": 508
},
{
"epoch": 0.420032310177706,
"grad_norm": 0.31480454051703594,
"learning_rate": 3.282051282051282e-05,
"loss": 0.5237,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10941419005393982,
"step": 65,
"valid_targets_mean": 2808.4,
"valid_targets_min": 573
},
{
"epoch": 0.45234248788368336,
"grad_norm": 0.2696632862359051,
"learning_rate": 3.538461538461539e-05,
"loss": 0.5084,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13120058178901672,
"step": 70,
"valid_targets_mean": 4135.1,
"valid_targets_min": 937
},
{
"epoch": 0.48465266558966075,
"grad_norm": 0.6024620941620338,
"learning_rate": 3.794871794871795e-05,
"loss": 0.5068,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1219104528427124,
"step": 75,
"valid_targets_mean": 3757.5,
"valid_targets_min": 1028
},
{
"epoch": 0.5169628432956381,
"grad_norm": 0.28664290005490634,
"learning_rate": 3.999979684222212e-05,
"loss": 0.5038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13252684473991394,
"step": 80,
"valid_targets_mean": 4096.0,
"valid_targets_min": 729
},
{
"epoch": 0.5492730210016155,
"grad_norm": 0.29814119507104336,
"learning_rate": 3.999268675335385e-05,
"loss": 0.4891,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11344544589519501,
"step": 85,
"valid_targets_mean": 3552.1,
"valid_targets_min": 814
},
{
"epoch": 0.5815831987075929,
"grad_norm": 0.3159183051465075,
"learning_rate": 3.997542290252236e-05,
"loss": 0.4843,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12366665154695511,
"step": 90,
"valid_targets_mean": 3477.1,
"valid_targets_min": 979
},
{
"epoch": 0.6138933764135702,
"grad_norm": 0.266069868151099,
"learning_rate": 3.9948014057585294e-05,
"loss": 0.4803,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11242301762104034,
"step": 95,
"valid_targets_mean": 3989.3,
"valid_targets_min": 590
},
{
"epoch": 0.6462035541195477,
"grad_norm": 0.28400332082322377,
"learning_rate": 3.991047413877713e-05,
"loss": 0.4804,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13703756034374237,
"step": 100,
"valid_targets_mean": 3988.6,
"valid_targets_min": 1313
},
{
"epoch": 0.678513731825525,
"grad_norm": 0.2959731557583916,
"learning_rate": 3.98628222116394e-05,
"loss": 0.4897,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10094247758388519,
"step": 105,
"valid_targets_mean": 3035.1,
"valid_targets_min": 593
},
{
"epoch": 0.7108239095315024,
"grad_norm": 0.3021715068251041,
"learning_rate": 3.9805082477337815e-05,
"loss": 0.4628,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12388584017753601,
"step": 110,
"valid_targets_mean": 3724.1,
"valid_targets_min": 1076
},
{
"epoch": 0.7431340872374798,
"grad_norm": 0.2856991041044821,
"learning_rate": 3.9737284260371144e-05,
"loss": 0.471,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11789651215076447,
"step": 115,
"valid_targets_mean": 3853.4,
"valid_targets_min": 857
},
{
"epoch": 0.7754442649434572,
"grad_norm": 0.28392806599052883,
"learning_rate": 3.965946199367804e-05,
"loss": 0.4804,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11591988056898117,
"step": 120,
"valid_targets_mean": 3771.6,
"valid_targets_min": 730
},
{
"epoch": 0.8077544426494345,
"grad_norm": 0.2919653111659463,
"learning_rate": 3.957165520114948e-05,
"loss": 0.4682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13558943569660187,
"step": 125,
"valid_targets_mean": 3917.8,
"valid_targets_min": 1219
},
{
"epoch": 0.840064620355412,
"grad_norm": 0.3238305897868469,
"learning_rate": 3.947390847755559e-05,
"loss": 0.4732,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11949899047613144,
"step": 130,
"valid_targets_mean": 3899.7,
"valid_targets_min": 770
},
{
"epoch": 0.8723747980613893,
"grad_norm": 0.30925326761097366,
"learning_rate": 3.936627146589715e-05,
"loss": 0.4776,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12182562053203583,
"step": 135,
"valid_targets_mean": 3646.8,
"valid_targets_min": 516
},
{
"epoch": 0.9046849757673667,
"grad_norm": 0.27131763808428006,
"learning_rate": 3.92487988321932e-05,
"loss": 0.4582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1010097786784172,
"step": 140,
"valid_targets_mean": 3460.2,
"valid_targets_min": 905
},
{
"epoch": 0.9369951534733441,
"grad_norm": 0.2787680956479995,
"learning_rate": 3.912155023771762e-05,
"loss": 0.4679,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12193842232227325,
"step": 145,
"valid_targets_mean": 4362.3,
"valid_targets_min": 1958
},
{
"epoch": 0.9693053311793215,
"grad_norm": 0.2993760143538429,
"learning_rate": 3.89845903086987e-05,
"loss": 0.4523,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10849137604236603,
"step": 150,
"valid_targets_mean": 3652.4,
"valid_targets_min": 1075
},
{
"epoch": 1.0,
"grad_norm": 0.3292167259914328,
"learning_rate": 3.883798860349722e-05,
"loss": 0.4587,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1710975468158722,
"step": 155,
"valid_targets_mean": 4421.6,
"valid_targets_min": 1262
},
{
"epoch": 1.0323101777059773,
"grad_norm": 0.2695765904096439,
"learning_rate": 3.8681819577279515e-05,
"loss": 0.4468,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11836530268192291,
"step": 160,
"valid_targets_mean": 4131.2,
"valid_targets_min": 770
},
{
"epoch": 1.0646203554119547,
"grad_norm": 0.283031599096208,
"learning_rate": 3.85161625442037e-05,
"loss": 0.4548,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0947444885969162,
"step": 165,
"valid_targets_mean": 3082.1,
"valid_targets_min": 980
},
{
"epoch": 1.0969305331179322,
"grad_norm": 0.2757114518295554,
"learning_rate": 3.834110163713806e-05,
"loss": 0.4502,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11827362328767776,
"step": 170,
"valid_targets_mean": 4066.2,
"valid_targets_min": 1746
},
{
"epoch": 1.1292407108239095,
"grad_norm": 0.2861879786831059,
"learning_rate": 3.81567257649322e-05,
"loss": 0.4629,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1151590347290039,
"step": 175,
"valid_targets_mean": 3779.0,
"valid_targets_min": 760
},
{
"epoch": 1.1615508885298869,
"grad_norm": 0.28247290465940766,
"learning_rate": 3.796312856726252e-05,
"loss": 0.4695,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1255260705947876,
"step": 180,
"valid_targets_mean": 3885.4,
"valid_targets_min": 1325
},
{
"epoch": 1.1938610662358644,
"grad_norm": 0.2767026202339847,
"learning_rate": 3.77604083670751e-05,
"loss": 0.4472,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11101329326629639,
"step": 185,
"valid_targets_mean": 3925.6,
"valid_targets_min": 1324
},
{
"epoch": 1.2261712439418417,
"grad_norm": 0.29143791671926056,
"learning_rate": 3.754866812065008e-05,
"loss": 0.4471,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10287977755069733,
"step": 190,
"valid_targets_mean": 3699.9,
"valid_targets_min": 573
},
{
"epoch": 1.258481421647819,
"grad_norm": 0.26901543603219996,
"learning_rate": 3.7328015365312815e-05,
"loss": 0.4383,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10629215836524963,
"step": 195,
"valid_targets_mean": 3979.9,
"valid_targets_min": 1310
},
{
"epoch": 1.2907915993537964,
"grad_norm": 0.26386364331283385,
"learning_rate": 3.709856216481852e-05,
"loss": 0.4461,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1253024935722351,
"step": 200,
"valid_targets_mean": 4954.8,
"valid_targets_min": 1384
},
{
"epoch": 1.3231017770597737,
"grad_norm": 0.28205279834558566,
"learning_rate": 3.6860425052437986e-05,
"loss": 0.4581,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1366412192583084,
"step": 205,
"valid_targets_mean": 4692.7,
"valid_targets_min": 1437
},
{
"epoch": 1.3554119547657513,
"grad_norm": 0.27397295572622304,
"learning_rate": 3.6613724971773426e-05,
"loss": 0.4475,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10592503845691681,
"step": 210,
"valid_targets_mean": 3428.0,
"valid_targets_min": 1092
},
{
"epoch": 1.3877221324717286,
"grad_norm": 0.26346316493352695,
"learning_rate": 3.6358587215334355e-05,
"loss": 0.4466,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11440782994031906,
"step": 215,
"valid_targets_mean": 4438.2,
"valid_targets_min": 1592
},
{
"epoch": 1.420032310177706,
"grad_norm": 0.28710754924699733,
"learning_rate": 3.609514136090483e-05,
"loss": 0.4448,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10379820317029953,
"step": 220,
"valid_targets_mean": 3354.9,
"valid_targets_min": 714
},
{
"epoch": 1.4523424878836835,
"grad_norm": 0.2673010194641308,
"learning_rate": 3.582352120573427e-05,
"loss": 0.4367,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12144172936677933,
"step": 225,
"valid_targets_mean": 4791.8,
"valid_targets_min": 1801
},
{
"epoch": 1.4846526655896608,
"grad_norm": 0.29713460613466613,
"learning_rate": 3.554386469858534e-05,
"loss": 0.4462,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12215234339237213,
"step": 230,
"valid_targets_mean": 4136.6,
"valid_targets_min": 2541
},
{
"epoch": 1.5169628432956381,
"grad_norm": 0.27978320939134727,
"learning_rate": 3.5256313869673385e-05,
"loss": 0.4383,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10995539277791977,
"step": 235,
"valid_targets_mean": 3900.1,
"valid_targets_min": 1037
},
{
"epoch": 1.5492730210016155,
"grad_norm": 0.2722574626172616,
"learning_rate": 3.4961014758533025e-05,
"loss": 0.441,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12816287577152252,
"step": 240,
"valid_targets_mean": 5076.9,
"valid_targets_min": 2138
},
{
"epoch": 1.5815831987075928,
"grad_norm": 0.2673145334904031,
"learning_rate": 3.4658117339848476e-05,
"loss": 0.439,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11988267302513123,
"step": 245,
"valid_targets_mean": 4658.4,
"valid_targets_min": 892
},
{
"epoch": 1.6138933764135701,
"grad_norm": 0.2918663090693661,
"learning_rate": 3.434777544728535e-05,
"loss": 0.4526,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12342672049999237,
"step": 250,
"valid_targets_mean": 4099.1,
"valid_targets_min": 1213
},
{
"epoch": 1.6462035541195477,
"grad_norm": 0.2736131631419664,
"learning_rate": 3.403014669536254e-05,
"loss": 0.4381,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1107107549905777,
"step": 255,
"valid_targets_mean": 4110.0,
"valid_targets_min": 900
},
{
"epoch": 1.678513731825525,
"grad_norm": 0.2856001690229933,
"learning_rate": 3.370539239940398e-05,
"loss": 0.4339,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10994768142700195,
"step": 260,
"valid_targets_mean": 4428.8,
"valid_targets_min": 885
},
{
"epoch": 1.7108239095315025,
"grad_norm": 0.27636624735054516,
"learning_rate": 3.337367749361079e-05,
"loss": 0.4359,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09362054616212845,
"step": 265,
"valid_targets_mean": 3526.1,
"valid_targets_min": 405
},
{
"epoch": 1.7431340872374799,
"grad_norm": 0.2892324920954573,
"learning_rate": 3.3035170447295477e-05,
"loss": 0.4489,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11963844299316406,
"step": 270,
"valid_targets_mean": 3514.6,
"valid_targets_min": 480
},
{
"epoch": 1.7754442649434572,
"grad_norm": 0.29559967298100737,
"learning_rate": 3.269004317932088e-05,
"loss": 0.4376,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10168647766113281,
"step": 275,
"valid_targets_mean": 3955.1,
"valid_targets_min": 843
},
{
"epoch": 1.8077544426494345,
"grad_norm": 0.29301743788955603,
"learning_rate": 3.2338470970786975e-05,
"loss": 0.4316,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09594365209341049,
"step": 280,
"valid_targets_mean": 4001.1,
"valid_targets_min": 739
},
{
"epoch": 1.8400646203554119,
"grad_norm": 0.2714210889052756,
"learning_rate": 3.198063237601028e-05,
"loss": 0.4358,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09736962616443634,
"step": 285,
"valid_targets_mean": 3362.9,
"valid_targets_min": 601
},
{
"epoch": 1.8723747980613892,
"grad_norm": 0.2938292684222412,
"learning_rate": 3.161670913184075e-05,
"loss": 0.4383,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10187675803899765,
"step": 290,
"valid_targets_mean": 3258.2,
"valid_targets_min": 961
},
{
"epoch": 1.9046849757673667,
"grad_norm": 0.2893151712006615,
"learning_rate": 3.1246886065362384e-05,
"loss": 0.4356,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12088151276111603,
"step": 295,
"valid_targets_mean": 3892.3,
"valid_targets_min": 1317
},
{
"epoch": 1.936995153473344,
"grad_norm": 0.29656932195947505,
"learning_rate": 3.0871351000024425e-05,
"loss": 0.4405,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11113157868385315,
"step": 300,
"valid_targets_mean": 4205.4,
"valid_targets_min": 633
},
{
"epoch": 1.9693053311793216,
"grad_norm": 0.291185824152099,
"learning_rate": 3.049029466025073e-05,
"loss": 0.4505,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12442485243082047,
"step": 305,
"valid_targets_mean": 4337.8,
"valid_targets_min": 1336
},
{
"epoch": 2.0,
"grad_norm": 0.3230631171230839,
"learning_rate": 3.010391057457582e-05,
"loss": 0.4491,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14164745807647705,
"step": 310,
"valid_targets_mean": 3661.9,
"valid_targets_min": 635
},
{
"epoch": 2.0323101777059773,
"grad_norm": 0.28933083551433375,
"learning_rate": 2.9712394977356824e-05,
"loss": 0.4268,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1224806010723114,
"step": 315,
"valid_targets_mean": 4159.8,
"valid_targets_min": 1026
},
{
"epoch": 2.0646203554119547,
"grad_norm": 0.2667030484032612,
"learning_rate": 2.931594670911119e-05,
"loss": 0.4272,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11036545783281326,
"step": 320,
"valid_targets_mean": 4104.9,
"valid_targets_min": 770
},
{
"epoch": 2.096930533117932,
"grad_norm": 0.2832908426862824,
"learning_rate": 2.891476711553077e-05,
"loss": 0.4245,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10099711269140244,
"step": 325,
"valid_targets_mean": 3807.2,
"valid_targets_min": 1082
},
{
"epoch": 2.1292407108239093,
"grad_norm": 0.29187045263894107,
"learning_rate": 2.850905994522364e-05,
"loss": 0.4346,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0898057371377945,
"step": 330,
"valid_targets_mean": 3095.2,
"valid_targets_min": 448
},
{
"epoch": 2.161550888529887,
"grad_norm": 0.2770526132588516,
"learning_rate": 2.8099031246235518e-05,
"loss": 0.4316,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09981365501880646,
"step": 335,
"valid_targets_mean": 3444.5,
"valid_targets_min": 927
},
{
"epoch": 2.1938610662358644,
"grad_norm": 0.2703022811543922,
"learning_rate": 2.768488926140336e-05,
"loss": 0.4302,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10014323890209198,
"step": 340,
"valid_targets_mean": 4162.8,
"valid_targets_min": 794
},
{
"epoch": 2.2261712439418417,
"grad_norm": 0.2802946227690351,
"learning_rate": 2.7266844322594228e-05,
"loss": 0.419,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10989852249622345,
"step": 345,
"valid_targets_mean": 4701.1,
"valid_targets_min": 682
},
{
"epoch": 2.258481421647819,
"grad_norm": 0.3051346896874913,
"learning_rate": 2.684510874388333e-05,
"loss": 0.4268,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1363740861415863,
"step": 350,
"valid_targets_mean": 4684.3,
"valid_targets_min": 640
},
{
"epoch": 2.2907915993537964,
"grad_norm": 0.27648399125184,
"learning_rate": 2.6419896713725194e-05,
"loss": 0.4197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10181058943271637,
"step": 355,
"valid_targets_mean": 4190.1,
"valid_targets_min": 1038
},
{
"epoch": 2.3231017770597737,
"grad_norm": 0.30499154547298046,
"learning_rate": 2.599142418617299e-05,
"loss": 0.4294,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10151579231023788,
"step": 360,
"valid_targets_mean": 3688.4,
"valid_targets_min": 726
},
{
"epoch": 2.355411954765751,
"grad_norm": 0.2957071744459196,
"learning_rate": 2.555990877120111e-05,
"loss": 0.4211,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09378227591514587,
"step": 365,
"valid_targets_mean": 2939.3,
"valid_targets_min": 819
},
{
"epoch": 2.387722132471729,
"grad_norm": 0.2761336471089066,
"learning_rate": 2.5125569624186782e-05,
"loss": 0.4153,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10543694347143173,
"step": 370,
"valid_targets_mean": 4215.9,
"valid_targets_min": 733
},
{
"epoch": 2.420032310177706,
"grad_norm": 0.2899428111740831,
"learning_rate": 2.4688627334606773e-05,
"loss": 0.4186,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08878014236688614,
"step": 375,
"valid_targets_mean": 3717.1,
"valid_targets_min": 521
},
{
"epoch": 2.4523424878836835,
"grad_norm": 0.2979664309978484,
"learning_rate": 2.4249303814005787e-05,
"loss": 0.4315,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11504903435707092,
"step": 380,
"valid_targets_mean": 3742.7,
"valid_targets_min": 722
},
{
"epoch": 2.484652665589661,
"grad_norm": 0.2994522528219869,
"learning_rate": 2.380782218329337e-05,
"loss": 0.4343,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10162772238254547,
"step": 385,
"valid_targets_mean": 3453.4,
"valid_targets_min": 576
},
{
"epoch": 2.516962843295638,
"grad_norm": 0.27098939969509317,
"learning_rate": 2.33644066594267e-05,
"loss": 0.4257,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10443132370710373,
"step": 390,
"valid_targets_mean": 3782.1,
"valid_targets_min": 953
},
{
"epoch": 2.5492730210016155,
"grad_norm": 0.27358095003105976,
"learning_rate": 2.2919282441536622e-05,
"loss": 0.4252,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12023317813873291,
"step": 395,
"valid_targets_mean": 3923.7,
"valid_targets_min": 666
},
{
"epoch": 2.581583198707593,
"grad_norm": 0.27561709551863967,
"learning_rate": 2.247267559655492e-05,
"loss": 0.425,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10897140204906464,
"step": 400,
"valid_targets_mean": 4004.1,
"valid_targets_min": 831
},
{
"epoch": 2.61389337641357,
"grad_norm": 0.2911055404776475,
"learning_rate": 2.202481294440086e-05,
"loss": 0.4272,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1356189250946045,
"step": 405,
"valid_targets_mean": 4587.9,
"valid_targets_min": 1835
},
{
"epoch": 2.6462035541195474,
"grad_norm": 0.27638152367327684,
"learning_rate": 2.1575921942785247e-05,
"loss": 0.4135,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11528658121824265,
"step": 410,
"valid_targets_mean": 4123.7,
"valid_targets_min": 1322
},
{
"epoch": 2.678513731825525,
"grad_norm": 0.2724723503201682,
"learning_rate": 2.1126230571690688e-05,
"loss": 0.4247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10935419052839279,
"step": 415,
"valid_targets_mean": 3782.7,
"valid_targets_min": 696
},
{
"epoch": 2.7108239095315025,
"grad_norm": 0.28022431584327206,
"learning_rate": 2.0675967217586453e-05,
"loss": 0.425,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11043274402618408,
"step": 420,
"valid_targets_mean": 4288.0,
"valid_targets_min": 1929
},
{
"epoch": 2.74313408723748,
"grad_norm": 0.26570243268701954,
"learning_rate": 2.022536055743702e-05,
"loss": 0.4336,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09703566879034042,
"step": 425,
"valid_targets_mean": 3890.9,
"valid_targets_min": 1285
},
{
"epoch": 2.775444264943457,
"grad_norm": 0.2803669278086659,
"learning_rate": 1.9774639442562994e-05,
"loss": 0.4206,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11521075665950775,
"step": 430,
"valid_targets_mean": 4614.6,
"valid_targets_min": 611
},
{
"epoch": 2.8077544426494345,
"grad_norm": 0.27372724895007716,
"learning_rate": 1.932403278241355e-05,
"loss": 0.4171,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10905209183692932,
"step": 435,
"valid_targets_mean": 4459.7,
"valid_targets_min": 1502
},
{
"epoch": 2.840064620355412,
"grad_norm": 0.2560272749894161,
"learning_rate": 1.8873769428309315e-05,
"loss": 0.4225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10236693173646927,
"step": 440,
"valid_targets_mean": 4706.2,
"valid_targets_min": 607
},
{
"epoch": 2.872374798061389,
"grad_norm": 0.2697096585907479,
"learning_rate": 1.8424078057214753e-05,
"loss": 0.4207,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10952171683311462,
"step": 445,
"valid_targets_mean": 4067.4,
"valid_targets_min": 550
},
{
"epoch": 2.904684975767367,
"grad_norm": 0.29101709596296904,
"learning_rate": 1.7975187055599153e-05,
"loss": 0.4259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12544356286525726,
"step": 450,
"valid_targets_mean": 3887.2,
"valid_targets_min": 730
},
{
"epoch": 2.936995153473344,
"grad_norm": 0.28158413880193023,
"learning_rate": 1.7527324403445086e-05,
"loss": 0.4164,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10754776746034622,
"step": 455,
"valid_targets_mean": 3837.2,
"valid_targets_min": 1232
},
{
"epoch": 2.9693053311793216,
"grad_norm": 0.2846122123476895,
"learning_rate": 1.708071755846338e-05,
"loss": 0.4265,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10734738409519196,
"step": 460,
"valid_targets_mean": 3481.8,
"valid_targets_min": 1306
},
{
"epoch": 3.0,
"grad_norm": 0.30539912011689785,
"learning_rate": 1.66355933405733e-05,
"loss": 0.4203,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1297743320465088,
"step": 465,
"valid_targets_mean": 3604.1,
"valid_targets_min": 608
},
{
"epoch": 3.0323101777059773,
"grad_norm": 0.2906283787124391,
"learning_rate": 1.619217781670663e-05,
"loss": 0.4222,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10506054759025574,
"step": 470,
"valid_targets_mean": 3606.9,
"valid_targets_min": 1303
},
{
"epoch": 3.0646203554119547,
"grad_norm": 0.28401688637835226,
"learning_rate": 1.5750696185994226e-05,
"loss": 0.4238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.092110276222229,
"step": 475,
"valid_targets_mean": 3426.1,
"valid_targets_min": 603
},
{
"epoch": 3.096930533117932,
"grad_norm": 0.261548774285929,
"learning_rate": 1.531137266539323e-05,
"loss": 0.4019,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10355602204799652,
"step": 480,
"valid_targets_mean": 4255.1,
"valid_targets_min": 873
},
{
"epoch": 3.1292407108239093,
"grad_norm": 0.3198608674027354,
"learning_rate": 1.4874430375813223e-05,
"loss": 0.4153,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10992392897605896,
"step": 485,
"valid_targets_mean": 3584.8,
"valid_targets_min": 534
},
{
"epoch": 3.161550888529887,
"grad_norm": 0.28922630514389835,
"learning_rate": 1.4440091228798896e-05,
"loss": 0.4203,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12728840112686157,
"step": 490,
"valid_targets_mean": 4236.7,
"valid_targets_min": 788
},
{
"epoch": 3.1938610662358644,
"grad_norm": 0.2779446885189012,
"learning_rate": 1.4008575813827023e-05,
"loss": 0.4154,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09563896059989929,
"step": 495,
"valid_targets_mean": 3544.6,
"valid_targets_min": 601
},
{
"epoch": 3.2261712439418417,
"grad_norm": 0.2700572606821521,
"learning_rate": 1.3580103286274816e-05,
"loss": 0.4162,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11817362904548645,
"step": 500,
"valid_targets_mean": 4341.7,
"valid_targets_min": 1233
},
{
"epoch": 3.258481421647819,
"grad_norm": 0.28169645716296227,
"learning_rate": 1.3154891256116677e-05,
"loss": 0.4118,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1011514663696289,
"step": 505,
"valid_targets_mean": 3950.1,
"valid_targets_min": 1079
},
{
"epoch": 3.2907915993537964,
"grad_norm": 0.2521242878848628,
"learning_rate": 1.2733155677405776e-05,
"loss": 0.4205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10162997990846634,
"step": 510,
"valid_targets_mean": 4916.3,
"valid_targets_min": 1809
},
{
"epoch": 3.3231017770597737,
"grad_norm": 0.26803173620107257,
"learning_rate": 1.2315110738596654e-05,
"loss": 0.4121,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09578994661569595,
"step": 515,
"valid_targets_mean": 3822.8,
"valid_targets_min": 1606
},
{
"epoch": 3.355411954765751,
"grad_norm": 0.27660293170452704,
"learning_rate": 1.1900968753764483e-05,
"loss": 0.4066,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1010737344622612,
"step": 520,
"valid_targets_mean": 3893.4,
"valid_targets_min": 909
},
{
"epoch": 3.387722132471729,
"grad_norm": 0.26313591447221535,
"learning_rate": 1.1490940054776365e-05,
"loss": 0.4181,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10495524108409882,
"step": 525,
"valid_targets_mean": 4405.3,
"valid_targets_min": 808
},
{
"epoch": 3.420032310177706,
"grad_norm": 0.27385516656009323,
"learning_rate": 1.1085232884469236e-05,
"loss": 0.4103,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09986208379268646,
"step": 530,
"valid_targets_mean": 3620.4,
"valid_targets_min": 748
},
{
"epoch": 3.4523424878836835,
"grad_norm": 0.27628551895346487,
"learning_rate": 1.0684053290888824e-05,
"loss": 0.4018,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11552257835865021,
"step": 535,
"valid_targets_mean": 4266.4,
"valid_targets_min": 640
},
{
"epoch": 3.484652665589661,
"grad_norm": 0.2718878735955647,
"learning_rate": 1.0287605022643183e-05,
"loss": 0.4201,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09710375964641571,
"step": 540,
"valid_targets_mean": 3415.8,
"valid_targets_min": 371
},
{
"epoch": 3.516962843295638,
"grad_norm": 0.2865387366012006,
"learning_rate": 9.896089425424188e-06,
"loss": 0.4285,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07953400164842606,
"step": 545,
"valid_targets_mean": 2944.9,
"valid_targets_min": 712
},
{
"epoch": 3.5492730210016155,
"grad_norm": 0.25966498720410713,
"learning_rate": 9.509705339749277e-06,
"loss": 0.4076,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09222520887851715,
"step": 550,
"valid_targets_mean": 3501.2,
"valid_targets_min": 667
},
{
"epoch": 3.581583198707593,
"grad_norm": 0.2713083547984801,
"learning_rate": 9.12864899997558e-06,
"loss": 0.4175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1106656938791275,
"step": 555,
"valid_targets_mean": 3612.1,
"valid_targets_min": 1550
},
{
"epoch": 3.61389337641357,
"grad_norm": 0.2737405302240275,
"learning_rate": 8.753113934637621e-06,
"loss": 0.4122,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09648142755031586,
"step": 560,
"valid_targets_mean": 3494.7,
"valid_targets_min": 1065
},
{
"epoch": 3.6462035541195474,
"grad_norm": 0.28551738045137637,
"learning_rate": 8.383290868159256e-06,
"loss": 0.4106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09706847369670868,
"step": 565,
"valid_targets_mean": 3369.0,
"valid_targets_min": 570
},
{
"epoch": 3.678513731825525,
"grad_norm": 0.2704546745998757,
"learning_rate": 8.01936762398972e-06,
"loss": 0.4089,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09082476794719696,
"step": 570,
"valid_targets_mean": 3314.4,
"valid_targets_min": 1093
},
{
"epoch": 3.7108239095315025,
"grad_norm": 0.2883589575944319,
"learning_rate": 7.661529029213023e-06,
"loss": 0.404,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11036745458841324,
"step": 575,
"valid_targets_mean": 4221.6,
"valid_targets_min": 892
},
{
"epoch": 3.74313408723748,
"grad_norm": 0.2719955608173771,
"learning_rate": 7.3099568206791315e-06,
"loss": 0.41,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10909552872180939,
"step": 580,
"valid_targets_mean": 3840.0,
"valid_targets_min": 463
},
{
"epoch": 3.775444264943457,
"grad_norm": 0.27643080510215096,
"learning_rate": 6.964829552704526e-06,
"loss": 0.4139,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10257598757743835,
"step": 585,
"valid_targets_mean": 3169.1,
"valid_targets_min": 682
},
{
"epoch": 3.8077544426494345,
"grad_norm": 0.2689937521699833,
"learning_rate": 6.62632250638922e-06,
"loss": 0.4156,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09786760061979294,
"step": 590,
"valid_targets_mean": 3638.8,
"valid_targets_min": 1010
},
{
"epoch": 3.840064620355412,
"grad_norm": 0.34681376444735507,
"learning_rate": 6.2946076005960184e-06,
"loss": 0.4102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10728378593921661,
"step": 595,
"valid_targets_mean": 4384.9,
"valid_targets_min": 521
},
{
"epoch": 3.872374798061389,
"grad_norm": 0.2749441764442808,
"learning_rate": 5.969853304637467e-06,
"loss": 0.4144,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09306375682353973,
"step": 600,
"valid_targets_mean": 3161.8,
"valid_targets_min": 914
},
{
"epoch": 3.904684975767367,
"grad_norm": 0.27306877129599383,
"learning_rate": 5.65222455271466e-06,
"loss": 0.4205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08627478778362274,
"step": 605,
"valid_targets_mean": 3452.9,
"valid_targets_min": 326
},
{
"epoch": 3.936995153473344,
"grad_norm": 0.2786598032003557,
"learning_rate": 5.341882660151527e-06,
"loss": 0.4107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1056986153125763,
"step": 610,
"valid_targets_mean": 3757.9,
"valid_targets_min": 600
},
{
"epoch": 3.9693053311793216,
"grad_norm": 0.24556167869153683,
"learning_rate": 5.038985241466978e-06,
"loss": 0.4056,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09949535876512527,
"step": 615,
"valid_targets_mean": 3879.6,
"valid_targets_min": 442
},
{
"epoch": 4.0,
"grad_norm": 0.4202349721331711,
"learning_rate": 4.7436861303266255e-06,
"loss": 0.4157,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12905532121658325,
"step": 620,
"valid_targets_mean": 3440.1,
"valid_targets_min": 697
},
{
"epoch": 4.032310177705978,
"grad_norm": 0.26293159983995606,
"learning_rate": 4.456135301414672e-06,
"loss": 0.4067,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11424065381288528,
"step": 625,
"valid_targets_mean": 3840.8,
"valid_targets_min": 681
},
{
"epoch": 4.064620355411955,
"grad_norm": 0.43342150888282877,
"learning_rate": 4.176478794265737e-06,
"loss": 0.4057,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10436274111270905,
"step": 630,
"valid_targets_mean": 3659.8,
"valid_targets_min": 1872
},
{
"epoch": 4.096930533117932,
"grad_norm": 0.3801825882709417,
"learning_rate": 3.904858639095174e-06,
"loss": 0.4204,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11651758849620819,
"step": 635,
"valid_targets_mean": 4127.8,
"valid_targets_min": 1477
},
{
"epoch": 4.129240710823909,
"grad_norm": 0.25828199655425915,
"learning_rate": 3.641412784665648e-06,
"loss": 0.4128,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10827606171369553,
"step": 640,
"valid_targets_mean": 4205.9,
"valid_targets_min": 903
},
{
"epoch": 4.161550888529887,
"grad_norm": 0.26223751778594456,
"learning_rate": 3.3862750282265798e-06,
"loss": 0.4133,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1035182774066925,
"step": 645,
"valid_targets_mean": 3863.5,
"valid_targets_min": 1102
},
{
"epoch": 4.193861066235864,
"grad_norm": 0.26844737353091586,
"learning_rate": 3.1395749475620185e-06,
"loss": 0.4163,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11465262621641159,
"step": 650,
"valid_targets_mean": 4107.2,
"valid_targets_min": 2044
},
{
"epoch": 4.226171243941842,
"grad_norm": 0.2778999716523805,
"learning_rate": 2.9014378351814866e-06,
"loss": 0.4076,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09468638151884079,
"step": 655,
"valid_targets_mean": 3168.5,
"valid_targets_min": 1039
},
{
"epoch": 4.258481421647819,
"grad_norm": 0.26830655466160835,
"learning_rate": 2.671984634687186e-06,
"loss": 0.3974,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0945512056350708,
"step": 660,
"valid_targets_mean": 3421.4,
"valid_targets_min": 840
},
{
"epoch": 4.290791599353796,
"grad_norm": 0.2873158323777035,
"learning_rate": 2.4513318793499274e-06,
"loss": 0.4098,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09555218368768692,
"step": 665,
"valid_targets_mean": 3014.9,
"valid_targets_min": 472
},
{
"epoch": 4.323101777059774,
"grad_norm": 0.24585209774411093,
"learning_rate": 2.239591632924907e-06,
"loss": 0.4041,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09621794521808624,
"step": 670,
"valid_targets_mean": 3395.1,
"valid_targets_min": 468
},
{
"epoch": 4.355411954765751,
"grad_norm": 0.26594780949791114,
"learning_rate": 2.0368714327374905e-06,
"loss": 0.4119,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09523327648639679,
"step": 675,
"valid_targets_mean": 3977.6,
"valid_targets_min": 976
},
{
"epoch": 4.387722132471729,
"grad_norm": 0.2616672301186846,
"learning_rate": 1.8432742350678023e-06,
"loss": 0.4168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10347115993499756,
"step": 680,
"valid_targets_mean": 3634.4,
"valid_targets_min": 437
},
{
"epoch": 4.420032310177706,
"grad_norm": 0.25286821109840774,
"learning_rate": 1.6588983628619404e-06,
"loss": 0.4082,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09281186759471893,
"step": 685,
"valid_targets_mean": 4083.2,
"valid_targets_min": 1570
},
{
"epoch": 4.4523424878836835,
"grad_norm": 0.27245429614361333,
"learning_rate": 1.4838374557963064e-06,
"loss": 0.4077,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10341990739107132,
"step": 690,
"valid_targets_mean": 3520.6,
"valid_targets_min": 1209
},
{
"epoch": 4.48465266558966,
"grad_norm": 0.2571028998172337,
"learning_rate": 1.3181804227204897e-06,
"loss": 0.4151,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10382469743490219,
"step": 695,
"valid_targets_mean": 4107.4,
"valid_targets_min": 990
},
{
"epoch": 4.516962843295638,
"grad_norm": 0.25853771008397497,
"learning_rate": 1.1620113965027823e-06,
"loss": 0.4088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11473739147186279,
"step": 700,
"valid_targets_mean": 4600.4,
"valid_targets_min": 1167
},
{
"epoch": 4.549273021001616,
"grad_norm": 0.25555348361237107,
"learning_rate": 1.0154096913012989e-06,
"loss": 0.4119,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09417924284934998,
"step": 705,
"valid_targets_mean": 3880.7,
"valid_targets_min": 714
},
{
"epoch": 4.581583198707593,
"grad_norm": 0.25034785262389336,
"learning_rate": 8.784497622823874e-07,
"loss": 0.4082,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09192933887243271,
"step": 710,
"valid_targets_mean": 3490.3,
"valid_targets_min": 1247
},
{
"epoch": 4.613893376413571,
"grad_norm": 0.2639146483604065,
"learning_rate": 7.512011678068077e-07,
"loss": 0.4027,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11304928362369537,
"step": 715,
"valid_targets_mean": 4564.2,
"valid_targets_min": 2029
},
{
"epoch": 4.646203554119547,
"grad_norm": 0.25644367484928005,
"learning_rate": 6.33728534102862e-07,
"loss": 0.4029,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09347259998321533,
"step": 720,
"valid_targets_mean": 3595.4,
"valid_targets_min": 937
},
{
"epoch": 4.678513731825525,
"grad_norm": 0.26466164823269805,
"learning_rate": 5.260915224444207e-07,
"loss": 0.406,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0985584706068039,
"step": 725,
"valid_targets_mean": 3558.4,
"valid_targets_min": 1108
},
{
"epoch": 4.710823909531502,
"grad_norm": 0.26762760315162576,
"learning_rate": 4.2834479885052846e-07,
"loss": 0.4151,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09770895540714264,
"step": 730,
"valid_targets_mean": 3214.7,
"valid_targets_min": 691
},
{
"epoch": 4.74313408723748,
"grad_norm": 0.27795743352938646,
"learning_rate": 3.4053800632196434e-07,
"loss": 0.4173,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12234581261873245,
"step": 735,
"valid_targets_mean": 4275.8,
"valid_targets_min": 856
},
{
"epoch": 4.775444264943458,
"grad_norm": 0.25529297793215555,
"learning_rate": 2.627157396288604e-07,
"loss": 0.3984,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07973651587963104,
"step": 740,
"valid_targets_mean": 3078.0,
"valid_targets_min": 766
},
{
"epoch": 4.8077544426494345,
"grad_norm": 0.2742977151583616,
"learning_rate": 1.94917522662188e-07,
"loss": 0.399,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10854361206293106,
"step": 745,
"valid_targets_mean": 4476.7,
"valid_targets_min": 982
},
{
"epoch": 4.840064620355412,
"grad_norm": 0.25263871685885614,
"learning_rate": 1.3717778836060735e-07,
"loss": 0.4039,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09801102429628372,
"step": 750,
"valid_targets_mean": 4007.4,
"valid_targets_min": 1477
},
{
"epoch": 4.872374798061389,
"grad_norm": 0.26280785954762587,
"learning_rate": 8.952586122287443e-08,
"loss": 0.4127,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11048153042793274,
"step": 755,
"valid_targets_mean": 3341.8,
"valid_targets_min": 811
},
{
"epoch": 4.904684975767367,
"grad_norm": 0.27488205591473774,
"learning_rate": 5.1985942414709556e-08,
"loss": 0.4107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09189851582050323,
"step": 760,
"valid_targets_mean": 3471.2,
"valid_targets_min": 889
},
{
"epoch": 4.936995153473344,
"grad_norm": 0.25480665657723,
"learning_rate": 2.4577097477647137e-08,
"loss": 0.4012,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09942759573459625,
"step": 765,
"valid_targets_mean": 3963.9,
"valid_targets_min": 1524
},
{
"epoch": 4.969305331179322,
"grad_norm": 0.39531924920960926,
"learning_rate": 7.313246646150074e-09,
"loss": 0.409,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0939750149846077,
"step": 770,
"valid_targets_mean": 4071.9,
"valid_targets_min": 725
},
{
"epoch": 5.0,
"grad_norm": 0.31807527946415975,
"learning_rate": 2.0315777789159387e-10,
"loss": 0.4055,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1633167266845703,
"step": 775,
"valid_targets_mean": 3996.9,
"valid_targets_min": 611
},
{
"epoch": 5.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1633167266845703,
"step": 775,
"total_flos": 1.2782147307999068e+18,
"train_loss": 0.4453052529981059,
"train_runtime": 15886.2519,
"train_samples_per_second": 3.115,
"train_steps_per_second": 0.049,
"valid_targets_mean": 3996.9,
"valid_targets_min": 611
}
],
"logging_steps": 5,
"max_steps": 775,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.2782147307999068e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}