glm46-Toolscale-tasks-traces / trainer_state.json
penfever's picture
End of training
006e4c6 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 1778,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01968503937007874,
"grad_norm": 13.377326898586817,
"learning_rate": 8.98876404494382e-07,
"loss": 0.7475,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.38955485820770264,
"step": 5,
"valid_targets_mean": 4678.4,
"valid_targets_min": 3013
},
{
"epoch": 0.03937007874015748,
"grad_norm": 12.867038390711407,
"learning_rate": 2.02247191011236e-06,
"loss": 0.7279,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3539108633995056,
"step": 10,
"valid_targets_mean": 4779.2,
"valid_targets_min": 1942
},
{
"epoch": 0.05905511811023622,
"grad_norm": 7.867476001273223,
"learning_rate": 3.146067415730337e-06,
"loss": 0.6706,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3204025626182556,
"step": 15,
"valid_targets_mean": 5289.4,
"valid_targets_min": 2346
},
{
"epoch": 0.07874015748031496,
"grad_norm": 4.730850099146031,
"learning_rate": 4.269662921348315e-06,
"loss": 0.6165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2844008803367615,
"step": 20,
"valid_targets_mean": 5087.5,
"valid_targets_min": 2422
},
{
"epoch": 0.0984251968503937,
"grad_norm": 3.44075242801273,
"learning_rate": 5.393258426966292e-06,
"loss": 0.5435,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2601749897003174,
"step": 25,
"valid_targets_mean": 4788.9,
"valid_targets_min": 1793
},
{
"epoch": 0.11811023622047244,
"grad_norm": 1.8265052589196686,
"learning_rate": 6.51685393258427e-06,
"loss": 0.5004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.253802090883255,
"step": 30,
"valid_targets_mean": 3993.4,
"valid_targets_min": 2457
},
{
"epoch": 0.1377952755905512,
"grad_norm": 1.1615066513491945,
"learning_rate": 7.640449438202247e-06,
"loss": 0.4736,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2736068367958069,
"step": 35,
"valid_targets_mean": 5064.5,
"valid_targets_min": 3999
},
{
"epoch": 0.15748031496062992,
"grad_norm": 0.876265338117197,
"learning_rate": 8.764044943820226e-06,
"loss": 0.4324,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20510250329971313,
"step": 40,
"valid_targets_mean": 4028.5,
"valid_targets_min": 1906
},
{
"epoch": 0.17716535433070865,
"grad_norm": 0.7489621590178446,
"learning_rate": 9.887640449438202e-06,
"loss": 0.4114,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2077813297510147,
"step": 45,
"valid_targets_mean": 5186.0,
"valid_targets_min": 2932
},
{
"epoch": 0.1968503937007874,
"grad_norm": 0.6500573206697052,
"learning_rate": 1.101123595505618e-05,
"loss": 0.4005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20258010923862457,
"step": 50,
"valid_targets_mean": 5590.5,
"valid_targets_min": 3169
},
{
"epoch": 0.21653543307086615,
"grad_norm": 0.6512845659247514,
"learning_rate": 1.213483146067416e-05,
"loss": 0.365,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18620145320892334,
"step": 55,
"valid_targets_mean": 4875.2,
"valid_targets_min": 2637
},
{
"epoch": 0.23622047244094488,
"grad_norm": 0.5972173412666366,
"learning_rate": 1.3258426966292135e-05,
"loss": 0.3529,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17318624258041382,
"step": 60,
"valid_targets_mean": 4512.5,
"valid_targets_min": 1417
},
{
"epoch": 0.2559055118110236,
"grad_norm": 0.6476700761815668,
"learning_rate": 1.4382022471910113e-05,
"loss": 0.3551,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17597442865371704,
"step": 65,
"valid_targets_mean": 4269.4,
"valid_targets_min": 2649
},
{
"epoch": 0.2755905511811024,
"grad_norm": 0.6395534766246243,
"learning_rate": 1.5505617977528093e-05,
"loss": 0.3442,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18326064944267273,
"step": 70,
"valid_targets_mean": 4353.8,
"valid_targets_min": 2902
},
{
"epoch": 0.2952755905511811,
"grad_norm": 0.6717242035489265,
"learning_rate": 1.662921348314607e-05,
"loss": 0.3233,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1446356326341629,
"step": 75,
"valid_targets_mean": 5262.8,
"valid_targets_min": 3696
},
{
"epoch": 0.31496062992125984,
"grad_norm": 0.5715249456701663,
"learning_rate": 1.7752808988764045e-05,
"loss": 0.3186,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16611459851264954,
"step": 80,
"valid_targets_mean": 4790.8,
"valid_targets_min": 3340
},
{
"epoch": 0.3346456692913386,
"grad_norm": 0.5657793529460273,
"learning_rate": 1.8876404494382024e-05,
"loss": 0.3115,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1834031641483307,
"step": 85,
"valid_targets_mean": 5281.4,
"valid_targets_min": 2647
},
{
"epoch": 0.3543307086614173,
"grad_norm": 0.5396050121171856,
"learning_rate": 2e-05,
"loss": 0.3042,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1623716652393341,
"step": 90,
"valid_targets_mean": 5269.4,
"valid_targets_min": 3708
},
{
"epoch": 0.37401574803149606,
"grad_norm": 0.5878961150139292,
"learning_rate": 2.1123595505617976e-05,
"loss": 0.3107,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17046542465686798,
"step": 95,
"valid_targets_mean": 4884.6,
"valid_targets_min": 3463
},
{
"epoch": 0.3937007874015748,
"grad_norm": 0.5598300866750495,
"learning_rate": 2.2247191011235958e-05,
"loss": 0.3049,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14702649414539337,
"step": 100,
"valid_targets_mean": 4995.5,
"valid_targets_min": 2833
},
{
"epoch": 0.41338582677165353,
"grad_norm": 0.6134785924575019,
"learning_rate": 2.3370786516853933e-05,
"loss": 0.2907,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13509149849414825,
"step": 105,
"valid_targets_mean": 3742.6,
"valid_targets_min": 1345
},
{
"epoch": 0.4330708661417323,
"grad_norm": 0.5515976965757202,
"learning_rate": 2.4494382022471914e-05,
"loss": 0.2891,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12262013554573059,
"step": 110,
"valid_targets_mean": 4388.1,
"valid_targets_min": 2261
},
{
"epoch": 0.452755905511811,
"grad_norm": 0.5292890660276033,
"learning_rate": 2.561797752808989e-05,
"loss": 0.2806,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13413403928279877,
"step": 115,
"valid_targets_mean": 4728.8,
"valid_targets_min": 1767
},
{
"epoch": 0.47244094488188976,
"grad_norm": 0.5681550467939387,
"learning_rate": 2.6741573033707867e-05,
"loss": 0.2962,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12195228040218353,
"step": 120,
"valid_targets_mean": 3707.5,
"valid_targets_min": 2739
},
{
"epoch": 0.4921259842519685,
"grad_norm": 0.5401370762488225,
"learning_rate": 2.7865168539325845e-05,
"loss": 0.2722,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13443610072135925,
"step": 125,
"valid_targets_mean": 4273.0,
"valid_targets_min": 1690
},
{
"epoch": 0.5118110236220472,
"grad_norm": 0.6079952597585139,
"learning_rate": 2.8988764044943823e-05,
"loss": 0.2701,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13780900835990906,
"step": 130,
"valid_targets_mean": 4706.5,
"valid_targets_min": 2368
},
{
"epoch": 0.531496062992126,
"grad_norm": 0.5805201834506256,
"learning_rate": 3.0112359550561798e-05,
"loss": 0.2808,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14059343934059143,
"step": 135,
"valid_targets_mean": 4191.0,
"valid_targets_min": 2822
},
{
"epoch": 0.5511811023622047,
"grad_norm": 0.5287610402839743,
"learning_rate": 3.123595505617978e-05,
"loss": 0.2634,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13570359349250793,
"step": 140,
"valid_targets_mean": 5246.1,
"valid_targets_min": 2401
},
{
"epoch": 0.5708661417322834,
"grad_norm": 0.5863915877892256,
"learning_rate": 3.235955056179776e-05,
"loss": 0.2745,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14580059051513672,
"step": 145,
"valid_targets_mean": 5247.1,
"valid_targets_min": 3654
},
{
"epoch": 0.5905511811023622,
"grad_norm": 0.5157922011298175,
"learning_rate": 3.3483146067415736e-05,
"loss": 0.2669,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12821562588214874,
"step": 150,
"valid_targets_mean": 5522.6,
"valid_targets_min": 4245
},
{
"epoch": 0.610236220472441,
"grad_norm": 0.569286988628328,
"learning_rate": 3.4606741573033714e-05,
"loss": 0.2702,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12619233131408691,
"step": 155,
"valid_targets_mean": 4623.1,
"valid_targets_min": 2863
},
{
"epoch": 0.6299212598425197,
"grad_norm": 0.559971526520718,
"learning_rate": 3.5730337078651685e-05,
"loss": 0.262,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1247473731637001,
"step": 160,
"valid_targets_mean": 4603.0,
"valid_targets_min": 3131
},
{
"epoch": 0.6496062992125984,
"grad_norm": 0.9167539387348389,
"learning_rate": 3.685393258426967e-05,
"loss": 0.2688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13964977860450745,
"step": 165,
"valid_targets_mean": 4778.0,
"valid_targets_min": 2913
},
{
"epoch": 0.6692913385826772,
"grad_norm": 0.5722654745509507,
"learning_rate": 3.797752808988764e-05,
"loss": 0.2609,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15202975273132324,
"step": 170,
"valid_targets_mean": 4783.8,
"valid_targets_min": 3134
},
{
"epoch": 0.6889763779527559,
"grad_norm": 0.8027389387891366,
"learning_rate": 3.910112359550562e-05,
"loss": 0.2599,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1353817582130432,
"step": 175,
"valid_targets_mean": 4434.0,
"valid_targets_min": 2312
},
{
"epoch": 0.7086614173228346,
"grad_norm": 0.6634626818025015,
"learning_rate": 3.999996144687019e-05,
"loss": 0.2671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12160596996545792,
"step": 180,
"valid_targets_mean": 4279.1,
"valid_targets_min": 2944
},
{
"epoch": 0.7283464566929134,
"grad_norm": 0.6011679430592792,
"learning_rate": 3.9998612102933544e-05,
"loss": 0.2643,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12006179243326187,
"step": 185,
"valid_targets_mean": 4417.9,
"valid_targets_min": 1909
},
{
"epoch": 0.7480314960629921,
"grad_norm": 0.547919195160097,
"learning_rate": 3.999533525113843e-05,
"loss": 0.2687,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14171993732452393,
"step": 190,
"valid_targets_mean": 4230.6,
"valid_targets_min": 2168
},
{
"epoch": 0.7677165354330708,
"grad_norm": 0.560316989395598,
"learning_rate": 3.9990131207314634e-05,
"loss": 0.2573,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14378751814365387,
"step": 195,
"valid_targets_mean": 5504.2,
"valid_targets_min": 1806
},
{
"epoch": 0.7874015748031497,
"grad_norm": 0.5690112662530462,
"learning_rate": 3.998300047303874e-05,
"loss": 0.2554,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14783123135566711,
"step": 200,
"valid_targets_mean": 5525.0,
"valid_targets_min": 4445
},
{
"epoch": 0.8070866141732284,
"grad_norm": 2.3450199445980067,
"learning_rate": 3.997394373558576e-05,
"loss": 0.2572,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1219000518321991,
"step": 205,
"valid_targets_mean": 3956.6,
"valid_targets_min": 1882
},
{
"epoch": 0.8267716535433071,
"grad_norm": 0.4978819107570836,
"learning_rate": 3.9962961867862894e-05,
"loss": 0.2506,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13276076316833496,
"step": 210,
"valid_targets_mean": 5146.9,
"valid_targets_min": 3580
},
{
"epoch": 0.8464566929133859,
"grad_norm": 0.5531498839376738,
"learning_rate": 3.995005592832541e-05,
"loss": 0.2435,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11952689290046692,
"step": 215,
"valid_targets_mean": 4152.2,
"valid_targets_min": 2934
},
{
"epoch": 0.8661417322834646,
"grad_norm": 0.5763234070507463,
"learning_rate": 3.993522716087462e-05,
"loss": 0.2638,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13357281684875488,
"step": 220,
"valid_targets_mean": 4199.6,
"valid_targets_min": 3054
},
{
"epoch": 0.8858267716535433,
"grad_norm": 0.5409044947248569,
"learning_rate": 3.991847699473801e-05,
"loss": 0.2617,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16128699481487274,
"step": 225,
"valid_targets_mean": 5807.9,
"valid_targets_min": 3396
},
{
"epoch": 0.905511811023622,
"grad_norm": 0.5461745315822698,
"learning_rate": 3.989980704433144e-05,
"loss": 0.2487,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12190870940685272,
"step": 230,
"valid_targets_mean": 4621.0,
"valid_targets_min": 848
},
{
"epoch": 0.9251968503937008,
"grad_norm": 0.5690448362044543,
"learning_rate": 3.98792191091036e-05,
"loss": 0.2435,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13951994478702545,
"step": 235,
"valid_targets_mean": 5036.2,
"valid_targets_min": 1140
},
{
"epoch": 0.9448818897637795,
"grad_norm": 0.6352267743435406,
"learning_rate": 3.9856715173362527e-05,
"loss": 0.2454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12012356519699097,
"step": 240,
"valid_targets_mean": 5087.2,
"valid_targets_min": 4575
},
{
"epoch": 0.9645669291338582,
"grad_norm": 0.5084076154508048,
"learning_rate": 3.9832297406084386e-05,
"loss": 0.2448,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.111626535654068,
"step": 245,
"valid_targets_mean": 5866.0,
"valid_targets_min": 3710
},
{
"epoch": 0.984251968503937,
"grad_norm": 0.5585535641154713,
"learning_rate": 3.980596816070442e-05,
"loss": 0.246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11693195253610611,
"step": 250,
"valid_targets_mean": 4081.4,
"valid_targets_min": 2244
},
{
"epoch": 1.0039370078740157,
"grad_norm": 0.5000544856776992,
"learning_rate": 3.97777299748901e-05,
"loss": 0.251,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13787493109703064,
"step": 255,
"valid_targets_mean": 5705.5,
"valid_targets_min": 2544
},
{
"epoch": 1.0236220472440944,
"grad_norm": 0.5681012398914147,
"learning_rate": 3.974758557029653e-05,
"loss": 0.235,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12040223926305771,
"step": 260,
"valid_targets_mean": 4746.1,
"valid_targets_min": 2055
},
{
"epoch": 1.0433070866141732,
"grad_norm": 0.6404779799328454,
"learning_rate": 3.971553785230418e-05,
"loss": 0.238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13121241331100464,
"step": 265,
"valid_targets_mean": 4746.4,
"valid_targets_min": 2529
},
{
"epoch": 1.0629921259842519,
"grad_norm": 0.5539946147548086,
"learning_rate": 3.968158990973881e-05,
"loss": 0.23,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11273740977048874,
"step": 270,
"valid_targets_mean": 4944.6,
"valid_targets_min": 3149
},
{
"epoch": 1.0826771653543308,
"grad_norm": 0.5598831008991991,
"learning_rate": 3.964574501457378e-05,
"loss": 0.2364,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11539462208747864,
"step": 275,
"valid_targets_mean": 4844.2,
"valid_targets_min": 1548
},
{
"epoch": 1.1023622047244095,
"grad_norm": 0.6481482966095616,
"learning_rate": 3.960800662161469e-05,
"loss": 0.2415,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14174574613571167,
"step": 280,
"valid_targets_mean": 4164.8,
"valid_targets_min": 2416
},
{
"epoch": 1.1220472440944882,
"grad_norm": 0.5705041912855937,
"learning_rate": 3.9568378368166406e-05,
"loss": 0.2412,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12836569547653198,
"step": 285,
"valid_targets_mean": 4750.2,
"valid_targets_min": 3645
},
{
"epoch": 1.141732283464567,
"grad_norm": 0.5082077633004719,
"learning_rate": 3.952686407368247e-05,
"loss": 0.2241,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11120018362998962,
"step": 290,
"valid_targets_mean": 4768.6,
"valid_targets_min": 1974
},
{
"epoch": 1.1614173228346456,
"grad_norm": 0.5706378094952566,
"learning_rate": 3.948346773939699e-05,
"loss": 0.2301,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11293789744377136,
"step": 295,
"valid_targets_mean": 4541.8,
"valid_targets_min": 2743
},
{
"epoch": 1.1811023622047245,
"grad_norm": 0.4922308378914635,
"learning_rate": 3.943819354793899e-05,
"loss": 0.2291,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10452602803707123,
"step": 300,
"valid_targets_mean": 5448.6,
"valid_targets_min": 2404
},
{
"epoch": 1.2007874015748032,
"grad_norm": 0.5387400179528662,
"learning_rate": 3.9391045862929275e-05,
"loss": 0.2295,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1042381078004837,
"step": 305,
"valid_targets_mean": 4613.5,
"valid_targets_min": 3431
},
{
"epoch": 1.220472440944882,
"grad_norm": 0.543270303331542,
"learning_rate": 3.934202922855985e-05,
"loss": 0.231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12045516073703766,
"step": 310,
"valid_targets_mean": 4830.5,
"valid_targets_min": 2174
},
{
"epoch": 1.2401574803149606,
"grad_norm": 0.6259731725126749,
"learning_rate": 3.9291148369155964e-05,
"loss": 0.2317,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12054137885570526,
"step": 315,
"valid_targets_mean": 5448.6,
"valid_targets_min": 3038
},
{
"epoch": 1.2598425196850394,
"grad_norm": 0.5170195225016588,
"learning_rate": 3.9238408188720745e-05,
"loss": 0.228,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.137093648314476,
"step": 320,
"valid_targets_mean": 5749.8,
"valid_targets_min": 4157
},
{
"epoch": 1.279527559055118,
"grad_norm": 0.536250207131354,
"learning_rate": 3.918381377046255e-05,
"loss": 0.2295,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11249731481075287,
"step": 325,
"valid_targets_mean": 4286.9,
"valid_targets_min": 1763
},
{
"epoch": 1.2992125984251968,
"grad_norm": 1.2233809175419852,
"learning_rate": 3.9127370376305045e-05,
"loss": 0.2284,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11321394890546799,
"step": 330,
"valid_targets_mean": 3661.2,
"valid_targets_min": 2246
},
{
"epoch": 1.3188976377952755,
"grad_norm": 0.5244208110741391,
"learning_rate": 3.906908344638002e-05,
"loss": 0.2313,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12575654685497284,
"step": 335,
"valid_targets_mean": 5363.8,
"valid_targets_min": 3316
},
{
"epoch": 1.3385826771653544,
"grad_norm": 0.5058822126683837,
"learning_rate": 3.900895859850313e-05,
"loss": 0.2177,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11146020889282227,
"step": 340,
"valid_targets_mean": 5342.9,
"valid_targets_min": 3870
},
{
"epoch": 1.358267716535433,
"grad_norm": 0.6083719615299573,
"learning_rate": 3.8947001627632326e-05,
"loss": 0.2347,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11437221616506577,
"step": 345,
"valid_targets_mean": 3867.2,
"valid_targets_min": 2085
},
{
"epoch": 1.3779527559055118,
"grad_norm": 0.49824452517044576,
"learning_rate": 3.888321850530943e-05,
"loss": 0.228,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10427597165107727,
"step": 350,
"valid_targets_mean": 5062.4,
"valid_targets_min": 3465
},
{
"epoch": 1.3976377952755905,
"grad_norm": 0.6132922001159666,
"learning_rate": 3.8817615379084514e-05,
"loss": 0.2347,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11571554839611053,
"step": 355,
"valid_targets_mean": 4749.2,
"valid_targets_min": 3897
},
{
"epoch": 1.4173228346456692,
"grad_norm": 0.5252013989637385,
"learning_rate": 3.875019857192343e-05,
"loss": 0.2254,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09572583436965942,
"step": 360,
"valid_targets_mean": 4404.6,
"valid_targets_min": 1445
},
{
"epoch": 1.4370078740157481,
"grad_norm": 0.5170101847117207,
"learning_rate": 3.8680974581598375e-05,
"loss": 0.2292,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10469076037406921,
"step": 365,
"valid_targets_mean": 4567.5,
"valid_targets_min": 3487
},
{
"epoch": 1.4566929133858268,
"grad_norm": 0.46837458769096857,
"learning_rate": 3.860995008006161e-05,
"loss": 0.228,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10281511396169662,
"step": 370,
"valid_targets_mean": 4906.5,
"valid_targets_min": 1889
},
{
"epoch": 1.4763779527559056,
"grad_norm": 0.5308373621274018,
"learning_rate": 3.853713191280242e-05,
"loss": 0.2268,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11627548933029175,
"step": 375,
"valid_targets_mean": 4442.6,
"valid_targets_min": 3512
},
{
"epoch": 1.4960629921259843,
"grad_norm": 0.5050848403870765,
"learning_rate": 3.846252709818733e-05,
"loss": 0.2255,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09501666575670242,
"step": 380,
"valid_targets_mean": 4419.6,
"valid_targets_min": 2980
},
{
"epoch": 1.515748031496063,
"grad_norm": 0.5479569905901934,
"learning_rate": 3.8386142826783645e-05,
"loss": 0.225,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09823480248451233,
"step": 385,
"valid_targets_mean": 4140.8,
"valid_targets_min": 2848
},
{
"epoch": 1.5354330708661417,
"grad_norm": 0.5131918332765538,
"learning_rate": 3.830798646066642e-05,
"loss": 0.2239,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10701829940080643,
"step": 390,
"valid_targets_mean": 5092.4,
"valid_targets_min": 2858
},
{
"epoch": 1.5551181102362204,
"grad_norm": 0.5451705432713956,
"learning_rate": 3.8228065532708905e-05,
"loss": 0.2211,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11911598592996597,
"step": 395,
"valid_targets_mean": 4980.0,
"valid_targets_min": 2032
},
{
"epoch": 1.574803149606299,
"grad_norm": 0.5179848621534485,
"learning_rate": 3.814638774585648e-05,
"loss": 0.2215,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12453673034906387,
"step": 400,
"valid_targets_mean": 4640.0,
"valid_targets_min": 3069
},
{
"epoch": 1.594488188976378,
"grad_norm": 0.571745402931148,
"learning_rate": 3.8062960972384223e-05,
"loss": 0.2339,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12252622842788696,
"step": 405,
"valid_targets_mean": 4845.2,
"valid_targets_min": 2313
},
{
"epoch": 1.6141732283464567,
"grad_norm": 0.5136310790331813,
"learning_rate": 3.797779325313822e-05,
"loss": 0.2194,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.116617351770401,
"step": 410,
"valid_targets_mean": 4400.0,
"valid_targets_min": 2131
},
{
"epoch": 1.6338582677165354,
"grad_norm": 0.5444550656365863,
"learning_rate": 3.78908927967605e-05,
"loss": 0.2261,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09920500218868256,
"step": 415,
"valid_targets_mean": 3801.4,
"valid_targets_min": 929
},
{
"epoch": 1.6535433070866141,
"grad_norm": 0.5651068175229031,
"learning_rate": 3.780226797889794e-05,
"loss": 0.2279,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11923803389072418,
"step": 420,
"valid_targets_mean": 5343.2,
"valid_targets_min": 3792
},
{
"epoch": 1.673228346456693,
"grad_norm": 0.528414522355408,
"learning_rate": 3.7711927341394916e-05,
"loss": 0.2264,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11008419096469879,
"step": 425,
"valid_targets_mean": 4617.0,
"valid_targets_min": 1825
},
{
"epoch": 1.6929133858267718,
"grad_norm": 0.49319336490044646,
"learning_rate": 3.761987959147012e-05,
"loss": 0.227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1122782975435257,
"step": 430,
"valid_targets_mean": 4724.9,
"valid_targets_min": 1939
},
{
"epoch": 1.7125984251968505,
"grad_norm": 0.6816658995037583,
"learning_rate": 3.7526133600877275e-05,
"loss": 0.2231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10677564144134521,
"step": 435,
"valid_targets_mean": 3878.1,
"valid_targets_min": 1817
},
{
"epoch": 1.7322834645669292,
"grad_norm": 0.5577633000837552,
"learning_rate": 3.743069840505006e-05,
"loss": 0.2237,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11926381289958954,
"step": 440,
"valid_targets_mean": 5655.0,
"valid_targets_min": 4862
},
{
"epoch": 1.7519685039370079,
"grad_norm": 0.5234860300477719,
"learning_rate": 3.733358320223128e-05,
"loss": 0.2219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0971926674246788,
"step": 445,
"valid_targets_mean": 4074.6,
"valid_targets_min": 3067
},
{
"epoch": 1.7716535433070866,
"grad_norm": 0.4937946217850683,
"learning_rate": 3.723479735258633e-05,
"loss": 0.2207,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1019144281744957,
"step": 450,
"valid_targets_mean": 4700.1,
"valid_targets_min": 2801
},
{
"epoch": 1.7913385826771653,
"grad_norm": 0.5570857669762997,
"learning_rate": 3.7134350377301e-05,
"loss": 0.2159,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09371967613697052,
"step": 455,
"valid_targets_mean": 3744.6,
"valid_targets_min": 1971
},
{
"epoch": 1.811023622047244,
"grad_norm": 0.5416012458914922,
"learning_rate": 3.703225195766382e-05,
"loss": 0.2145,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11869136989116669,
"step": 460,
"valid_targets_mean": 4309.0,
"valid_targets_min": 2595
},
{
"epoch": 1.8307086614173227,
"grad_norm": 0.6099457623503903,
"learning_rate": 3.692851193413299e-05,
"loss": 0.2195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10964755713939667,
"step": 465,
"valid_targets_mean": 4751.8,
"valid_targets_min": 1419
},
{
"epoch": 1.8503937007874016,
"grad_norm": 0.5034362298056623,
"learning_rate": 3.682314030538788e-05,
"loss": 0.2226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12900136411190033,
"step": 470,
"valid_targets_mean": 4964.6,
"valid_targets_min": 3367
},
{
"epoch": 1.8700787401574803,
"grad_norm": 0.5358362284677594,
"learning_rate": 3.671614722736541e-05,
"loss": 0.2275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11831079423427582,
"step": 475,
"valid_targets_mean": 5081.8,
"valid_targets_min": 3701
},
{
"epoch": 1.889763779527559,
"grad_norm": 0.5762542118351213,
"learning_rate": 3.6607543012281106e-05,
"loss": 0.2196,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08950397372245789,
"step": 480,
"valid_targets_mean": 3950.8,
"valid_targets_min": 2183
},
{
"epoch": 1.909448818897638,
"grad_norm": 0.5002014465401627,
"learning_rate": 3.649733812763527e-05,
"loss": 0.2159,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12077131867408752,
"step": 485,
"valid_targets_mean": 5333.6,
"valid_targets_min": 2438
},
{
"epoch": 1.9291338582677167,
"grad_norm": 0.5338348406951574,
"learning_rate": 3.638554319520406e-05,
"loss": 0.2216,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13127386569976807,
"step": 490,
"valid_targets_mean": 5023.8,
"valid_targets_min": 2810
},
{
"epoch": 1.9488188976377954,
"grad_norm": 0.5081775387200828,
"learning_rate": 3.627216899001575e-05,
"loss": 0.2232,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09949933737516403,
"step": 495,
"valid_targets_mean": 4430.5,
"valid_targets_min": 2585
},
{
"epoch": 1.968503937007874,
"grad_norm": 0.50908063261341,
"learning_rate": 3.6157226439312186e-05,
"loss": 0.2224,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08818402886390686,
"step": 500,
"valid_targets_mean": 3562.0,
"valid_targets_min": 2299
},
{
"epoch": 1.9881889763779528,
"grad_norm": 0.5207978644200455,
"learning_rate": 3.604072662149567e-05,
"loss": 0.2198,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11189639568328857,
"step": 505,
"valid_targets_mean": 4364.1,
"valid_targets_min": 2387
},
{
"epoch": 2.0078740157480315,
"grad_norm": 0.5466084267492376,
"learning_rate": 3.5922680765061096e-05,
"loss": 0.2095,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0949828028678894,
"step": 510,
"valid_targets_mean": 4237.5,
"valid_targets_min": 2554
},
{
"epoch": 2.02755905511811,
"grad_norm": 0.5321407688955584,
"learning_rate": 3.580310024751381e-05,
"loss": 0.2057,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11443229019641876,
"step": 515,
"valid_targets_mean": 4785.4,
"valid_targets_min": 3554
},
{
"epoch": 2.047244094488189,
"grad_norm": 0.525274992093487,
"learning_rate": 3.568199659427298e-05,
"loss": 0.2092,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11236311495304108,
"step": 520,
"valid_targets_mean": 5097.4,
"valid_targets_min": 3642
},
{
"epoch": 2.0669291338582676,
"grad_norm": 0.5152344950397277,
"learning_rate": 3.555938147756077e-05,
"loss": 0.2056,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11348426342010498,
"step": 525,
"valid_targets_mean": 4983.1,
"valid_targets_min": 2589
},
{
"epoch": 2.0866141732283463,
"grad_norm": 0.5111936010700812,
"learning_rate": 3.543526671527733e-05,
"loss": 0.206,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1142355352640152,
"step": 530,
"valid_targets_mean": 5025.6,
"valid_targets_min": 3773
},
{
"epoch": 2.106299212598425,
"grad_norm": 0.46330316147700296,
"learning_rate": 3.530966426986177e-05,
"loss": 0.205,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09499895572662354,
"step": 535,
"valid_targets_mean": 4627.2,
"valid_targets_min": 3145
},
{
"epoch": 2.1259842519685037,
"grad_norm": 0.5918068797159127,
"learning_rate": 3.51825862471392e-05,
"loss": 0.2088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09630045294761658,
"step": 540,
"valid_targets_mean": 3830.5,
"valid_targets_min": 1971
},
{
"epoch": 2.145669291338583,
"grad_norm": 0.510529225460911,
"learning_rate": 3.505404489515394e-05,
"loss": 0.1971,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09597675502300262,
"step": 545,
"valid_targets_mean": 4486.2,
"valid_targets_min": 1437
},
{
"epoch": 2.1653543307086616,
"grad_norm": 0.6594528183258365,
"learning_rate": 3.492405260298905e-05,
"loss": 0.2093,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08669960498809814,
"step": 550,
"valid_targets_mean": 3546.6,
"valid_targets_min": 1943
},
{
"epoch": 2.1850393700787403,
"grad_norm": 0.5254355595871669,
"learning_rate": 3.47926218995722e-05,
"loss": 0.2076,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10595513880252838,
"step": 555,
"valid_targets_mean": 4496.1,
"valid_targets_min": 1889
},
{
"epoch": 2.204724409448819,
"grad_norm": 0.49557550566701525,
"learning_rate": 3.465976545246813e-05,
"loss": 0.2046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08661658316850662,
"step": 560,
"valid_targets_mean": 3980.4,
"valid_targets_min": 3066
},
{
"epoch": 2.2244094488188977,
"grad_norm": 0.5111732948542816,
"learning_rate": 3.4525496066657735e-05,
"loss": 0.2046,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11712317913770676,
"step": 565,
"valid_targets_mean": 4783.5,
"valid_targets_min": 2421
},
{
"epoch": 2.2440944881889764,
"grad_norm": 0.49358660342505406,
"learning_rate": 3.438982668330388e-05,
"loss": 0.1999,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09558248519897461,
"step": 570,
"valid_targets_mean": 5018.6,
"valid_targets_min": 3462
},
{
"epoch": 2.263779527559055,
"grad_norm": 0.563440251361422,
"learning_rate": 3.425277037850411e-05,
"loss": 0.2142,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12875157594680786,
"step": 575,
"valid_targets_mean": 5109.5,
"valid_targets_min": 3621
},
{
"epoch": 2.283464566929134,
"grad_norm": 0.5014531900079462,
"learning_rate": 3.411434036203035e-05,
"loss": 0.2057,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09397667646408081,
"step": 580,
"valid_targets_mean": 4514.8,
"valid_targets_min": 2598
},
{
"epoch": 2.3031496062992125,
"grad_norm": 0.5208273871593598,
"learning_rate": 3.397454997605569e-05,
"loss": 0.2116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09422179311513901,
"step": 585,
"valid_targets_mean": 3971.2,
"valid_targets_min": 1966
},
{
"epoch": 2.322834645669291,
"grad_norm": 0.5077057881847968,
"learning_rate": 3.38334126938685e-05,
"loss": 0.2004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09394000470638275,
"step": 590,
"valid_targets_mean": 4328.1,
"valid_targets_min": 1690
},
{
"epoch": 2.34251968503937,
"grad_norm": 0.5029165562822605,
"learning_rate": 3.369094211857378e-05,
"loss": 0.1973,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10040949285030365,
"step": 595,
"valid_targets_mean": 4422.8,
"valid_targets_min": 2404
},
{
"epoch": 2.362204724409449,
"grad_norm": 0.5173178731677505,
"learning_rate": 3.354715198178211e-05,
"loss": 0.2044,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09321373701095581,
"step": 600,
"valid_targets_mean": 4853.1,
"valid_targets_min": 1882
},
{
"epoch": 2.3818897637795278,
"grad_norm": 0.5110742929714077,
"learning_rate": 3.3402056142286156e-05,
"loss": 0.208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12449011206626892,
"step": 605,
"valid_targets_mean": 5048.4,
"valid_targets_min": 3605
},
{
"epoch": 2.4015748031496065,
"grad_norm": 0.484575456099906,
"learning_rate": 3.3255668584724916e-05,
"loss": 0.2038,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11904991418123245,
"step": 610,
"valid_targets_mean": 5304.2,
"valid_targets_min": 3986
},
{
"epoch": 2.421259842519685,
"grad_norm": 0.4882002622536984,
"learning_rate": 3.310800341823588e-05,
"loss": 0.2071,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10080724954605103,
"step": 615,
"valid_targets_mean": 5046.1,
"valid_targets_min": 2715
},
{
"epoch": 2.440944881889764,
"grad_norm": 0.505835911090768,
"learning_rate": 3.2959074875095125e-05,
"loss": 0.2023,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11705048382282257,
"step": 620,
"valid_targets_mean": 4968.8,
"valid_targets_min": 3897
},
{
"epoch": 2.4606299212598426,
"grad_norm": 0.47441448533729824,
"learning_rate": 3.280889730934562e-05,
"loss": 0.2068,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0981658473610878,
"step": 625,
"valid_targets_mean": 5352.0,
"valid_targets_min": 1956
},
{
"epoch": 2.4803149606299213,
"grad_norm": 0.5148642841586799,
"learning_rate": 3.265748519541372e-05,
"loss": 0.2067,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1040315181016922,
"step": 630,
"valid_targets_mean": 4181.8,
"valid_targets_min": 3245
},
{
"epoch": 2.5,
"grad_norm": 0.4473554477912197,
"learning_rate": 3.250485312671411e-05,
"loss": 0.2024,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10681141912937164,
"step": 635,
"valid_targets_mean": 5273.6,
"valid_targets_min": 3021
},
{
"epoch": 2.5196850393700787,
"grad_norm": 0.4973261610037662,
"learning_rate": 3.2351015814243235e-05,
"loss": 0.2058,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09665346145629883,
"step": 640,
"valid_targets_mean": 4257.8,
"valid_targets_min": 1529
},
{
"epoch": 2.5393700787401574,
"grad_norm": 0.48701886585572,
"learning_rate": 3.219598808516148e-05,
"loss": 0.2006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09378744661808014,
"step": 645,
"valid_targets_mean": 4074.2,
"valid_targets_min": 1736
},
{
"epoch": 2.559055118110236,
"grad_norm": 0.4902038354931814,
"learning_rate": 3.203978488136403e-05,
"loss": 0.2008,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09908495843410492,
"step": 650,
"valid_targets_mean": 4284.1,
"valid_targets_min": 3253
},
{
"epoch": 2.578740157480315,
"grad_norm": 0.49062360618564954,
"learning_rate": 3.188242125804078e-05,
"loss": 0.1947,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10086756199598312,
"step": 655,
"valid_targets_mean": 4713.5,
"valid_targets_min": 2958
},
{
"epoch": 2.5984251968503935,
"grad_norm": 0.461118649740814,
"learning_rate": 3.1723912382225267e-05,
"loss": 0.197,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10865049809217453,
"step": 660,
"valid_targets_mean": 7202.1,
"valid_targets_min": 3824
},
{
"epoch": 2.6181102362204722,
"grad_norm": 0.4933068672953504,
"learning_rate": 3.156427353133286e-05,
"loss": 0.2026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1005554273724556,
"step": 665,
"valid_targets_mean": 4939.8,
"valid_targets_min": 2053
},
{
"epoch": 2.637795275590551,
"grad_norm": 0.4668763634751758,
"learning_rate": 3.140352009168828e-05,
"loss": 0.1994,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0940636545419693,
"step": 670,
"valid_targets_mean": 5163.4,
"valid_targets_min": 2860
},
{
"epoch": 2.65748031496063,
"grad_norm": 0.49559867473428193,
"learning_rate": 3.124166755704261e-05,
"loss": 0.1974,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1019098088145256,
"step": 675,
"valid_targets_mean": 4997.8,
"valid_targets_min": 2572
},
{
"epoch": 2.677165354330709,
"grad_norm": 0.542720024397913,
"learning_rate": 3.1078731527080023e-05,
"loss": 0.2072,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.112381711602211,
"step": 680,
"valid_targets_mean": 5089.8,
"valid_targets_min": 2932
},
{
"epoch": 2.6968503937007875,
"grad_norm": 0.4988125800209356,
"learning_rate": 3.09147277059142e-05,
"loss": 0.1985,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11727134883403778,
"step": 685,
"valid_targets_mean": 5363.6,
"valid_targets_min": 4063
},
{
"epoch": 2.716535433070866,
"grad_norm": 0.5392056382063659,
"learning_rate": 3.074967190057478e-05,
"loss": 0.2005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09954220056533813,
"step": 690,
"valid_targets_mean": 4351.2,
"valid_targets_min": 3520
},
{
"epoch": 2.736220472440945,
"grad_norm": 0.4607035747451114,
"learning_rate": 3.058358001948381e-05,
"loss": 0.1964,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10830067098140717,
"step": 695,
"valid_targets_mean": 5797.5,
"valid_targets_min": 2900
},
{
"epoch": 2.7559055118110236,
"grad_norm": 0.47953184166915724,
"learning_rate": 3.0416468070922504e-05,
"loss": 0.2109,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10138700157403946,
"step": 700,
"valid_targets_mean": 5056.4,
"valid_targets_min": 3807
},
{
"epoch": 2.7755905511811023,
"grad_norm": 0.5612227264608588,
"learning_rate": 3.0248352161488267e-05,
"loss": 0.2076,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1148558109998703,
"step": 705,
"valid_targets_mean": 4465.0,
"valid_targets_min": 2671
},
{
"epoch": 2.795275590551181,
"grad_norm": 0.4941943984378652,
"learning_rate": 3.007924849454235e-05,
"loss": 0.2039,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0908154547214508,
"step": 710,
"valid_targets_mean": 4094.6,
"valid_targets_min": 2159
},
{
"epoch": 2.8149606299212597,
"grad_norm": 0.4645730357534459,
"learning_rate": 2.9909173368648154e-05,
"loss": 0.2017,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.098426952958107,
"step": 715,
"valid_targets_mean": 5339.1,
"valid_targets_min": 3580
},
{
"epoch": 2.8346456692913384,
"grad_norm": 0.5000676264954633,
"learning_rate": 2.9738143176000287e-05,
"loss": 0.2026,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07998067885637283,
"step": 720,
"valid_targets_mean": 4276.6,
"valid_targets_min": 2867
},
{
"epoch": 2.8543307086614176,
"grad_norm": 0.759793927772002,
"learning_rate": 2.9566174400844692e-05,
"loss": 0.2054,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11061988770961761,
"step": 725,
"valid_targets_mean": 4561.0,
"valid_targets_min": 3068
},
{
"epoch": 2.8740157480314963,
"grad_norm": 0.5517561318694921,
"learning_rate": 2.9393283617889846e-05,
"loss": 0.199,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09479944407939911,
"step": 730,
"valid_targets_mean": 3989.2,
"valid_targets_min": 1716
},
{
"epoch": 2.893700787401575,
"grad_norm": 0.5730088332694196,
"learning_rate": 2.921948749070925e-05,
"loss": 0.2079,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10821133852005005,
"step": 735,
"valid_targets_mean": 4091.9,
"valid_targets_min": 1162
},
{
"epoch": 2.9133858267716537,
"grad_norm": 0.5117188223549288,
"learning_rate": 2.9044802770135375e-05,
"loss": 0.2058,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10031283646821976,
"step": 740,
"valid_targets_mean": 3998.4,
"valid_targets_min": 2588
},
{
"epoch": 2.9330708661417324,
"grad_norm": 0.5738076164586253,
"learning_rate": 2.886924629264517e-05,
"loss": 0.201,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0898476392030716,
"step": 745,
"valid_targets_mean": 4100.6,
"valid_targets_min": 2625
},
{
"epoch": 2.952755905511811,
"grad_norm": 0.4705109202476746,
"learning_rate": 2.8692834978737328e-05,
"loss": 0.2007,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08737783879041672,
"step": 750,
"valid_targets_mean": 4811.9,
"valid_targets_min": 2942
},
{
"epoch": 2.97244094488189,
"grad_norm": 0.5256205607708675,
"learning_rate": 2.8515585831301456e-05,
"loss": 0.2058,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09472424536943436,
"step": 755,
"valid_targets_mean": 3864.2,
"valid_targets_min": 1848
},
{
"epoch": 2.9921259842519685,
"grad_norm": 0.5557955915234573,
"learning_rate": 2.83375159339793e-05,
"loss": 0.1944,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10027581453323364,
"step": 760,
"valid_targets_mean": 3908.1,
"valid_targets_min": 1269
},
{
"epoch": 3.0118110236220472,
"grad_norm": 0.489602811839751,
"learning_rate": 2.8158642449518186e-05,
"loss": 0.192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08896738290786743,
"step": 765,
"valid_targets_mean": 4385.0,
"valid_targets_min": 1612
},
{
"epoch": 3.031496062992126,
"grad_norm": 0.49676546976132585,
"learning_rate": 2.797898261811685e-05,
"loss": 0.1834,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07210229337215424,
"step": 770,
"valid_targets_mean": 4227.2,
"valid_targets_min": 1716
},
{
"epoch": 3.0511811023622046,
"grad_norm": 0.5444084127361962,
"learning_rate": 2.7798553755763768e-05,
"loss": 0.1902,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08889536559581757,
"step": 775,
"valid_targets_mean": 3636.1,
"valid_targets_min": 1437
},
{
"epoch": 3.0708661417322833,
"grad_norm": 0.48639626601900104,
"learning_rate": 2.7617373252568237e-05,
"loss": 0.1828,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07608135044574738,
"step": 780,
"valid_targets_mean": 3854.9,
"valid_targets_min": 2387
},
{
"epoch": 3.090551181102362,
"grad_norm": 0.5039938513859394,
"learning_rate": 2.7435458571084247e-05,
"loss": 0.1829,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08201783895492554,
"step": 785,
"valid_targets_mean": 4603.8,
"valid_targets_min": 1606
},
{
"epoch": 3.1102362204724407,
"grad_norm": 0.4763098392205304,
"learning_rate": 2.725282724462743e-05,
"loss": 0.1877,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09433779120445251,
"step": 790,
"valid_targets_mean": 4647.5,
"valid_targets_min": 2802
},
{
"epoch": 3.1299212598425195,
"grad_norm": 0.49773805149358136,
"learning_rate": 2.7069496875585145e-05,
"loss": 0.1851,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08049625158309937,
"step": 795,
"valid_targets_mean": 4359.8,
"valid_targets_min": 2157
},
{
"epoch": 3.1496062992125986,
"grad_norm": 0.5115330545437442,
"learning_rate": 2.688548513371994e-05,
"loss": 0.185,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08574514836072922,
"step": 800,
"valid_targets_mean": 4656.5,
"valid_targets_min": 3067
},
{
"epoch": 3.1692913385826773,
"grad_norm": 0.5235143468775721,
"learning_rate": 2.670080975446648e-05,
"loss": 0.1913,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09149648994207382,
"step": 805,
"valid_targets_mean": 4277.5,
"valid_targets_min": 1605
},
{
"epoch": 3.188976377952756,
"grad_norm": 0.49841632930623364,
"learning_rate": 2.6515488537222198e-05,
"loss": 0.1861,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09565143287181854,
"step": 810,
"valid_targets_mean": 4569.1,
"valid_targets_min": 3176
},
{
"epoch": 3.2086614173228347,
"grad_norm": 0.49457383111631015,
"learning_rate": 2.6329539343631725e-05,
"loss": 0.1877,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09685587137937546,
"step": 815,
"valid_targets_mean": 4887.6,
"valid_targets_min": 3322
},
{
"epoch": 3.2283464566929134,
"grad_norm": 0.5679800649969203,
"learning_rate": 2.614298009586536e-05,
"loss": 0.1875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09396011382341385,
"step": 820,
"valid_targets_mean": 5448.8,
"valid_targets_min": 3129
},
{
"epoch": 3.248031496062992,
"grad_norm": 0.5551426275684962,
"learning_rate": 2.595582877489171e-05,
"loss": 0.1932,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0824180543422699,
"step": 825,
"valid_targets_mean": 3779.1,
"valid_targets_min": 2245
},
{
"epoch": 3.267716535433071,
"grad_norm": 0.5113779108721171,
"learning_rate": 2.57681034187446e-05,
"loss": 0.1923,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10713572055101395,
"step": 830,
"valid_targets_mean": 5406.5,
"valid_targets_min": 2183
},
{
"epoch": 3.2874015748031495,
"grad_norm": 0.5381591184386493,
"learning_rate": 2.557982212078459e-05,
"loss": 0.1853,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.111358642578125,
"step": 835,
"valid_targets_mean": 5145.0,
"valid_targets_min": 3195
},
{
"epoch": 3.3070866141732282,
"grad_norm": 0.48731121571202646,
"learning_rate": 2.5391003027955045e-05,
"loss": 0.1878,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08840745687484741,
"step": 840,
"valid_targets_mean": 4042.9,
"valid_targets_min": 1385
},
{
"epoch": 3.326771653543307,
"grad_norm": 0.520948548999774,
"learning_rate": 2.5201664339033138e-05,
"loss": 0.1867,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09194193035364151,
"step": 845,
"valid_targets_mean": 4256.4,
"valid_targets_min": 1909
},
{
"epoch": 3.3464566929133857,
"grad_norm": 0.5340911973855548,
"learning_rate": 2.501182430287578e-05,
"loss": 0.1802,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07924988120794296,
"step": 850,
"valid_targets_mean": 4019.1,
"valid_targets_min": 1926
},
{
"epoch": 3.366141732283465,
"grad_norm": 0.5666395365279373,
"learning_rate": 2.4821501216660778e-05,
"loss": 0.184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07816565036773682,
"step": 855,
"valid_targets_mean": 4100.0,
"valid_targets_min": 940
},
{
"epoch": 3.3858267716535435,
"grad_norm": 0.46499272428101956,
"learning_rate": 2.4630713424123315e-05,
"loss": 0.1887,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10357591509819031,
"step": 860,
"valid_targets_mean": 5542.5,
"valid_targets_min": 3182
},
{
"epoch": 3.405511811023622,
"grad_norm": 0.5264125211739766,
"learning_rate": 2.443947931378792e-05,
"loss": 0.1935,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09696462750434875,
"step": 865,
"valid_targets_mean": 5404.4,
"valid_targets_min": 3877
},
{
"epoch": 3.425196850393701,
"grad_norm": 0.5264121206863542,
"learning_rate": 2.4247817317196188e-05,
"loss": 0.1857,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09942129999399185,
"step": 870,
"valid_targets_mean": 4794.9,
"valid_targets_min": 3514
},
{
"epoch": 3.4448818897637796,
"grad_norm": 0.5574236922356401,
"learning_rate": 2.405574590713025e-05,
"loss": 0.1842,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09778232127428055,
"step": 875,
"valid_targets_mean": 3727.4,
"valid_targets_min": 1874
},
{
"epoch": 3.4645669291338583,
"grad_norm": 0.47628769156574696,
"learning_rate": 2.3863283595832387e-05,
"loss": 0.1837,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10616715997457504,
"step": 880,
"valid_targets_mean": 5078.0,
"valid_targets_min": 2087
},
{
"epoch": 3.484251968503937,
"grad_norm": 0.5301112715778323,
"learning_rate": 2.3670448933220732e-05,
"loss": 0.192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08781129121780396,
"step": 885,
"valid_targets_mean": 4616.1,
"valid_targets_min": 3619
},
{
"epoch": 3.5039370078740157,
"grad_norm": 0.48688801676522564,
"learning_rate": 2.3477260505101427e-05,
"loss": 0.1888,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0931258425116539,
"step": 890,
"valid_targets_mean": 4153.9,
"valid_targets_min": 1556
},
{
"epoch": 3.5236220472440944,
"grad_norm": 0.47341831849766425,
"learning_rate": 2.328373693137726e-05,
"loss": 0.1815,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09009651839733124,
"step": 895,
"valid_targets_mean": 5211.2,
"valid_targets_min": 1823
},
{
"epoch": 3.543307086614173,
"grad_norm": 0.5378439473278651,
"learning_rate": 2.3089896864253066e-05,
"loss": 0.1848,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09250041097402573,
"step": 900,
"valid_targets_mean": 5271.4,
"valid_targets_min": 4075
},
{
"epoch": 3.562992125984252,
"grad_norm": 0.5053809064790767,
"learning_rate": 2.289575898643796e-05,
"loss": 0.1871,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0888461098074913,
"step": 905,
"valid_targets_mean": 4520.5,
"valid_targets_min": 2860
},
{
"epoch": 3.5826771653543306,
"grad_norm": 0.5265904290190508,
"learning_rate": 2.270134200934466e-05,
"loss": 0.191,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11478685587644577,
"step": 910,
"valid_targets_mean": 4733.8,
"valid_targets_min": 2375
},
{
"epoch": 3.6023622047244093,
"grad_norm": 0.6006788025580756,
"learning_rate": 2.2506664671286087e-05,
"loss": 0.1928,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08821739256381989,
"step": 915,
"valid_targets_mean": 5459.4,
"valid_targets_min": 2010
},
{
"epoch": 3.622047244094488,
"grad_norm": 0.49323233428557733,
"learning_rate": 2.2311745735669258e-05,
"loss": 0.1893,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0934150367975235,
"step": 920,
"valid_targets_mean": 4659.5,
"valid_targets_min": 2887
},
{
"epoch": 3.6417322834645667,
"grad_norm": 0.5257624566413204,
"learning_rate": 2.2116603989186895e-05,
"loss": 0.1921,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12410484254360199,
"step": 925,
"valid_targets_mean": 4609.0,
"valid_targets_min": 2970
},
{
"epoch": 3.661417322834646,
"grad_norm": 0.4532378121829681,
"learning_rate": 2.192125824000667e-05,
"loss": 0.183,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08430390059947968,
"step": 930,
"valid_targets_mean": 5128.5,
"valid_targets_min": 2248
},
{
"epoch": 3.6811023622047245,
"grad_norm": 0.4995656697469682,
"learning_rate": 2.1725727315958473e-05,
"loss": 0.1866,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10000473260879517,
"step": 935,
"valid_targets_mean": 5442.2,
"valid_targets_min": 3615
},
{
"epoch": 3.7007874015748032,
"grad_norm": 0.5036346992884158,
"learning_rate": 2.1530030062719724e-05,
"loss": 0.1861,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1054472103714943,
"step": 940,
"valid_targets_mean": 5186.9,
"valid_targets_min": 3697
},
{
"epoch": 3.720472440944882,
"grad_norm": 0.4671367893199655,
"learning_rate": 2.1334185341999024e-05,
"loss": 0.1761,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09384366869926453,
"step": 945,
"valid_targets_mean": 4929.9,
"valid_targets_min": 3960
},
{
"epoch": 3.7401574803149606,
"grad_norm": 0.4751047221718914,
"learning_rate": 2.1138212029718158e-05,
"loss": 0.1918,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07663257420063019,
"step": 950,
"valid_targets_mean": 3935.1,
"valid_targets_min": 1943
},
{
"epoch": 3.7598425196850394,
"grad_norm": 0.49262828953153526,
"learning_rate": 2.0942129014192854e-05,
"loss": 0.192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0853680819272995,
"step": 955,
"valid_targets_mean": 4183.4,
"valid_targets_min": 1253
},
{
"epoch": 3.779527559055118,
"grad_norm": 0.6177236621428462,
"learning_rate": 2.0745955194312276e-05,
"loss": 0.187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09001902490854263,
"step": 960,
"valid_targets_mean": 4105.8,
"valid_targets_min": 961
},
{
"epoch": 3.7992125984251968,
"grad_norm": 0.4739848852743866,
"learning_rate": 2.054970947771747e-05,
"loss": 0.1837,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0954812690615654,
"step": 965,
"valid_targets_mean": 4715.4,
"valid_targets_min": 2980
},
{
"epoch": 3.8188976377952755,
"grad_norm": 0.4886045635390971,
"learning_rate": 2.0353410778979076e-05,
"loss": 0.188,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09902863204479218,
"step": 970,
"valid_targets_mean": 4622.1,
"valid_targets_min": 2823
},
{
"epoch": 3.838582677165354,
"grad_norm": 0.5100874173228366,
"learning_rate": 2.0157078017774228e-05,
"loss": 0.1872,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09035211056470871,
"step": 975,
"valid_targets_mean": 4455.8,
"valid_targets_min": 2887
},
{
"epoch": 3.8582677165354333,
"grad_norm": 0.4621944244036512,
"learning_rate": 1.99607301170631e-05,
"loss": 0.1842,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0871577262878418,
"step": 980,
"valid_targets_mean": 4793.2,
"valid_targets_min": 3624
},
{
"epoch": 3.877952755905512,
"grad_norm": 0.5027938332863866,
"learning_rate": 1.9764386001265015e-05,
"loss": 0.194,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08507096022367477,
"step": 985,
"valid_targets_mean": 4229.9,
"valid_targets_min": 2062
},
{
"epoch": 3.8976377952755907,
"grad_norm": 0.5044069417090365,
"learning_rate": 1.956806459443453e-05,
"loss": 0.1827,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09360580146312714,
"step": 990,
"valid_targets_mean": 4366.6,
"valid_targets_min": 1331
},
{
"epoch": 3.9173228346456694,
"grad_norm": 0.4158951842030391,
"learning_rate": 1.9371784818437436e-05,
"loss": 0.189,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.090241938829422,
"step": 995,
"valid_targets_mean": 6768.0,
"valid_targets_min": 4122
},
{
"epoch": 3.937007874015748,
"grad_norm": 0.4745694648588442,
"learning_rate": 1.9175565591127073e-05,
"loss": 0.1887,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09176523983478546,
"step": 1000,
"valid_targets_mean": 4334.9,
"valid_targets_min": 2032
},
{
"epoch": 3.956692913385827,
"grad_norm": 0.4364212376689397,
"learning_rate": 1.897942582452097e-05,
"loss": 0.1865,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07971988618373871,
"step": 1005,
"valid_targets_mean": 5336.1,
"valid_targets_min": 2018
},
{
"epoch": 3.9763779527559056,
"grad_norm": 0.5511005102547655,
"learning_rate": 1.8783384422978066e-05,
"loss": 0.1896,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11795173585414886,
"step": 1010,
"valid_targets_mean": 5471.5,
"valid_targets_min": 3410
},
{
"epoch": 3.9960629921259843,
"grad_norm": 0.6825209280817168,
"learning_rate": 1.8587460281376673e-05,
"loss": 0.1913,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08119224011898041,
"step": 1015,
"valid_targets_mean": 4090.9,
"valid_targets_min": 1821
},
{
"epoch": 4.015748031496063,
"grad_norm": 0.44159331185888756,
"learning_rate": 1.8391672283293333e-05,
"loss": 0.1747,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0800844132900238,
"step": 1020,
"valid_targets_mean": 4880.5,
"valid_targets_min": 1825
},
{
"epoch": 4.035433070866142,
"grad_norm": 0.49158869212460554,
"learning_rate": 1.8196039299182818e-05,
"loss": 0.1648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07483560591936111,
"step": 1025,
"valid_targets_mean": 4341.6,
"valid_targets_min": 1814
},
{
"epoch": 4.05511811023622,
"grad_norm": 0.5155005554765618,
"learning_rate": 1.8000580184559315e-05,
"loss": 0.1766,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08504385501146317,
"step": 1030,
"valid_targets_mean": 4477.4,
"valid_targets_min": 2827
},
{
"epoch": 4.074803149606299,
"grad_norm": 0.48899712990779,
"learning_rate": 1.7805313778179095e-05,
"loss": 0.1711,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08724971115589142,
"step": 1035,
"valid_targets_mean": 5161.0,
"valid_targets_min": 3101
},
{
"epoch": 4.094488188976378,
"grad_norm": 0.55889970736938,
"learning_rate": 1.7610258900224843e-05,
"loss": 0.1773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08534616231918335,
"step": 1040,
"valid_targets_mean": 4326.1,
"valid_targets_min": 2248
},
{
"epoch": 4.1141732283464565,
"grad_norm": 0.49815410887297984,
"learning_rate": 1.741543435049165e-05,
"loss": 0.1813,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0864620953798294,
"step": 1045,
"valid_targets_mean": 4624.0,
"valid_targets_min": 2594
},
{
"epoch": 4.133858267716535,
"grad_norm": 0.7827540107484134,
"learning_rate": 1.7220858906575126e-05,
"loss": 0.1776,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08015921711921692,
"step": 1050,
"valid_targets_mean": 4723.1,
"valid_targets_min": 2157
},
{
"epoch": 4.153543307086614,
"grad_norm": 0.4730801734010594,
"learning_rate": 1.702655132206154e-05,
"loss": 0.1688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08305276930332184,
"step": 1055,
"valid_targets_mean": 4733.5,
"valid_targets_min": 2576
},
{
"epoch": 4.173228346456693,
"grad_norm": 0.5180429383283888,
"learning_rate": 1.6832530324720303e-05,
"loss": 0.172,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09678762406110764,
"step": 1060,
"valid_targets_mean": 5148.9,
"valid_targets_min": 3445
},
{
"epoch": 4.192913385826771,
"grad_norm": 0.4999900061611034,
"learning_rate": 1.6638814614698965e-05,
"loss": 0.184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08001478016376495,
"step": 1065,
"valid_targets_mean": 3956.1,
"valid_targets_min": 2401
},
{
"epoch": 4.21259842519685,
"grad_norm": 0.5285391113094148,
"learning_rate": 1.6445422862720845e-05,
"loss": 0.1757,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08853181451559067,
"step": 1070,
"valid_targets_mean": 4346.4,
"valid_targets_min": 2970
},
{
"epoch": 4.232283464566929,
"grad_norm": 0.4763498051008598,
"learning_rate": 1.6252373708285505e-05,
"loss": 0.175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08076771348714828,
"step": 1075,
"valid_targets_mean": 4906.2,
"valid_targets_min": 4131
},
{
"epoch": 4.251968503937007,
"grad_norm": 0.49259588079991434,
"learning_rate": 1.6059685757872274e-05,
"loss": 0.1682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10008959472179413,
"step": 1080,
"valid_targets_mean": 5259.8,
"valid_targets_min": 2147
},
{
"epoch": 4.271653543307087,
"grad_norm": 0.4715065185842055,
"learning_rate": 1.5867377583146836e-05,
"loss": 0.1686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09387775510549545,
"step": 1085,
"valid_targets_mean": 5263.2,
"valid_targets_min": 1806
},
{
"epoch": 4.291338582677166,
"grad_norm": 0.5389936506507088,
"learning_rate": 1.567546771917135e-05,
"loss": 0.1808,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08452951908111572,
"step": 1090,
"valid_targets_mean": 3925.9,
"valid_targets_min": 2789
},
{
"epoch": 4.311023622047244,
"grad_norm": 0.6307796908583303,
"learning_rate": 1.548397466261793e-05,
"loss": 0.1771,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07884350419044495,
"step": 1095,
"valid_targets_mean": 4155.8,
"valid_targets_min": 1767
},
{
"epoch": 4.330708661417323,
"grad_norm": 0.5112867698161492,
"learning_rate": 1.529291686998592e-05,
"loss": 0.1779,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0957280844449997,
"step": 1100,
"valid_targets_mean": 5302.2,
"valid_targets_min": 2786
},
{
"epoch": 4.350393700787402,
"grad_norm": 0.5149850002956461,
"learning_rate": 1.5102312755823053e-05,
"loss": 0.1739,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07714081555604935,
"step": 1105,
"valid_targets_mean": 4015.5,
"valid_targets_min": 2457
},
{
"epoch": 4.3700787401574805,
"grad_norm": 0.4912535927800697,
"learning_rate": 1.4912180690950545e-05,
"loss": 0.175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08937303721904755,
"step": 1110,
"valid_targets_mean": 4789.8,
"valid_targets_min": 2954
},
{
"epoch": 4.389763779527559,
"grad_norm": 0.4966216245279888,
"learning_rate": 1.4722539000692548e-05,
"loss": 0.1726,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09006039798259735,
"step": 1115,
"valid_targets_mean": 5076.4,
"valid_targets_min": 2018
},
{
"epoch": 4.409448818897638,
"grad_norm": 0.5329319846708648,
"learning_rate": 1.4533405963109867e-05,
"loss": 0.1739,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0972193032503128,
"step": 1120,
"valid_targets_mean": 4625.4,
"valid_targets_min": 2375
},
{
"epoch": 4.429133858267717,
"grad_norm": 0.4940995339694309,
"learning_rate": 1.434479980723833e-05,
"loss": 0.1795,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10279802978038788,
"step": 1125,
"valid_targets_mean": 4961.4,
"valid_targets_min": 3584
},
{
"epoch": 4.448818897637795,
"grad_norm": 0.5334602072933473,
"learning_rate": 1.4156738711331793e-05,
"loss": 0.1763,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09925266355276108,
"step": 1130,
"valid_targets_mean": 4828.4,
"valid_targets_min": 2970
},
{
"epoch": 4.468503937007874,
"grad_norm": 0.4990688552042186,
"learning_rate": 1.3969240801110088e-05,
"loss": 0.1706,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0710003674030304,
"step": 1135,
"valid_targets_mean": 4154.5,
"valid_targets_min": 2302
},
{
"epoch": 4.488188976377953,
"grad_norm": 0.6135736938875075,
"learning_rate": 1.3782324148012061e-05,
"loss": 0.1723,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09071006625890732,
"step": 1140,
"valid_targets_mean": 5006.8,
"valid_targets_min": 2032
},
{
"epoch": 4.5078740157480315,
"grad_norm": 0.5387855632675069,
"learning_rate": 1.3596006767453766e-05,
"loss": 0.1704,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08249925076961517,
"step": 1145,
"valid_targets_mean": 3870.4,
"valid_targets_min": 1956
},
{
"epoch": 4.52755905511811,
"grad_norm": 0.5123496452658673,
"learning_rate": 1.3410306617092134e-05,
"loss": 0.1821,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09201376885175705,
"step": 1150,
"valid_targets_mean": 4862.6,
"valid_targets_min": 2642
},
{
"epoch": 4.547244094488189,
"grad_norm": 0.453771631379653,
"learning_rate": 1.3225241595094173e-05,
"loss": 0.1767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.082999587059021,
"step": 1155,
"valid_targets_mean": 5168.2,
"valid_targets_min": 3306
},
{
"epoch": 4.566929133858268,
"grad_norm": 0.5069858179077319,
"learning_rate": 1.3040829538411876e-05,
"loss": 0.1748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08771272003650665,
"step": 1160,
"valid_targets_mean": 4984.4,
"valid_targets_min": 3184
},
{
"epoch": 4.586614173228346,
"grad_norm": 0.6123964626310717,
"learning_rate": 1.2857088221063099e-05,
"loss": 0.1785,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11825212836265564,
"step": 1165,
"valid_targets_mean": 4579.2,
"valid_targets_min": 2498
},
{
"epoch": 4.606299212598425,
"grad_norm": 0.48624262970146354,
"learning_rate": 1.2674035352418425e-05,
"loss": 0.1782,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09413884580135345,
"step": 1170,
"valid_targets_mean": 4823.4,
"valid_targets_min": 2289
},
{
"epoch": 4.625984251968504,
"grad_norm": 0.45011980171776417,
"learning_rate": 1.2491688575494337e-05,
"loss": 0.1658,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0619700625538826,
"step": 1175,
"valid_targets_mean": 4845.9,
"valid_targets_min": 2650
},
{
"epoch": 4.645669291338582,
"grad_norm": 0.8408949988873313,
"learning_rate": 1.231006546525273e-05,
"loss": 0.1641,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08598130941390991,
"step": 1180,
"valid_targets_mean": 5306.0,
"valid_targets_min": 3479
},
{
"epoch": 4.665354330708661,
"grad_norm": 0.49373419184619244,
"learning_rate": 1.2129183526906971e-05,
"loss": 0.1657,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0851750522851944,
"step": 1185,
"valid_targets_mean": 5045.0,
"valid_targets_min": 3291
},
{
"epoch": 4.68503937007874,
"grad_norm": 0.5326405536951732,
"learning_rate": 1.1949060194234775e-05,
"loss": 0.1761,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08078961819410324,
"step": 1190,
"valid_targets_mean": 4150.8,
"valid_targets_min": 1984
},
{
"epoch": 4.7047244094488185,
"grad_norm": 0.5394844587783858,
"learning_rate": 1.1769712827897825e-05,
"loss": 0.1807,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09843495488166809,
"step": 1195,
"valid_targets_mean": 4312.4,
"valid_targets_min": 1766
},
{
"epoch": 4.724409448818898,
"grad_norm": 0.5146079393537658,
"learning_rate": 1.159115871376858e-05,
"loss": 0.1746,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09015670418739319,
"step": 1200,
"valid_targets_mean": 4489.9,
"valid_targets_min": 3173
},
{
"epoch": 4.744094488188976,
"grad_norm": 0.49026915012450123,
"learning_rate": 1.1413415061264205e-05,
"loss": 0.1763,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09576011449098587,
"step": 1205,
"valid_targets_mean": 5141.9,
"valid_targets_min": 3199
},
{
"epoch": 4.7637795275590555,
"grad_norm": 0.4984489699827655,
"learning_rate": 1.1236499001687853e-05,
"loss": 0.1796,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08644038438796997,
"step": 1210,
"valid_targets_mean": 4635.4,
"valid_targets_min": 3182
},
{
"epoch": 4.783464566929134,
"grad_norm": 0.5366899061765646,
"learning_rate": 1.106042758657758e-05,
"loss": 0.1682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0846748873591423,
"step": 1215,
"valid_targets_mean": 5176.8,
"valid_targets_min": 1385
},
{
"epoch": 4.803149606299213,
"grad_norm": 0.5222825496648152,
"learning_rate": 1.0885217786062837e-05,
"loss": 0.1822,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08502589166164398,
"step": 1220,
"valid_targets_mean": 3747.8,
"valid_targets_min": 2284
},
{
"epoch": 4.822834645669292,
"grad_norm": 0.5173757722479255,
"learning_rate": 1.0710886487228868e-05,
"loss": 0.179,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09386999905109406,
"step": 1225,
"valid_targets_mean": 4350.2,
"valid_targets_min": 2937
},
{
"epoch": 4.84251968503937,
"grad_norm": 0.5527314732405585,
"learning_rate": 1.053745049248911e-05,
"loss": 0.1729,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09248842298984528,
"step": 1230,
"valid_targets_mean": 4032.8,
"valid_targets_min": 1140
},
{
"epoch": 4.862204724409449,
"grad_norm": 0.5343992927396752,
"learning_rate": 1.0364926517965693e-05,
"loss": 0.1746,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09141820669174194,
"step": 1235,
"valid_targets_mean": 4472.9,
"valid_targets_min": 2174
},
{
"epoch": 4.881889763779528,
"grad_norm": 0.5185049275411711,
"learning_rate": 1.0193331191878388e-05,
"loss": 0.176,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08886758983135223,
"step": 1240,
"valid_targets_mean": 4476.9,
"valid_targets_min": 1848
},
{
"epoch": 4.9015748031496065,
"grad_norm": 0.5285277008214899,
"learning_rate": 1.0022681052941856e-05,
"loss": 0.1805,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07690629363059998,
"step": 1245,
"valid_targets_mean": 4140.1,
"valid_targets_min": 989
},
{
"epoch": 4.921259842519685,
"grad_norm": 0.45158866163600003,
"learning_rate": 9.852992548771674e-06,
"loss": 0.1702,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08501986414194107,
"step": 1250,
"valid_targets_mean": 5294.5,
"valid_targets_min": 3235
},
{
"epoch": 4.940944881889764,
"grad_norm": 0.4967531116537934,
"learning_rate": 9.684282034299053e-06,
"loss": 0.185,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08608633279800415,
"step": 1255,
"valid_targets_mean": 4746.0,
"valid_targets_min": 3438
},
{
"epoch": 4.960629921259843,
"grad_norm": 0.4936377127518467,
"learning_rate": 9.516565770194523e-06,
"loss": 0.1765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08558323979377747,
"step": 1260,
"valid_targets_mean": 4610.6,
"valid_targets_min": 2034
},
{
"epoch": 4.980314960629921,
"grad_norm": 0.522499133636897,
"learning_rate": 9.349859921300704e-06,
"loss": 0.1767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08458113670349121,
"step": 1265,
"valid_targets_mean": 4060.1,
"valid_targets_min": 2063
},
{
"epoch": 5.0,
"grad_norm": 0.5026944790888711,
"learning_rate": 9.184180555074258e-06,
"loss": 0.1715,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09565676003694534,
"step": 1270,
"valid_targets_mean": 5404.0,
"valid_targets_min": 4172
},
{
"epoch": 5.019685039370079,
"grad_norm": 0.4921818717334341,
"learning_rate": 9.019543640037363e-06,
"loss": 0.1633,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08606697618961334,
"step": 1275,
"valid_targets_mean": 4479.0,
"valid_targets_min": 2423
},
{
"epoch": 5.039370078740157,
"grad_norm": 0.477118599526538,
"learning_rate": 8.855965044238554e-06,
"loss": 0.1683,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07594689726829529,
"step": 1280,
"valid_targets_mean": 4830.5,
"valid_targets_min": 2849
},
{
"epoch": 5.059055118110236,
"grad_norm": 0.5637298264191778,
"learning_rate": 8.693460533723346e-06,
"loss": 0.1614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07979156076908112,
"step": 1285,
"valid_targets_mean": 3962.8,
"valid_targets_min": 2932
},
{
"epoch": 5.078740157480315,
"grad_norm": 0.5314691562280671,
"learning_rate": 8.532045771014693e-06,
"loss": 0.1663,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09139946103096008,
"step": 1290,
"valid_targets_mean": 5029.0,
"valid_targets_min": 4129
},
{
"epoch": 5.0984251968503935,
"grad_norm": 0.4751096505635553,
"learning_rate": 8.37173631360339e-06,
"loss": 0.1637,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06722073256969452,
"step": 1295,
"valid_targets_mean": 3845.1,
"valid_targets_min": 1445
},
{
"epoch": 5.118110236220472,
"grad_norm": 0.524191832238973,
"learning_rate": 8.212547612448595e-06,
"loss": 0.1701,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08154323697090149,
"step": 1300,
"valid_targets_mean": 3978.8,
"valid_targets_min": 2066
},
{
"epoch": 5.137795275590551,
"grad_norm": 0.6085871704436021,
"learning_rate": 8.054495010488658e-06,
"loss": 0.1691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09445767104625702,
"step": 1305,
"valid_targets_mean": 4637.4,
"valid_targets_min": 2404
},
{
"epoch": 5.15748031496063,
"grad_norm": 0.5108909188996885,
"learning_rate": 7.897593741162316e-06,
"loss": 0.1696,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09972027689218521,
"step": 1310,
"valid_targets_mean": 5083.0,
"valid_targets_min": 2604
},
{
"epoch": 5.177165354330708,
"grad_norm": 0.48122449331080136,
"learning_rate": 7.741858926940475e-06,
"loss": 0.1668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08790165185928345,
"step": 1315,
"valid_targets_mean": 5444.8,
"valid_targets_min": 2368
},
{
"epoch": 5.196850393700787,
"grad_norm": 0.512097148752623,
"learning_rate": 7.587305577868658e-06,
"loss": 0.1701,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09286567568778992,
"step": 1320,
"valid_targets_mean": 5531.2,
"valid_targets_min": 2818
},
{
"epoch": 5.216535433070866,
"grad_norm": 0.5338641878283157,
"learning_rate": 7.433948590120326e-06,
"loss": 0.1575,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09053854644298553,
"step": 1325,
"valid_targets_mean": 4526.5,
"valid_targets_min": 2647
},
{
"epoch": 5.2362204724409445,
"grad_norm": 0.5473778139096743,
"learning_rate": 7.281802744561166e-06,
"loss": 0.164,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0884595662355423,
"step": 1330,
"valid_targets_mean": 5014.4,
"valid_targets_min": 2439
},
{
"epoch": 5.255905511811024,
"grad_norm": 0.5048699184213502,
"learning_rate": 7.130882705324422e-06,
"loss": 0.1708,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08006680011749268,
"step": 1335,
"valid_targets_mean": 4732.8,
"valid_targets_min": 2584
},
{
"epoch": 5.275590551181103,
"grad_norm": 0.49658609082278143,
"learning_rate": 6.9812030183976e-06,
"loss": 0.1599,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07890729606151581,
"step": 1340,
"valid_targets_mean": 4871.1,
"valid_targets_min": 2214
},
{
"epoch": 5.2952755905511815,
"grad_norm": 0.5555183690469903,
"learning_rate": 6.832778110220457e-06,
"loss": 0.167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07105297595262527,
"step": 1345,
"valid_targets_mean": 3463.1,
"valid_targets_min": 1820
},
{
"epoch": 5.31496062992126,
"grad_norm": 0.5045405716389756,
"learning_rate": 6.685622286294571e-06,
"loss": 0.1657,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09518933296203613,
"step": 1350,
"valid_targets_mean": 5290.8,
"valid_targets_min": 1960
},
{
"epoch": 5.334645669291339,
"grad_norm": 0.5153397019945123,
"learning_rate": 6.539749729804539e-06,
"loss": 0.166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07892471551895142,
"step": 1355,
"valid_targets_mean": 3727.4,
"valid_targets_min": 1966
},
{
"epoch": 5.354330708661418,
"grad_norm": 0.6338357200082234,
"learning_rate": 6.395174500250949e-06,
"loss": 0.1674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07854849100112915,
"step": 1360,
"valid_targets_mean": 4173.8,
"valid_targets_min": 1062
},
{
"epoch": 5.374015748031496,
"grad_norm": 0.5148655016822371,
"learning_rate": 6.251910532095349e-06,
"loss": 0.1705,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09632918983697891,
"step": 1365,
"valid_targets_mean": 4951.4,
"valid_targets_min": 3166
},
{
"epoch": 5.393700787401575,
"grad_norm": 0.5229040924574366,
"learning_rate": 6.109971633417169e-06,
"loss": 0.1759,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08995474874973297,
"step": 1370,
"valid_targets_mean": 4442.6,
"valid_targets_min": 3129
},
{
"epoch": 5.413385826771654,
"grad_norm": 0.5172468334275507,
"learning_rate": 5.969371484582887e-06,
"loss": 0.1639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07358592003583908,
"step": 1375,
"valid_targets_mean": 3890.6,
"valid_targets_min": 2544
},
{
"epoch": 5.433070866141732,
"grad_norm": 0.49953724706356384,
"learning_rate": 5.830123636927485e-06,
"loss": 0.1662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.091441310942173,
"step": 1380,
"valid_targets_mean": 4748.2,
"valid_targets_min": 3548
},
{
"epoch": 5.452755905511811,
"grad_norm": 0.46859884726914874,
"learning_rate": 5.692241511448342e-06,
"loss": 0.1639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09200026094913483,
"step": 1385,
"valid_targets_mean": 6322.5,
"valid_targets_min": 3607
},
{
"epoch": 5.47244094488189,
"grad_norm": 0.5159895682679654,
"learning_rate": 5.555738397511699e-06,
"loss": 0.1623,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0870046317577362,
"step": 1390,
"valid_targets_mean": 4484.2,
"valid_targets_min": 1894
},
{
"epoch": 5.4921259842519685,
"grad_norm": 0.46758460204999724,
"learning_rate": 5.4206274515717735e-06,
"loss": 0.1674,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08001154661178589,
"step": 1395,
"valid_targets_mean": 5091.0,
"valid_targets_min": 2682
},
{
"epoch": 5.511811023622047,
"grad_norm": 0.4820065015325502,
"learning_rate": 5.286921695902749e-06,
"loss": 0.1683,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07458669692277908,
"step": 1400,
"valid_targets_mean": 4493.5,
"valid_targets_min": 1763
},
{
"epoch": 5.531496062992126,
"grad_norm": 0.487336717815755,
"learning_rate": 5.154634017343662e-06,
"loss": 0.1726,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08721338957548141,
"step": 1405,
"valid_targets_mean": 4988.9,
"valid_targets_min": 3120
},
{
"epoch": 5.551181102362205,
"grad_norm": 0.44111051115068833,
"learning_rate": 5.023777166056294e-06,
"loss": 0.1699,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07111923396587372,
"step": 1410,
"valid_targets_mean": 4932.6,
"valid_targets_min": 4100
},
{
"epoch": 5.570866141732283,
"grad_norm": 0.5061829014965509,
"learning_rate": 4.89436375429633e-06,
"loss": 0.1748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08149853348731995,
"step": 1415,
"valid_targets_mean": 4592.6,
"valid_targets_min": 2518
},
{
"epoch": 5.590551181102362,
"grad_norm": 0.4702210327073961,
"learning_rate": 4.766406255197751e-06,
"loss": 0.1695,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08787278831005096,
"step": 1420,
"valid_targets_mean": 5422.2,
"valid_targets_min": 1821
},
{
"epoch": 5.610236220472441,
"grad_norm": 0.51011872594175,
"learning_rate": 4.639917001570644e-06,
"loss": 0.1687,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07647247612476349,
"step": 1425,
"valid_targets_mean": 4464.4,
"valid_targets_min": 1580
},
{
"epoch": 5.6299212598425195,
"grad_norm": 0.45225874565536467,
"learning_rate": 4.51490818471255e-06,
"loss": 0.166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07619109749794006,
"step": 1430,
"valid_targets_mean": 5799.1,
"valid_targets_min": 3991
},
{
"epoch": 5.649606299212598,
"grad_norm": 0.5204457827383387,
"learning_rate": 4.391391853233404e-06,
"loss": 0.1566,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08643794804811478,
"step": 1435,
"valid_targets_mean": 4547.4,
"valid_targets_min": 1761
},
{
"epoch": 5.669291338582677,
"grad_norm": 0.5018965820955225,
"learning_rate": 4.269379911894336e-06,
"loss": 0.1673,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07980623841285706,
"step": 1440,
"valid_targets_mean": 4891.4,
"valid_targets_min": 2385
},
{
"epoch": 5.688976377952756,
"grad_norm": 0.48851124171423876,
"learning_rate": 4.148884120460186e-06,
"loss": 0.1637,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08670505881309509,
"step": 1445,
"valid_targets_mean": 5519.1,
"valid_targets_min": 3917
},
{
"epoch": 5.708661417322834,
"grad_norm": 0.543645860470132,
"learning_rate": 4.029916092566131e-06,
"loss": 0.1628,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0755152478814125,
"step": 1450,
"valid_targets_mean": 4137.2,
"valid_targets_min": 1825
},
{
"epoch": 5.728346456692913,
"grad_norm": 0.4570693628318921,
"learning_rate": 3.91248729459831e-06,
"loss": 0.1654,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07983046770095825,
"step": 1455,
"valid_targets_mean": 4830.1,
"valid_targets_min": 3197
},
{
"epoch": 5.748031496062993,
"grad_norm": 0.5606035436876963,
"learning_rate": 3.796609044588686e-06,
"loss": 0.168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10572928190231323,
"step": 1460,
"valid_targets_mean": 4837.2,
"valid_targets_min": 3114
},
{
"epoch": 5.76771653543307,
"grad_norm": 0.5131984027357266,
"learning_rate": 3.682292511124179e-06,
"loss": 0.1673,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10337848216295242,
"step": 1465,
"valid_targets_mean": 5513.4,
"valid_targets_min": 3276
},
{
"epoch": 5.78740157480315,
"grad_norm": 0.6117024928049992,
"learning_rate": 3.569548712270201e-06,
"loss": 0.1627,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08476019650697708,
"step": 1470,
"valid_targets_mean": 4941.6,
"valid_targets_min": 2479
},
{
"epoch": 5.807086614173229,
"grad_norm": 0.5525084322003452,
"learning_rate": 3.4583885145087613e-06,
"loss": 0.1636,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07605169713497162,
"step": 1475,
"valid_targets_mean": 3171.5,
"valid_targets_min": 1556
},
{
"epoch": 5.826771653543307,
"grad_norm": 0.5208779064615999,
"learning_rate": 3.348822631691082e-06,
"loss": 0.1671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08517937362194061,
"step": 1480,
"valid_targets_mean": 4597.2,
"valid_targets_min": 1457
},
{
"epoch": 5.846456692913386,
"grad_norm": 0.4557596592086496,
"learning_rate": 3.240861624004983e-06,
"loss": 0.1649,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07405765354633331,
"step": 1485,
"valid_targets_mean": 5108.5,
"valid_targets_min": 2836
},
{
"epoch": 5.866141732283465,
"grad_norm": 0.5247598563805814,
"learning_rate": 3.1345158969570933e-06,
"loss": 0.166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08560183644294739,
"step": 1490,
"valid_targets_mean": 4850.5,
"valid_targets_min": 2014
},
{
"epoch": 5.8858267716535435,
"grad_norm": 0.5105090824120696,
"learning_rate": 3.0297957003699284e-06,
"loss": 0.1654,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07326743006706238,
"step": 1495,
"valid_targets_mean": 4361.8,
"valid_targets_min": 3201
},
{
"epoch": 5.905511811023622,
"grad_norm": 0.526052324589551,
"learning_rate": 2.926711127393993e-06,
"loss": 0.1664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08639273792505264,
"step": 1500,
"valid_targets_mean": 4936.2,
"valid_targets_min": 2748
},
{
"epoch": 5.925196850393701,
"grad_norm": 0.5069852894034068,
"learning_rate": 2.8252721135349892e-06,
"loss": 0.1652,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08522128313779831,
"step": 1505,
"valid_targets_mean": 4933.2,
"valid_targets_min": 3293
},
{
"epoch": 5.94488188976378,
"grad_norm": 0.5317532980361488,
"learning_rate": 2.7254884356961976e-06,
"loss": 0.1666,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08667327463626862,
"step": 1510,
"valid_targets_mean": 5303.9,
"valid_targets_min": 2926
},
{
"epoch": 5.964566929133858,
"grad_norm": 0.5015389678507081,
"learning_rate": 2.6273697112361786e-06,
"loss": 0.1671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08042607456445694,
"step": 1515,
"valid_targets_mean": 4583.9,
"valid_targets_min": 3242
},
{
"epoch": 5.984251968503937,
"grad_norm": 0.5204137114567977,
"learning_rate": 2.5309253970418056e-06,
"loss": 0.175,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10253343731164932,
"step": 1520,
"valid_targets_mean": 5489.5,
"valid_targets_min": 2938
},
{
"epoch": 6.003937007874016,
"grad_norm": 0.5604449436657137,
"learning_rate": 2.436164788616815e-06,
"loss": 0.1631,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08368267863988876,
"step": 1525,
"valid_targets_mean": 4382.4,
"valid_targets_min": 1820
},
{
"epoch": 6.0236220472440944,
"grad_norm": 0.4789290088149069,
"learning_rate": 2.3430970191858873e-06,
"loss": 0.1686,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07611675560474396,
"step": 1530,
"valid_targets_mean": 4700.1,
"valid_targets_min": 1642
},
{
"epoch": 6.043307086614173,
"grad_norm": 0.5064128010686474,
"learning_rate": 2.2517310588143372e-06,
"loss": 0.1567,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09015149623155594,
"step": 1535,
"valid_targets_mean": 4859.8,
"valid_targets_min": 3398
},
{
"epoch": 6.062992125984252,
"grad_norm": 0.4941812186523566,
"learning_rate": 2.1620757135435875e-06,
"loss": 0.1598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08436579257249832,
"step": 1540,
"valid_targets_mean": 4995.9,
"valid_targets_min": 3623
},
{
"epoch": 6.082677165354331,
"grad_norm": 0.5124674783729655,
"learning_rate": 2.0741396245424263e-06,
"loss": 0.1627,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0881134569644928,
"step": 1545,
"valid_targets_mean": 4931.5,
"valid_targets_min": 2544
},
{
"epoch": 6.102362204724409,
"grad_norm": 0.49287033330684754,
"learning_rate": 1.9879312672741326e-06,
"loss": 0.1662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08833058178424835,
"step": 1550,
"valid_targets_mean": 4804.8,
"valid_targets_min": 2318
},
{
"epoch": 6.122047244094488,
"grad_norm": 0.45292937798730987,
"learning_rate": 1.903458950679613e-06,
"loss": 0.1567,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07980772852897644,
"step": 1555,
"valid_targets_mean": 5526.5,
"valid_targets_min": 2906
},
{
"epoch": 6.141732283464567,
"grad_norm": 0.5403685691058474,
"learning_rate": 1.820730816376548e-06,
"loss": 0.1614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07747212052345276,
"step": 1560,
"valid_targets_mean": 3924.8,
"valid_targets_min": 940
},
{
"epoch": 6.161417322834645,
"grad_norm": 0.4834691855432631,
"learning_rate": 1.7397548378747142e-06,
"loss": 0.1621,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08273802697658539,
"step": 1565,
"valid_targets_mean": 5364.6,
"valid_targets_min": 3639
},
{
"epoch": 6.181102362204724,
"grad_norm": 0.48074583845274044,
"learning_rate": 1.660538819807458e-06,
"loss": 0.1624,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07710836827754974,
"step": 1570,
"valid_targets_mean": 4421.6,
"valid_targets_min": 2368
},
{
"epoch": 6.200787401574803,
"grad_norm": 0.4894409396739944,
"learning_rate": 1.5830903971794765e-06,
"loss": 0.1588,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07420961558818817,
"step": 1575,
"valid_targets_mean": 4029.6,
"valid_targets_min": 2823
},
{
"epoch": 6.2204724409448815,
"grad_norm": 0.5099428277493933,
"learning_rate": 1.5074170346309357e-06,
"loss": 0.1672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08051269501447678,
"step": 1580,
"valid_targets_mean": 4882.5,
"valid_targets_min": 2229
},
{
"epoch": 6.24015748031496,
"grad_norm": 0.5077115103642125,
"learning_rate": 1.4335260257180262e-06,
"loss": 0.1613,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08975529670715332,
"step": 1585,
"valid_targets_mean": 4585.4,
"valid_targets_min": 1868
},
{
"epoch": 6.259842519685039,
"grad_norm": 0.5111367313012644,
"learning_rate": 1.3614244922099796e-06,
"loss": 0.1592,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07821144908666611,
"step": 1590,
"valid_targets_mean": 4993.5,
"valid_targets_min": 1930
},
{
"epoch": 6.2795275590551185,
"grad_norm": 0.5519304104422702,
"learning_rate": 1.2911193834026548e-06,
"loss": 0.1642,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07283198833465576,
"step": 1595,
"valid_targets_mean": 4038.9,
"valid_targets_min": 2633
},
{
"epoch": 6.299212598425197,
"grad_norm": 0.48455181331356906,
"learning_rate": 1.222617475448773e-06,
"loss": 0.1638,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08460994809865952,
"step": 1600,
"valid_targets_mean": 5376.4,
"valid_targets_min": 4617
},
{
"epoch": 6.318897637795276,
"grad_norm": 0.48745982808093374,
"learning_rate": 1.1559253707048046e-06,
"loss": 0.1605,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07833869755268097,
"step": 1605,
"valid_targets_mean": 5090.8,
"valid_targets_min": 1942
},
{
"epoch": 6.338582677165355,
"grad_norm": 0.6120466928838578,
"learning_rate": 1.0910494970946073e-06,
"loss": 0.161,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08072391152381897,
"step": 1610,
"valid_targets_mean": 4195.8,
"valid_targets_min": 1510
},
{
"epoch": 6.358267716535433,
"grad_norm": 0.5298362878907048,
"learning_rate": 1.027996107489908e-06,
"loss": 0.1668,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08345876634120941,
"step": 1615,
"valid_targets_mean": 4413.0,
"valid_targets_min": 1837
},
{
"epoch": 6.377952755905512,
"grad_norm": 0.46095190732459473,
"learning_rate": 9.667712791076345e-07,
"loss": 0.1536,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0628621056675911,
"step": 1620,
"valid_targets_mean": 5097.9,
"valid_targets_min": 2980
},
{
"epoch": 6.397637795275591,
"grad_norm": 0.5292896556171527,
"learning_rate": 9.073809129241784e-07,
"loss": 0.1652,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08581379055976868,
"step": 1625,
"valid_targets_mean": 4281.0,
"valid_targets_min": 2568
},
{
"epoch": 6.417322834645669,
"grad_norm": 0.5305855369121426,
"learning_rate": 8.498307331066446e-07,
"loss": 0.1671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08045978844165802,
"step": 1630,
"valid_targets_mean": 4062.0,
"valid_targets_min": 1894
},
{
"epoch": 6.437007874015748,
"grad_norm": 0.4809399720073739,
"learning_rate": 7.941262864611387e-07,
"loss": 0.1607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0818355530500412,
"step": 1635,
"valid_targets_mean": 4981.8,
"valid_targets_min": 3452
},
{
"epoch": 6.456692913385827,
"grad_norm": 0.5239913334410846,
"learning_rate": 7.402729418981769e-07,
"loss": 0.1662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07759232819080353,
"step": 1640,
"valid_targets_mean": 4395.6,
"valid_targets_min": 2246
},
{
"epoch": 6.4763779527559056,
"grad_norm": 0.47893757701421025,
"learning_rate": 6.882758899151886e-07,
"loss": 0.1578,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07555696368217468,
"step": 1645,
"valid_targets_mean": 4458.4,
"valid_targets_min": 2787
},
{
"epoch": 6.496062992125984,
"grad_norm": 0.5162910385282404,
"learning_rate": 6.381401420962729e-07,
"loss": 0.16,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08070987462997437,
"step": 1650,
"valid_targets_mean": 4350.5,
"valid_targets_min": 3276
},
{
"epoch": 6.515748031496063,
"grad_norm": 0.506279516199066,
"learning_rate": 5.898705306291508e-07,
"loss": 0.1641,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09255889058113098,
"step": 1655,
"valid_targets_mean": 4753.9,
"valid_targets_min": 2159
},
{
"epoch": 6.535433070866142,
"grad_norm": 0.5418218433978077,
"learning_rate": 5.434717078394447e-07,
"loss": 0.1635,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08933492004871368,
"step": 1660,
"valid_targets_mean": 4376.5,
"valid_targets_min": 1554
},
{
"epoch": 6.55511811023622,
"grad_norm": 0.5223130741854497,
"learning_rate": 4.98948145742264e-07,
"loss": 0.1655,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07107318937778473,
"step": 1665,
"valid_targets_mean": 4354.6,
"valid_targets_min": 3022
},
{
"epoch": 6.574803149606299,
"grad_norm": 0.5173779362959591,
"learning_rate": 4.563041356111919e-07,
"loss": 0.1598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0851677805185318,
"step": 1670,
"valid_targets_mean": 4797.2,
"valid_targets_min": 3011
},
{
"epoch": 6.594488188976378,
"grad_norm": 0.5599025622422927,
"learning_rate": 4.155437875646828e-07,
"loss": 0.1624,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08890414237976074,
"step": 1675,
"valid_targets_mean": 3807.6,
"valid_targets_min": 2243
},
{
"epoch": 6.6141732283464565,
"grad_norm": 0.5025184876238337,
"learning_rate": 3.7667103016992167e-07,
"loss": 0.1545,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07961277663707733,
"step": 1680,
"valid_targets_mean": 4944.1,
"valid_targets_min": 2626
},
{
"epoch": 6.633858267716535,
"grad_norm": 0.5014517579389977,
"learning_rate": 3.396896100641689e-07,
"loss": 0.1667,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08768412470817566,
"step": 1685,
"valid_targets_mean": 5011.2,
"valid_targets_min": 3240
},
{
"epoch": 6.653543307086614,
"grad_norm": 0.4850086574076552,
"learning_rate": 3.0460309159366486e-07,
"loss": 0.1612,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0838203877210617,
"step": 1690,
"valid_targets_mean": 4882.8,
"valid_targets_min": 3034
},
{
"epoch": 6.673228346456693,
"grad_norm": 0.5430212866747284,
"learning_rate": 2.714148564700914e-07,
"loss": 0.1625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.06709130853414536,
"step": 1695,
"valid_targets_mean": 4135.2,
"valid_targets_min": 1859
},
{
"epoch": 6.692913385826771,
"grad_norm": 0.5185336220440259,
"learning_rate": 2.401281034446212e-07,
"loss": 0.1627,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09611879289150238,
"step": 1700,
"valid_targets_mean": 4561.1,
"valid_targets_min": 1544
},
{
"epoch": 6.71259842519685,
"grad_norm": 0.510715490737711,
"learning_rate": 2.107458479996316e-07,
"loss": 0.1585,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08368197828531265,
"step": 1705,
"valid_targets_mean": 4559.2,
"valid_targets_min": 3647
},
{
"epoch": 6.73228346456693,
"grad_norm": 0.5259828048113797,
"learning_rate": 1.8327092205805641e-07,
"loss": 0.1671,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08739855885505676,
"step": 1710,
"valid_targets_mean": 4918.9,
"valid_targets_min": 3152
},
{
"epoch": 6.751968503937007,
"grad_norm": 0.5181556670275821,
"learning_rate": 1.577059737104447e-07,
"loss": 0.1633,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07951222360134125,
"step": 1715,
"valid_targets_mean": 4169.6,
"valid_targets_min": 2062
},
{
"epoch": 6.771653543307087,
"grad_norm": 0.4805983628208804,
"learning_rate": 1.3405346695972265e-07,
"loss": 0.1612,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08790119737386703,
"step": 1720,
"valid_targets_mean": 4891.6,
"valid_targets_min": 3807
},
{
"epoch": 6.791338582677166,
"grad_norm": 0.5468037181943199,
"learning_rate": 1.1231568148372562e-07,
"loss": 0.1636,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07529794424772263,
"step": 1725,
"valid_targets_mean": 4327.4,
"valid_targets_min": 1330
},
{
"epoch": 6.811023622047244,
"grad_norm": 0.5755093339707816,
"learning_rate": 9.249471241545849e-08,
"loss": 0.1645,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08119311928749084,
"step": 1730,
"valid_targets_mean": 4506.6,
"valid_targets_min": 1701
},
{
"epoch": 6.830708661417323,
"grad_norm": 0.517210307021055,
"learning_rate": 7.459247014117488e-08,
"loss": 0.1604,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07390396296977997,
"step": 1735,
"valid_targets_mean": 4528.4,
"valid_targets_min": 3090
},
{
"epoch": 6.850393700787402,
"grad_norm": 0.5266811152011874,
"learning_rate": 5.861068011624449e-08,
"loss": 0.161,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09163401275873184,
"step": 1740,
"valid_targets_mean": 4169.8,
"valid_targets_min": 2036
},
{
"epoch": 6.8700787401574805,
"grad_norm": 0.5389957834788685,
"learning_rate": 4.4550882698857214e-08,
"loss": 0.1682,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11459677666425705,
"step": 1745,
"valid_targets_mean": 5095.9,
"valid_targets_min": 4053
},
{
"epoch": 6.889763779527559,
"grad_norm": 0.4849160260971524,
"learning_rate": 3.241443300154856e-08,
"loss": 0.1602,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07561354339122772,
"step": 1750,
"valid_targets_mean": 5546.2,
"valid_targets_min": 1140
},
{
"epoch": 6.909448818897638,
"grad_norm": 0.4851161576883834,
"learning_rate": 2.220250076060193e-08,
"loss": 0.1612,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08798874169588089,
"step": 1755,
"valid_targets_mean": 5893.6,
"valid_targets_min": 1706
},
{
"epoch": 6.929133858267717,
"grad_norm": 0.5509225904804432,
"learning_rate": 1.3916070223298772e-08,
"loss": 0.1601,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.07277875393629074,
"step": 1760,
"valid_targets_mean": 4170.6,
"valid_targets_min": 1751
},
{
"epoch": 6.948818897637795,
"grad_norm": 0.47110025867128225,
"learning_rate": 7.55594005306337e-09,
"loss": 0.1699,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08793185651302338,
"step": 1765,
"valid_targets_mean": 4946.0,
"valid_targets_min": 2839
},
{
"epoch": 6.968503937007874,
"grad_norm": 0.5083465220351081,
"learning_rate": 3.122723252477755e-09,
"loss": 0.1586,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0875038132071495,
"step": 1770,
"valid_targets_mean": 5438.4,
"valid_targets_min": 3447
},
{
"epoch": 6.988188976377953,
"grad_norm": 0.5182066580940692,
"learning_rate": 6.168471042067303e-10,
"loss": 0.1578,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1035790890455246,
"step": 1775,
"valid_targets_mean": 5390.0,
"valid_targets_min": 3081
},
{
"epoch": 7.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0740923136472702,
"step": 1778,
"total_flos": 6.743229377418363e+17,
"train_loss": 0.20770151239345258,
"train_runtime": 16209.5842,
"train_samples_per_second": 1.754,
"train_steps_per_second": 0.11,
"valid_targets_mean": 4069.1,
"valid_targets_min": 2087
}
],
"logging_steps": 5,
"max_steps": 1778,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 1500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.743229377418363e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}