a1-stack_pytest_gpt5mini / trainer_state.json
EtashGuha's picture
Upload folder using huggingface_hub
a8a0cfd verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 3150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011111111111111112,
"grad_norm": 13.408544798581621,
"learning_rate": 5.07936507936508e-07,
"loss": 0.6676,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5751245617866516,
"step": 5,
"valid_targets_mean": 5385.7,
"valid_targets_min": 310
},
{
"epoch": 0.022222222222222223,
"grad_norm": 16.21108259140317,
"learning_rate": 1.142857142857143e-06,
"loss": 0.658,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.687087893486023,
"step": 10,
"valid_targets_mean": 3831.0,
"valid_targets_min": 293
},
{
"epoch": 0.03333333333333333,
"grad_norm": 12.338215088436316,
"learning_rate": 1.777777777777778e-06,
"loss": 0.6536,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.6238576173782349,
"step": 15,
"valid_targets_mean": 4839.5,
"valid_targets_min": 752
},
{
"epoch": 0.044444444444444446,
"grad_norm": 9.017393522713887,
"learning_rate": 2.412698412698413e-06,
"loss": 0.6353,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5550223588943481,
"step": 20,
"valid_targets_mean": 4619.8,
"valid_targets_min": 599
},
{
"epoch": 0.05555555555555555,
"grad_norm": 5.309166517130694,
"learning_rate": 3.047619047619048e-06,
"loss": 0.5386,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.5435296893119812,
"step": 25,
"valid_targets_mean": 4110.9,
"valid_targets_min": 723
},
{
"epoch": 0.06666666666666667,
"grad_norm": 2.792780616604098,
"learning_rate": 3.6825396825396833e-06,
"loss": 0.5029,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.46670812368392944,
"step": 30,
"valid_targets_mean": 3455.2,
"valid_targets_min": 295
},
{
"epoch": 0.07777777777777778,
"grad_norm": 1.5959020052891244,
"learning_rate": 4.317460317460318e-06,
"loss": 0.4468,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4236889183521271,
"step": 35,
"valid_targets_mean": 5468.6,
"valid_targets_min": 376
},
{
"epoch": 0.08888888888888889,
"grad_norm": 1.0408106474791263,
"learning_rate": 4.952380952380953e-06,
"loss": 0.4331,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.41928115487098694,
"step": 40,
"valid_targets_mean": 5478.6,
"valid_targets_min": 1395
},
{
"epoch": 0.1,
"grad_norm": 0.871196479469939,
"learning_rate": 5.5873015873015876e-06,
"loss": 0.4438,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.40255558490753174,
"step": 45,
"valid_targets_mean": 5138.5,
"valid_targets_min": 1811
},
{
"epoch": 0.1111111111111111,
"grad_norm": 0.8029816404229763,
"learning_rate": 6.222222222222223e-06,
"loss": 0.4316,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3899801969528198,
"step": 50,
"valid_targets_mean": 4666.9,
"valid_targets_min": 555
},
{
"epoch": 0.12222222222222222,
"grad_norm": 0.7515486206211541,
"learning_rate": 6.857142857142858e-06,
"loss": 0.4538,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4362230896949768,
"step": 55,
"valid_targets_mean": 4455.1,
"valid_targets_min": 397
},
{
"epoch": 0.13333333333333333,
"grad_norm": 0.5887418945104569,
"learning_rate": 7.492063492063493e-06,
"loss": 0.3969,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3481084108352661,
"step": 60,
"valid_targets_mean": 5121.1,
"valid_targets_min": 322
},
{
"epoch": 0.14444444444444443,
"grad_norm": 0.6245277781771107,
"learning_rate": 8.126984126984128e-06,
"loss": 0.3905,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.36860743165016174,
"step": 65,
"valid_targets_mean": 3914.1,
"valid_targets_min": 269
},
{
"epoch": 0.15555555555555556,
"grad_norm": 0.5810023779977105,
"learning_rate": 8.761904761904763e-06,
"loss": 0.3874,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3317573368549347,
"step": 70,
"valid_targets_mean": 4313.6,
"valid_targets_min": 1168
},
{
"epoch": 0.16666666666666666,
"grad_norm": 1.0288706410047126,
"learning_rate": 9.396825396825398e-06,
"loss": 0.356,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3530646562576294,
"step": 75,
"valid_targets_mean": 3485.5,
"valid_targets_min": 510
},
{
"epoch": 0.17777777777777778,
"grad_norm": 0.6056824865658866,
"learning_rate": 1.0031746031746033e-05,
"loss": 0.3536,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3671146035194397,
"step": 80,
"valid_targets_mean": 3908.6,
"valid_targets_min": 257
},
{
"epoch": 0.18888888888888888,
"grad_norm": 0.6162370696114597,
"learning_rate": 1.0666666666666667e-05,
"loss": 0.3733,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3743559718132019,
"step": 85,
"valid_targets_mean": 4694.6,
"valid_targets_min": 435
},
{
"epoch": 0.2,
"grad_norm": 0.5931909767383229,
"learning_rate": 1.1301587301587302e-05,
"loss": 0.34,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3645175099372864,
"step": 90,
"valid_targets_mean": 4516.5,
"valid_targets_min": 630
},
{
"epoch": 0.2111111111111111,
"grad_norm": 0.5852862045298343,
"learning_rate": 1.1936507936507937e-05,
"loss": 0.3612,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33980345726013184,
"step": 95,
"valid_targets_mean": 4287.4,
"valid_targets_min": 377
},
{
"epoch": 0.2222222222222222,
"grad_norm": 0.591535838571353,
"learning_rate": 1.2571428571428572e-05,
"loss": 0.3496,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3416215479373932,
"step": 100,
"valid_targets_mean": 3520.2,
"valid_targets_min": 322
},
{
"epoch": 0.23333333333333334,
"grad_norm": 0.5517118398063217,
"learning_rate": 1.3206349206349206e-05,
"loss": 0.3417,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3045486807823181,
"step": 105,
"valid_targets_mean": 3936.1,
"valid_targets_min": 355
},
{
"epoch": 0.24444444444444444,
"grad_norm": 0.6036669845712245,
"learning_rate": 1.3841269841269843e-05,
"loss": 0.3631,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.37737342715263367,
"step": 110,
"valid_targets_mean": 4156.3,
"valid_targets_min": 352
},
{
"epoch": 0.25555555555555554,
"grad_norm": 0.557851756149835,
"learning_rate": 1.4476190476190478e-05,
"loss": 0.33,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.31270942091941833,
"step": 115,
"valid_targets_mean": 4740.2,
"valid_targets_min": 764
},
{
"epoch": 0.26666666666666666,
"grad_norm": 0.5061593555204397,
"learning_rate": 1.5111111111111112e-05,
"loss": 0.3383,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3117508292198181,
"step": 120,
"valid_targets_mean": 5759.2,
"valid_targets_min": 836
},
{
"epoch": 0.2777777777777778,
"grad_norm": 0.5910898872360414,
"learning_rate": 1.5746031746031745e-05,
"loss": 0.3198,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30763545632362366,
"step": 125,
"valid_targets_mean": 4890.5,
"valid_targets_min": 872
},
{
"epoch": 0.28888888888888886,
"grad_norm": 0.44647524167329333,
"learning_rate": 1.6380952380952384e-05,
"loss": 0.302,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27717962861061096,
"step": 130,
"valid_targets_mean": 5430.8,
"valid_targets_min": 869
},
{
"epoch": 0.3,
"grad_norm": 0.5879317402321064,
"learning_rate": 1.7015873015873018e-05,
"loss": 0.3243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30784285068511963,
"step": 135,
"valid_targets_mean": 3904.9,
"valid_targets_min": 393
},
{
"epoch": 0.3111111111111111,
"grad_norm": 0.5605756164325479,
"learning_rate": 1.7650793650793653e-05,
"loss": 0.2979,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2845287322998047,
"step": 140,
"valid_targets_mean": 4072.9,
"valid_targets_min": 517
},
{
"epoch": 0.32222222222222224,
"grad_norm": 0.5215552035079208,
"learning_rate": 1.8285714285714288e-05,
"loss": 0.3149,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26664093136787415,
"step": 145,
"valid_targets_mean": 4416.2,
"valid_targets_min": 335
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.5674313964489882,
"learning_rate": 1.8920634920634923e-05,
"loss": 0.3226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3291376233100891,
"step": 150,
"valid_targets_mean": 5014.2,
"valid_targets_min": 251
},
{
"epoch": 0.34444444444444444,
"grad_norm": 0.5110719079947695,
"learning_rate": 1.9555555555555557e-05,
"loss": 0.3088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29836922883987427,
"step": 155,
"valid_targets_mean": 4633.4,
"valid_targets_min": 656
},
{
"epoch": 0.35555555555555557,
"grad_norm": 0.6271829324260068,
"learning_rate": 2.0190476190476192e-05,
"loss": 0.3173,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.40676355361938477,
"step": 160,
"valid_targets_mean": 4353.1,
"valid_targets_min": 301
},
{
"epoch": 0.36666666666666664,
"grad_norm": 0.5661101605652876,
"learning_rate": 2.082539682539683e-05,
"loss": 0.3187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30450916290283203,
"step": 165,
"valid_targets_mean": 4222.2,
"valid_targets_min": 671
},
{
"epoch": 0.37777777777777777,
"grad_norm": 0.5792882504021095,
"learning_rate": 2.146031746031746e-05,
"loss": 0.2887,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3169078230857849,
"step": 170,
"valid_targets_mean": 4392.9,
"valid_targets_min": 286
},
{
"epoch": 0.3888888888888889,
"grad_norm": 0.5061884693546234,
"learning_rate": 2.20952380952381e-05,
"loss": 0.3388,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32877248525619507,
"step": 175,
"valid_targets_mean": 4587.4,
"valid_targets_min": 603
},
{
"epoch": 0.4,
"grad_norm": 0.55673810264919,
"learning_rate": 2.273015873015873e-05,
"loss": 0.3079,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3139079511165619,
"step": 180,
"valid_targets_mean": 5031.8,
"valid_targets_min": 391
},
{
"epoch": 0.4111111111111111,
"grad_norm": 0.7128274083633475,
"learning_rate": 2.336507936507937e-05,
"loss": 0.3189,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3152565360069275,
"step": 185,
"valid_targets_mean": 2480.9,
"valid_targets_min": 339
},
{
"epoch": 0.4222222222222222,
"grad_norm": 0.5610727534792233,
"learning_rate": 2.4e-05,
"loss": 0.3086,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3390665054321289,
"step": 190,
"valid_targets_mean": 4430.2,
"valid_targets_min": 591
},
{
"epoch": 0.43333333333333335,
"grad_norm": 0.8939323722899397,
"learning_rate": 2.463492063492064e-05,
"loss": 0.3016,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3454466760158539,
"step": 195,
"valid_targets_mean": 3394.8,
"valid_targets_min": 693
},
{
"epoch": 0.4444444444444444,
"grad_norm": 1.0429529339999148,
"learning_rate": 2.526984126984127e-05,
"loss": 0.3146,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2936026453971863,
"step": 200,
"valid_targets_mean": 5333.9,
"valid_targets_min": 883
},
{
"epoch": 0.45555555555555555,
"grad_norm": 0.5853756503183534,
"learning_rate": 2.5904761904761908e-05,
"loss": 0.3152,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3078632950782776,
"step": 205,
"valid_targets_mean": 3632.1,
"valid_targets_min": 636
},
{
"epoch": 0.4666666666666667,
"grad_norm": 0.5715675965569004,
"learning_rate": 2.653968253968254e-05,
"loss": 0.3122,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3024342656135559,
"step": 210,
"valid_targets_mean": 4422.1,
"valid_targets_min": 328
},
{
"epoch": 0.4777777777777778,
"grad_norm": 0.6821942507785,
"learning_rate": 2.7174603174603178e-05,
"loss": 0.3154,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.36502134799957275,
"step": 215,
"valid_targets_mean": 3621.6,
"valid_targets_min": 239
},
{
"epoch": 0.4888888888888889,
"grad_norm": 0.6021391191144772,
"learning_rate": 2.780952380952381e-05,
"loss": 0.28,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25724563002586365,
"step": 220,
"valid_targets_mean": 4738.3,
"valid_targets_min": 656
},
{
"epoch": 0.5,
"grad_norm": 0.5234960619653491,
"learning_rate": 2.8444444444444447e-05,
"loss": 0.3075,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33516725897789,
"step": 225,
"valid_targets_mean": 4865.0,
"valid_targets_min": 231
},
{
"epoch": 0.5111111111111111,
"grad_norm": 0.547550500956153,
"learning_rate": 2.9079365079365082e-05,
"loss": 0.2884,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29283851385116577,
"step": 230,
"valid_targets_mean": 4564.4,
"valid_targets_min": 1213
},
{
"epoch": 0.5222222222222223,
"grad_norm": 0.5897013069621678,
"learning_rate": 2.9714285714285717e-05,
"loss": 0.2937,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30815833806991577,
"step": 235,
"valid_targets_mean": 3828.2,
"valid_targets_min": 325
},
{
"epoch": 0.5333333333333333,
"grad_norm": 0.5735263579255174,
"learning_rate": 3.034920634920635e-05,
"loss": 0.2914,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30891531705856323,
"step": 240,
"valid_targets_mean": 4256.2,
"valid_targets_min": 664
},
{
"epoch": 0.5444444444444444,
"grad_norm": 0.5249052908854333,
"learning_rate": 3.098412698412699e-05,
"loss": 0.308,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26125839352607727,
"step": 245,
"valid_targets_mean": 4620.4,
"valid_targets_min": 995
},
{
"epoch": 0.5555555555555556,
"grad_norm": 0.5405962329977203,
"learning_rate": 3.161904761904762e-05,
"loss": 0.2811,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26012593507766724,
"step": 250,
"valid_targets_mean": 4433.9,
"valid_targets_min": 629
},
{
"epoch": 0.5666666666666667,
"grad_norm": 0.8775775341101245,
"learning_rate": 3.225396825396826e-05,
"loss": 0.2898,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23986881971359253,
"step": 255,
"valid_targets_mean": 3096.6,
"valid_targets_min": 606
},
{
"epoch": 0.5777777777777777,
"grad_norm": 0.540108337375447,
"learning_rate": 3.288888888888889e-05,
"loss": 0.2962,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26411673426628113,
"step": 260,
"valid_targets_mean": 5320.4,
"valid_targets_min": 679
},
{
"epoch": 0.5888888888888889,
"grad_norm": 0.5089656627557569,
"learning_rate": 3.352380952380953e-05,
"loss": 0.298,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2886936664581299,
"step": 265,
"valid_targets_mean": 5059.9,
"valid_targets_min": 2016
},
{
"epoch": 0.6,
"grad_norm": 0.8577322725171138,
"learning_rate": 3.415873015873016e-05,
"loss": 0.2903,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2848944664001465,
"step": 270,
"valid_targets_mean": 4490.1,
"valid_targets_min": 369
},
{
"epoch": 0.6111111111111112,
"grad_norm": 0.7839943851011535,
"learning_rate": 3.47936507936508e-05,
"loss": 0.2766,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2646746039390564,
"step": 275,
"valid_targets_mean": 4372.7,
"valid_targets_min": 347
},
{
"epoch": 0.6222222222222222,
"grad_norm": 0.6552099358054163,
"learning_rate": 3.542857142857143e-05,
"loss": 0.28,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.279890239238739,
"step": 280,
"valid_targets_mean": 3361.1,
"valid_targets_min": 173
},
{
"epoch": 0.6333333333333333,
"grad_norm": 0.6470809605687007,
"learning_rate": 3.606349206349207e-05,
"loss": 0.2958,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3344566822052002,
"step": 285,
"valid_targets_mean": 3997.0,
"valid_targets_min": 263
},
{
"epoch": 0.6444444444444445,
"grad_norm": 0.6042965183251453,
"learning_rate": 3.66984126984127e-05,
"loss": 0.3008,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29824304580688477,
"step": 290,
"valid_targets_mean": 4553.6,
"valid_targets_min": 285
},
{
"epoch": 0.6555555555555556,
"grad_norm": 0.9572004588809255,
"learning_rate": 3.733333333333334e-05,
"loss": 0.2685,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2197730839252472,
"step": 295,
"valid_targets_mean": 4863.1,
"valid_targets_min": 719
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.613931117239092,
"learning_rate": 3.796825396825397e-05,
"loss": 0.2869,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2734295129776001,
"step": 300,
"valid_targets_mean": 3048.4,
"valid_targets_min": 630
},
{
"epoch": 0.6777777777777778,
"grad_norm": 0.7225386302879916,
"learning_rate": 3.860317460317461e-05,
"loss": 0.2635,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24375391006469727,
"step": 305,
"valid_targets_mean": 4168.9,
"valid_targets_min": 949
},
{
"epoch": 0.6888888888888889,
"grad_norm": 0.638212930897173,
"learning_rate": 3.923809523809524e-05,
"loss": 0.2679,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2810541093349457,
"step": 310,
"valid_targets_mean": 3710.6,
"valid_targets_min": 275
},
{
"epoch": 0.7,
"grad_norm": 0.5816992650976073,
"learning_rate": 3.9873015873015876e-05,
"loss": 0.2778,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26782992482185364,
"step": 315,
"valid_targets_mean": 4587.9,
"valid_targets_min": 923
},
{
"epoch": 0.7111111111111111,
"grad_norm": 0.5013466486109508,
"learning_rate": 3.999980352246968e-05,
"loss": 0.2757,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24904786050319672,
"step": 320,
"valid_targets_mean": 4606.5,
"valid_targets_min": 402
},
{
"epoch": 0.7222222222222222,
"grad_norm": 0.713347061955866,
"learning_rate": 3.9999005339118864e-05,
"loss": 0.2866,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.30004629492759705,
"step": 325,
"valid_targets_mean": 3887.9,
"valid_targets_min": 605
},
{
"epoch": 0.7333333333333333,
"grad_norm": 0.47963747379037935,
"learning_rate": 3.9997593194586953e-05,
"loss": 0.2852,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2432718276977539,
"step": 330,
"valid_targets_mean": 4275.9,
"valid_targets_min": 535
},
{
"epoch": 0.7444444444444445,
"grad_norm": 0.6005660039240017,
"learning_rate": 3.99955671322262e-05,
"loss": 0.2799,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2737639248371124,
"step": 335,
"valid_targets_mean": 4195.4,
"valid_targets_min": 300
},
{
"epoch": 0.7555555555555555,
"grad_norm": 0.5205837688812412,
"learning_rate": 3.999292721423588e-05,
"loss": 0.2922,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3177686035633087,
"step": 340,
"valid_targets_mean": 5234.1,
"valid_targets_min": 460
},
{
"epoch": 0.7666666666666667,
"grad_norm": 0.5202820975533874,
"learning_rate": 3.998967352166037e-05,
"loss": 0.2837,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27241072058677673,
"step": 345,
"valid_targets_mean": 4445.9,
"valid_targets_min": 656
},
{
"epoch": 0.7777777777777778,
"grad_norm": 0.5301976190091423,
"learning_rate": 3.998580615438671e-05,
"loss": 0.2983,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27598923444747925,
"step": 350,
"valid_targets_mean": 5029.9,
"valid_targets_min": 859
},
{
"epoch": 0.7888888888888889,
"grad_norm": 0.5033260846088292,
"learning_rate": 3.998132523114146e-05,
"loss": 0.2885,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25926336646080017,
"step": 355,
"valid_targets_mean": 4720.8,
"valid_targets_min": 299
},
{
"epoch": 0.8,
"grad_norm": 0.5062497800182023,
"learning_rate": 3.9976230889487107e-05,
"loss": 0.2716,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25432416796684265,
"step": 360,
"valid_targets_mean": 4968.1,
"valid_targets_min": 598
},
{
"epoch": 0.8111111111111111,
"grad_norm": 0.6022554597774316,
"learning_rate": 3.997052328581783e-05,
"loss": 0.2772,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27107948064804077,
"step": 365,
"valid_targets_mean": 4174.4,
"valid_targets_min": 322
},
{
"epoch": 0.8222222222222222,
"grad_norm": 0.523430681701777,
"learning_rate": 3.99642025953547e-05,
"loss": 0.277,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26425060629844666,
"step": 370,
"valid_targets_mean": 4664.8,
"valid_targets_min": 1464
},
{
"epoch": 0.8333333333333334,
"grad_norm": 0.5063947141064169,
"learning_rate": 3.9957269012140306e-05,
"loss": 0.2786,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2735532522201538,
"step": 375,
"valid_targets_mean": 4143.1,
"valid_targets_min": 428
},
{
"epoch": 0.8444444444444444,
"grad_norm": 0.5680326007926829,
"learning_rate": 3.9949722749032755e-05,
"loss": 0.2705,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24175962805747986,
"step": 380,
"valid_targets_mean": 3959.0,
"valid_targets_min": 530
},
{
"epoch": 0.8555555555555555,
"grad_norm": 0.5324488584110421,
"learning_rate": 3.994156403769922e-05,
"loss": 0.2908,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25714313983917236,
"step": 385,
"valid_targets_mean": 4401.1,
"valid_targets_min": 907
},
{
"epoch": 0.8666666666666667,
"grad_norm": 0.5577161885914526,
"learning_rate": 3.993279312860876e-05,
"loss": 0.2549,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27009209990501404,
"step": 390,
"valid_targets_mean": 4610.8,
"valid_targets_min": 436
},
{
"epoch": 0.8777777777777778,
"grad_norm": 0.4942352058414063,
"learning_rate": 3.9923410291024636e-05,
"loss": 0.2673,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.271714985370636,
"step": 395,
"valid_targets_mean": 4242.4,
"valid_targets_min": 241
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.49807037133844223,
"learning_rate": 3.991341581299609e-05,
"loss": 0.2625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2850569486618042,
"step": 400,
"valid_targets_mean": 5568.6,
"valid_targets_min": 1111
},
{
"epoch": 0.9,
"grad_norm": 0.516015376982657,
"learning_rate": 3.990281000134946e-05,
"loss": 0.2851,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2795839309692383,
"step": 405,
"valid_targets_mean": 4242.3,
"valid_targets_min": 317
},
{
"epoch": 0.9111111111111111,
"grad_norm": 0.4858123801723977,
"learning_rate": 3.989159318167875e-05,
"loss": 0.281,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2678981423377991,
"step": 410,
"valid_targets_mean": 5212.5,
"valid_targets_min": 419
},
{
"epoch": 0.9222222222222223,
"grad_norm": 0.5792943773685142,
"learning_rate": 3.9879765698335705e-05,
"loss": 0.258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24788373708724976,
"step": 415,
"valid_targets_mean": 4014.3,
"valid_targets_min": 1329
},
{
"epoch": 0.9333333333333333,
"grad_norm": 0.5323729594389564,
"learning_rate": 3.986732791441915e-05,
"loss": 0.2765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29175180196762085,
"step": 420,
"valid_targets_mean": 4264.8,
"valid_targets_min": 467
},
{
"epoch": 0.9444444444444444,
"grad_norm": 0.5230426930677612,
"learning_rate": 3.985428021176391e-05,
"loss": 0.2613,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22425571084022522,
"step": 425,
"valid_targets_mean": 4607.9,
"valid_targets_min": 214
},
{
"epoch": 0.9555555555555556,
"grad_norm": 0.5587632060354308,
"learning_rate": 3.984062299092904e-05,
"loss": 0.2607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2455931007862091,
"step": 430,
"valid_targets_mean": 3599.4,
"valid_targets_min": 562
},
{
"epoch": 0.9666666666666667,
"grad_norm": 0.5416728906416407,
"learning_rate": 3.982635667118557e-05,
"loss": 0.2771,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2705487310886383,
"step": 435,
"valid_targets_mean": 4116.0,
"valid_targets_min": 795
},
{
"epoch": 0.9777777777777777,
"grad_norm": 0.5360254523319351,
"learning_rate": 3.981148169050361e-05,
"loss": 0.2749,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24848337471485138,
"step": 440,
"valid_targets_mean": 3970.2,
"valid_targets_min": 499
},
{
"epoch": 0.9888888888888889,
"grad_norm": 0.5253145463662034,
"learning_rate": 3.97959985055389e-05,
"loss": 0.3032,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3328373432159424,
"step": 445,
"valid_targets_mean": 5336.6,
"valid_targets_min": 1847
},
{
"epoch": 1.0,
"grad_norm": 0.48196208890145337,
"learning_rate": 3.97799075916188e-05,
"loss": 0.2528,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27850598096847534,
"step": 450,
"valid_targets_mean": 5520.2,
"valid_targets_min": 1146
},
{
"epoch": 1.011111111111111,
"grad_norm": 0.6252822658746067,
"learning_rate": 3.976320944272773e-05,
"loss": 0.2498,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2661787271499634,
"step": 455,
"valid_targets_mean": 4193.9,
"valid_targets_min": 381
},
{
"epoch": 1.0222222222222221,
"grad_norm": 0.4701033558777485,
"learning_rate": 3.9745904571491916e-05,
"loss": 0.2431,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.252180814743042,
"step": 460,
"valid_targets_mean": 5736.6,
"valid_targets_min": 1675
},
{
"epoch": 1.0333333333333334,
"grad_norm": 0.8485213019287988,
"learning_rate": 3.972799350916375e-05,
"loss": 0.2631,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27109605073928833,
"step": 465,
"valid_targets_mean": 4100.7,
"valid_targets_min": 841
},
{
"epoch": 1.0444444444444445,
"grad_norm": 0.5743499080820584,
"learning_rate": 3.970947680560543e-05,
"loss": 0.2568,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26906871795654297,
"step": 470,
"valid_targets_mean": 3466.4,
"valid_targets_min": 357
},
{
"epoch": 1.0555555555555556,
"grad_norm": 0.5031824328141823,
"learning_rate": 3.969035502927208e-05,
"loss": 0.2522,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2581416368484497,
"step": 475,
"valid_targets_mean": 4564.5,
"valid_targets_min": 740
},
{
"epoch": 1.0666666666666667,
"grad_norm": 0.5700693169245215,
"learning_rate": 3.967062876719433e-05,
"loss": 0.2625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2493741810321808,
"step": 480,
"valid_targets_mean": 4157.4,
"valid_targets_min": 322
},
{
"epoch": 1.0777777777777777,
"grad_norm": 0.7056940332930607,
"learning_rate": 3.965029862496023e-05,
"loss": 0.275,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24406126141548157,
"step": 485,
"valid_targets_mean": 3929.6,
"valid_targets_min": 513
},
{
"epoch": 1.0888888888888888,
"grad_norm": 0.6063202867573172,
"learning_rate": 3.962936522669674e-05,
"loss": 0.274,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3029976487159729,
"step": 490,
"valid_targets_mean": 4941.8,
"valid_targets_min": 173
},
{
"epoch": 1.1,
"grad_norm": 1.6432217580814634,
"learning_rate": 3.960782921505052e-05,
"loss": 0.2376,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26101475954055786,
"step": 495,
"valid_targets_mean": 4462.2,
"valid_targets_min": 1231
},
{
"epoch": 1.1111111111111112,
"grad_norm": 0.45742604503694995,
"learning_rate": 3.9585691251168205e-05,
"loss": 0.2534,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24371081590652466,
"step": 500,
"valid_targets_mean": 5317.8,
"valid_targets_min": 662
},
{
"epoch": 1.1222222222222222,
"grad_norm": 0.583691622746648,
"learning_rate": 3.9562952014676116e-05,
"loss": 0.246,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2584843039512634,
"step": 505,
"valid_targets_mean": 3938.7,
"valid_targets_min": 242
},
{
"epoch": 1.1333333333333333,
"grad_norm": 0.5669510869721377,
"learning_rate": 3.95396122036594e-05,
"loss": 0.25,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24399635195732117,
"step": 510,
"valid_targets_mean": 3737.6,
"valid_targets_min": 769
},
{
"epoch": 1.1444444444444444,
"grad_norm": 0.5041490688945235,
"learning_rate": 3.951567253464058e-05,
"loss": 0.271,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2624698877334595,
"step": 515,
"valid_targets_mean": 4929.1,
"valid_targets_min": 229
},
{
"epoch": 1.1555555555555554,
"grad_norm": 0.6300993096751855,
"learning_rate": 3.949113374255759e-05,
"loss": 0.2305,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.253865510225296,
"step": 520,
"valid_targets_mean": 3452.0,
"valid_targets_min": 350
},
{
"epoch": 1.1666666666666667,
"grad_norm": 0.6297449586516455,
"learning_rate": 3.946599658074117e-05,
"loss": 0.2423,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2540290951728821,
"step": 525,
"valid_targets_mean": 4043.3,
"valid_targets_min": 377
},
{
"epoch": 1.1777777777777778,
"grad_norm": 0.6443213997898508,
"learning_rate": 3.94402618208918e-05,
"loss": 0.2753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.33983704447746277,
"step": 530,
"valid_targets_mean": 4571.4,
"valid_targets_min": 332
},
{
"epoch": 1.1888888888888889,
"grad_norm": 0.4851613432195201,
"learning_rate": 3.9413930253055925e-05,
"loss": 0.2515,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2413058876991272,
"step": 535,
"valid_targets_mean": 5014.4,
"valid_targets_min": 1004
},
{
"epoch": 1.2,
"grad_norm": 0.5924543065212132,
"learning_rate": 3.938700268560179e-05,
"loss": 0.2423,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26164644956588745,
"step": 540,
"valid_targets_mean": 5537.7,
"valid_targets_min": 682
},
{
"epoch": 1.211111111111111,
"grad_norm": 0.5348562981348433,
"learning_rate": 3.935947994519455e-05,
"loss": 0.2473,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22140653431415558,
"step": 545,
"valid_targets_mean": 3933.1,
"valid_targets_min": 326
},
{
"epoch": 1.2222222222222223,
"grad_norm": 1.1507409216365962,
"learning_rate": 3.933136287677095e-05,
"loss": 0.2797,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2999425530433655,
"step": 550,
"valid_targets_mean": 4136.8,
"valid_targets_min": 393
},
{
"epoch": 1.2333333333333334,
"grad_norm": 0.6036157268361829,
"learning_rate": 3.9302652343513325e-05,
"loss": 0.251,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24191945791244507,
"step": 555,
"valid_targets_mean": 3355.1,
"valid_targets_min": 354
},
{
"epoch": 1.2444444444444445,
"grad_norm": 0.5594737690815887,
"learning_rate": 3.927334922682319e-05,
"loss": 0.2386,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2738354802131653,
"step": 560,
"valid_targets_mean": 4464.2,
"valid_targets_min": 341
},
{
"epoch": 1.2555555555555555,
"grad_norm": 0.60403245110363,
"learning_rate": 3.924345442629405e-05,
"loss": 0.2414,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22148557007312775,
"step": 565,
"valid_targets_mean": 3713.0,
"valid_targets_min": 618
},
{
"epoch": 1.2666666666666666,
"grad_norm": 0.5326475987050822,
"learning_rate": 3.9212968859683924e-05,
"loss": 0.239,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2524668276309967,
"step": 570,
"valid_targets_mean": 4112.8,
"valid_targets_min": 998
},
{
"epoch": 1.2777777777777777,
"grad_norm": 0.5255254450372632,
"learning_rate": 3.918189346288708e-05,
"loss": 0.249,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25053149461746216,
"step": 575,
"valid_targets_mean": 4201.0,
"valid_targets_min": 359
},
{
"epoch": 1.2888888888888888,
"grad_norm": 0.5460490711902419,
"learning_rate": 3.9150229189905325e-05,
"loss": 0.2513,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2665417790412903,
"step": 580,
"valid_targets_mean": 4447.8,
"valid_targets_min": 384
},
{
"epoch": 1.3,
"grad_norm": 0.8958369211208952,
"learning_rate": 3.911797701281872e-05,
"loss": 0.2594,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27293702960014343,
"step": 585,
"valid_targets_mean": 3628.9,
"valid_targets_min": 755
},
{
"epoch": 1.3111111111111111,
"grad_norm": 0.5988510933683479,
"learning_rate": 3.9085137921755765e-05,
"loss": 0.2598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2321304976940155,
"step": 590,
"valid_targets_mean": 3787.6,
"valid_targets_min": 500
},
{
"epoch": 1.3222222222222222,
"grad_norm": 0.5432482040922059,
"learning_rate": 3.9051712924862926e-05,
"loss": 0.2528,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26185327768325806,
"step": 595,
"valid_targets_mean": 5390.7,
"valid_targets_min": 670
},
{
"epoch": 1.3333333333333333,
"grad_norm": 0.5377831944060099,
"learning_rate": 3.901770304827379e-05,
"loss": 0.2529,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22949066758155823,
"step": 600,
"valid_targets_mean": 4550.2,
"valid_targets_min": 214
},
{
"epoch": 1.3444444444444446,
"grad_norm": 0.5612439217393881,
"learning_rate": 3.898310933607746e-05,
"loss": 0.2753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2890508472919464,
"step": 605,
"valid_targets_mean": 4195.9,
"valid_targets_min": 826
},
{
"epoch": 1.3555555555555556,
"grad_norm": 0.5303766289315424,
"learning_rate": 3.8947932850286585e-05,
"loss": 0.2389,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.264137327671051,
"step": 610,
"valid_targets_mean": 4352.6,
"valid_targets_min": 335
},
{
"epoch": 1.3666666666666667,
"grad_norm": 0.5137510782550517,
"learning_rate": 3.891217467080472e-05,
"loss": 0.2505,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23932068049907684,
"step": 615,
"valid_targets_mean": 4644.5,
"valid_targets_min": 1028
},
{
"epoch": 1.3777777777777778,
"grad_norm": 0.47391135683532554,
"learning_rate": 3.887583589539315e-05,
"loss": 0.2465,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.255960613489151,
"step": 620,
"valid_targets_mean": 4816.4,
"valid_targets_min": 579
},
{
"epoch": 1.3888888888888888,
"grad_norm": 0.5572300753645717,
"learning_rate": 3.883891763963723e-05,
"loss": 0.2529,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2373078167438507,
"step": 625,
"valid_targets_mean": 3751.8,
"valid_targets_min": 668
},
{
"epoch": 1.4,
"grad_norm": 0.49937213169450123,
"learning_rate": 3.880142103691213e-05,
"loss": 0.2341,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23268617689609528,
"step": 630,
"valid_targets_mean": 4873.1,
"valid_targets_min": 549
},
{
"epoch": 1.411111111111111,
"grad_norm": 0.5406692441975431,
"learning_rate": 3.876334723834802e-05,
"loss": 0.2561,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2593476176261902,
"step": 635,
"valid_targets_mean": 4517.7,
"valid_targets_min": 647
},
{
"epoch": 1.4222222222222223,
"grad_norm": 0.553445249581852,
"learning_rate": 3.872469741279475e-05,
"loss": 0.2496,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2700212597846985,
"step": 640,
"valid_targets_mean": 3890.1,
"valid_targets_min": 490
},
{
"epoch": 1.4333333333333333,
"grad_norm": 0.5301037730927833,
"learning_rate": 3.868547274678595e-05,
"loss": 0.2535,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2927553057670593,
"step": 645,
"valid_targets_mean": 4392.2,
"valid_targets_min": 423
},
{
"epoch": 1.4444444444444444,
"grad_norm": 0.44259558229551654,
"learning_rate": 3.864567444450263e-05,
"loss": 0.2524,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24536505341529846,
"step": 650,
"valid_targets_mean": 6347.3,
"valid_targets_min": 1877
},
{
"epoch": 1.4555555555555555,
"grad_norm": 0.47771864860111035,
"learning_rate": 3.8605303727736186e-05,
"loss": 0.2434,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22662869095802307,
"step": 655,
"valid_targets_mean": 4389.6,
"valid_targets_min": 352
},
{
"epoch": 1.4666666666666668,
"grad_norm": 0.6247118916552183,
"learning_rate": 3.856436183585089e-05,
"loss": 0.2638,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.283000111579895,
"step": 660,
"valid_targets_mean": 4078.9,
"valid_targets_min": 725
},
{
"epoch": 1.4777777777777779,
"grad_norm": 1.5070723708006228,
"learning_rate": 3.8522850025745885e-05,
"loss": 0.2584,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25614166259765625,
"step": 665,
"valid_targets_mean": 5278.3,
"valid_targets_min": 495
},
{
"epoch": 1.488888888888889,
"grad_norm": 0.628570473741109,
"learning_rate": 3.8480769571816535e-05,
"loss": 0.2516,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29474788904190063,
"step": 670,
"valid_targets_mean": 3874.1,
"valid_targets_min": 545
},
{
"epoch": 1.5,
"grad_norm": 0.8699623629541559,
"learning_rate": 3.843812176591535e-05,
"loss": 0.2382,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22753040492534637,
"step": 675,
"valid_targets_mean": 3918.7,
"valid_targets_min": 309
},
{
"epoch": 1.511111111111111,
"grad_norm": 0.5112871185355921,
"learning_rate": 3.8394907917312314e-05,
"loss": 0.2391,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22634285688400269,
"step": 680,
"valid_targets_mean": 4564.2,
"valid_targets_min": 406
},
{
"epoch": 1.5222222222222221,
"grad_norm": 0.47954206990442566,
"learning_rate": 3.835112935265468e-05,
"loss": 0.2412,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2335192710161209,
"step": 685,
"valid_targets_mean": 4790.6,
"valid_targets_min": 337
},
{
"epoch": 1.5333333333333332,
"grad_norm": 0.48528209028878294,
"learning_rate": 3.830678741592625e-05,
"loss": 0.2358,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23643608391284943,
"step": 690,
"valid_targets_mean": 4443.8,
"valid_targets_min": 548
},
{
"epoch": 1.5444444444444443,
"grad_norm": 0.6276984597487912,
"learning_rate": 3.826188346840611e-05,
"loss": 0.262,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2579842805862427,
"step": 695,
"valid_targets_mean": 3282.2,
"valid_targets_min": 352
},
{
"epoch": 1.5555555555555556,
"grad_norm": 0.5476067080880181,
"learning_rate": 3.8216418888626864e-05,
"loss": 0.2439,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24156105518341064,
"step": 700,
"valid_targets_mean": 4612.9,
"valid_targets_min": 555
},
{
"epoch": 1.5666666666666667,
"grad_norm": 0.5413141061562579,
"learning_rate": 3.817039507233227e-05,
"loss": 0.2569,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2622939646244049,
"step": 705,
"valid_targets_mean": 4257.5,
"valid_targets_min": 286
},
{
"epoch": 1.5777777777777777,
"grad_norm": 0.5191592586122504,
"learning_rate": 3.812381343243444e-05,
"loss": 0.2376,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2551836371421814,
"step": 710,
"valid_targets_mean": 4099.2,
"valid_targets_min": 605
},
{
"epoch": 1.588888888888889,
"grad_norm": 0.6436243723923792,
"learning_rate": 3.807667539897041e-05,
"loss": 0.238,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.227070152759552,
"step": 715,
"valid_targets_mean": 3246.0,
"valid_targets_min": 348
},
{
"epoch": 1.6,
"grad_norm": 0.6239016531037459,
"learning_rate": 3.8028982419058304e-05,
"loss": 0.2585,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26668721437454224,
"step": 720,
"valid_targets_mean": 3396.3,
"valid_targets_min": 248
},
{
"epoch": 1.6111111111111112,
"grad_norm": 0.9192051442781469,
"learning_rate": 3.798073595685283e-05,
"loss": 0.2473,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25195956230163574,
"step": 725,
"valid_targets_mean": 3665.6,
"valid_targets_min": 272
},
{
"epoch": 1.6222222222222222,
"grad_norm": 0.5623905152284274,
"learning_rate": 3.793193749350042e-05,
"loss": 0.2523,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23393118381500244,
"step": 730,
"valid_targets_mean": 5211.4,
"valid_targets_min": 661
},
{
"epoch": 1.6333333333333333,
"grad_norm": 0.5429518143801225,
"learning_rate": 3.788258852709367e-05,
"loss": 0.2593,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22861596941947937,
"step": 735,
"valid_targets_mean": 5047.1,
"valid_targets_min": 589
},
{
"epoch": 1.6444444444444444,
"grad_norm": 0.47826555516018626,
"learning_rate": 3.7832690572625417e-05,
"loss": 0.2193,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21526125073432922,
"step": 740,
"valid_targets_mean": 4602.2,
"valid_targets_min": 839
},
{
"epoch": 1.6555555555555554,
"grad_norm": 0.5863707657883438,
"learning_rate": 3.77822451619422e-05,
"loss": 0.2603,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2455175220966339,
"step": 745,
"valid_targets_mean": 4632.7,
"valid_targets_min": 413
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.5169118790673137,
"learning_rate": 3.773125384369723e-05,
"loss": 0.2438,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27260589599609375,
"step": 750,
"valid_targets_mean": 4461.6,
"valid_targets_min": 331
},
{
"epoch": 1.6777777777777778,
"grad_norm": 0.5608117782474311,
"learning_rate": 3.7679718183302856e-05,
"loss": 0.2519,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25778332352638245,
"step": 755,
"valid_targets_mean": 4585.0,
"valid_targets_min": 616
},
{
"epoch": 1.6888888888888889,
"grad_norm": 0.5536343736249013,
"learning_rate": 3.762763976288252e-05,
"loss": 0.261,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26178520917892456,
"step": 760,
"valid_targets_mean": 4168.1,
"valid_targets_min": 590
},
{
"epoch": 1.7,
"grad_norm": 0.4967067858273974,
"learning_rate": 3.757502018122215e-05,
"loss": 0.2604,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23201864957809448,
"step": 765,
"valid_targets_mean": 4762.7,
"valid_targets_min": 656
},
{
"epoch": 1.7111111111111112,
"grad_norm": 0.6431920727054683,
"learning_rate": 3.7521861053721104e-05,
"loss": 0.2734,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3084906339645386,
"step": 770,
"valid_targets_mean": 3282.1,
"valid_targets_min": 355
},
{
"epoch": 1.7222222222222223,
"grad_norm": 0.49539913031077976,
"learning_rate": 3.74681640123426e-05,
"loss": 0.2436,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2623264789581299,
"step": 775,
"valid_targets_mean": 4743.1,
"valid_targets_min": 795
},
{
"epoch": 1.7333333333333334,
"grad_norm": 0.5255901704167366,
"learning_rate": 3.741393070556355e-05,
"loss": 0.2469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25359559059143066,
"step": 780,
"valid_targets_mean": 4230.9,
"valid_targets_min": 932
},
{
"epoch": 1.7444444444444445,
"grad_norm": 0.5202593209519554,
"learning_rate": 3.7359162798324015e-05,
"loss": 0.2451,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3075648546218872,
"step": 785,
"valid_targets_mean": 4931.7,
"valid_targets_min": 426
},
{
"epoch": 1.7555555555555555,
"grad_norm": 0.6052760837991578,
"learning_rate": 3.7303861971976074e-05,
"loss": 0.2645,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21487626433372498,
"step": 790,
"valid_targets_mean": 3999.6,
"valid_targets_min": 414
},
{
"epoch": 1.7666666666666666,
"grad_norm": 0.5902290421718297,
"learning_rate": 3.724802992423218e-05,
"loss": 0.2541,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24608749151229858,
"step": 795,
"valid_targets_mean": 3589.3,
"valid_targets_min": 301
},
{
"epoch": 1.7777777777777777,
"grad_norm": 0.6368794220763241,
"learning_rate": 3.719166836911309e-05,
"loss": 0.2563,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25677546858787537,
"step": 800,
"valid_targets_mean": 4686.3,
"valid_targets_min": 703
},
{
"epoch": 1.7888888888888888,
"grad_norm": 0.5323661851044132,
"learning_rate": 3.713477903689518e-05,
"loss": 0.2532,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2390604317188263,
"step": 805,
"valid_targets_mean": 4084.9,
"valid_targets_min": 339
},
{
"epoch": 1.8,
"grad_norm": 0.5186885779998889,
"learning_rate": 3.707736367405741e-05,
"loss": 0.2542,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22952812910079956,
"step": 810,
"valid_targets_mean": 3964.5,
"valid_targets_min": 238
},
{
"epoch": 1.8111111111111111,
"grad_norm": 0.6011432502817368,
"learning_rate": 3.701942404322764e-05,
"loss": 0.2443,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28318285942077637,
"step": 815,
"valid_targets_mean": 4504.8,
"valid_targets_min": 401
},
{
"epoch": 1.8222222222222222,
"grad_norm": 0.5057107246393668,
"learning_rate": 3.696096192312852e-05,
"loss": 0.2468,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2750961482524872,
"step": 820,
"valid_targets_mean": 4184.1,
"valid_targets_min": 299
},
{
"epoch": 1.8333333333333335,
"grad_norm": 0.5674106572096047,
"learning_rate": 3.690197910852294e-05,
"loss": 0.2688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.284572958946228,
"step": 825,
"valid_targets_mean": 4159.1,
"valid_targets_min": 883
},
{
"epoch": 1.8444444444444446,
"grad_norm": 0.524428929125523,
"learning_rate": 3.684247741015888e-05,
"loss": 0.235,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2377292364835739,
"step": 830,
"valid_targets_mean": 4545.5,
"valid_targets_min": 783
},
{
"epoch": 1.8555555555555556,
"grad_norm": 0.5197318284715093,
"learning_rate": 3.678245865471383e-05,
"loss": 0.2482,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25039640069007874,
"step": 835,
"valid_targets_mean": 3829.6,
"valid_targets_min": 436
},
{
"epoch": 1.8666666666666667,
"grad_norm": 0.741122146720882,
"learning_rate": 3.672192468473872e-05,
"loss": 0.2437,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25348931550979614,
"step": 840,
"valid_targets_mean": 3802.5,
"valid_targets_min": 494
},
{
"epoch": 1.8777777777777778,
"grad_norm": 0.5653997003973549,
"learning_rate": 3.666087735860138e-05,
"loss": 0.2422,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23701179027557373,
"step": 845,
"valid_targets_mean": 4565.3,
"valid_targets_min": 441
},
{
"epoch": 1.8888888888888888,
"grad_norm": 0.47961375383032556,
"learning_rate": 3.6599318550429415e-05,
"loss": 0.2504,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28315088152885437,
"step": 850,
"valid_targets_mean": 4855.1,
"valid_targets_min": 356
},
{
"epoch": 1.9,
"grad_norm": 0.4960630224668749,
"learning_rate": 3.653725015005275e-05,
"loss": 0.2358,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24288593232631683,
"step": 855,
"valid_targets_mean": 4795.5,
"valid_targets_min": 193
},
{
"epoch": 1.911111111111111,
"grad_norm": 0.5198260135949281,
"learning_rate": 3.6474674062945573e-05,
"loss": 0.2466,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2727063298225403,
"step": 860,
"valid_targets_mean": 5094.9,
"valid_targets_min": 339
},
{
"epoch": 1.9222222222222223,
"grad_norm": 0.963664843983337,
"learning_rate": 3.6411592210167834e-05,
"loss": 0.242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2163504958152771,
"step": 865,
"valid_targets_mean": 4430.9,
"valid_targets_min": 434
},
{
"epoch": 1.9333333333333333,
"grad_norm": 0.5387347139824947,
"learning_rate": 3.6348006528306295e-05,
"loss": 0.2441,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24510449171066284,
"step": 870,
"valid_targets_mean": 4382.1,
"valid_targets_min": 503
},
{
"epoch": 1.9444444444444444,
"grad_norm": 0.499809810495946,
"learning_rate": 3.628391896941505e-05,
"loss": 0.2349,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22553254663944244,
"step": 875,
"valid_targets_mean": 4035.4,
"valid_targets_min": 263
},
{
"epoch": 1.9555555555555557,
"grad_norm": 0.5218149844142983,
"learning_rate": 3.621933150095561e-05,
"loss": 0.2662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25849276781082153,
"step": 880,
"valid_targets_mean": 3824.7,
"valid_targets_min": 680
},
{
"epoch": 1.9666666666666668,
"grad_norm": 0.5123574858401871,
"learning_rate": 3.615424610573651e-05,
"loss": 0.2681,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28296032547950745,
"step": 885,
"valid_targets_mean": 4234.2,
"valid_targets_min": 694
},
{
"epoch": 1.9777777777777779,
"grad_norm": 0.5240352817270689,
"learning_rate": 3.608866478185245e-05,
"loss": 0.2466,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2798116207122803,
"step": 890,
"valid_targets_mean": 4745.1,
"valid_targets_min": 438
},
{
"epoch": 1.988888888888889,
"grad_norm": 0.47021296088948716,
"learning_rate": 3.602258954262287e-05,
"loss": 0.235,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24416977167129517,
"step": 895,
"valid_targets_mean": 4577.2,
"valid_targets_min": 272
},
{
"epoch": 2.0,
"grad_norm": 0.5152852896564755,
"learning_rate": 3.595602241653028e-05,
"loss": 0.248,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.227662593126297,
"step": 900,
"valid_targets_mean": 3932.9,
"valid_targets_min": 701
},
{
"epoch": 2.011111111111111,
"grad_norm": 0.8269495388300478,
"learning_rate": 3.588896544715787e-05,
"loss": 0.2363,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2364978790283203,
"step": 905,
"valid_targets_mean": 4779.8,
"valid_targets_min": 1630
},
{
"epoch": 2.022222222222222,
"grad_norm": 0.5822111147854571,
"learning_rate": 3.5821420693126834e-05,
"loss": 0.2403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2130419909954071,
"step": 910,
"valid_targets_mean": 3855.6,
"valid_targets_min": 311
},
{
"epoch": 2.033333333333333,
"grad_norm": 0.503530838011803,
"learning_rate": 3.575339022803313e-05,
"loss": 0.2336,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1974865049123764,
"step": 915,
"valid_targets_mean": 3615.4,
"valid_targets_min": 242
},
{
"epoch": 2.0444444444444443,
"grad_norm": 0.5881001614957597,
"learning_rate": 3.5684876140383875e-05,
"loss": 0.2199,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2469121217727661,
"step": 920,
"valid_targets_mean": 4366.9,
"valid_targets_min": 450
},
{
"epoch": 2.0555555555555554,
"grad_norm": 0.5375060558594457,
"learning_rate": 3.561588053353319e-05,
"loss": 0.2174,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21924357116222382,
"step": 925,
"valid_targets_mean": 4148.6,
"valid_targets_min": 1350
},
{
"epoch": 2.066666666666667,
"grad_norm": 0.5123963351563102,
"learning_rate": 3.554640552561761e-05,
"loss": 0.22,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21245072782039642,
"step": 930,
"valid_targets_mean": 3831.3,
"valid_targets_min": 441
},
{
"epoch": 2.077777777777778,
"grad_norm": 0.5890369925664793,
"learning_rate": 3.5476453249491125e-05,
"loss": 0.2297,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22979292273521423,
"step": 935,
"valid_targets_mean": 4769.4,
"valid_targets_min": 780
},
{
"epoch": 2.088888888888889,
"grad_norm": 0.5822543746194564,
"learning_rate": 3.5406025852659626e-05,
"loss": 0.2005,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22272920608520508,
"step": 940,
"valid_targets_mean": 3913.2,
"valid_targets_min": 665
},
{
"epoch": 2.1,
"grad_norm": 0.5062356595758193,
"learning_rate": 3.533512549721503e-05,
"loss": 0.2129,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19117265939712524,
"step": 945,
"valid_targets_mean": 4397.1,
"valid_targets_min": 379
},
{
"epoch": 2.111111111111111,
"grad_norm": 0.5053075095700746,
"learning_rate": 3.5263754359768896e-05,
"loss": 0.2176,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2279876470565796,
"step": 950,
"valid_targets_mean": 4971.6,
"valid_targets_min": 1361
},
{
"epoch": 2.1222222222222222,
"grad_norm": 0.48516711141414565,
"learning_rate": 3.5191914631385565e-05,
"loss": 0.2184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2130102515220642,
"step": 955,
"valid_targets_mean": 4136.3,
"valid_targets_min": 967
},
{
"epoch": 2.1333333333333333,
"grad_norm": 0.5779110961074571,
"learning_rate": 3.511960851751496e-05,
"loss": 0.2139,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2052951157093048,
"step": 960,
"valid_targets_mean": 3846.1,
"valid_targets_min": 629
},
{
"epoch": 2.1444444444444444,
"grad_norm": 0.5423358183039504,
"learning_rate": 3.504683823792483e-05,
"loss": 0.231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20998695492744446,
"step": 965,
"valid_targets_mean": 3942.2,
"valid_targets_min": 283
},
{
"epoch": 2.1555555555555554,
"grad_norm": 0.4731164937857403,
"learning_rate": 3.49736060266326e-05,
"loss": 0.2194,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20602105557918549,
"step": 970,
"valid_targets_mean": 4549.1,
"valid_targets_min": 825
},
{
"epoch": 2.1666666666666665,
"grad_norm": 0.5192629974349211,
"learning_rate": 3.489991413183686e-05,
"loss": 0.2077,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1989196240901947,
"step": 975,
"valid_targets_mean": 4872.6,
"valid_targets_min": 556
},
{
"epoch": 2.1777777777777776,
"grad_norm": 0.5443326064321022,
"learning_rate": 3.482576481584824e-05,
"loss": 0.2232,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2225218117237091,
"step": 980,
"valid_targets_mean": 3649.8,
"valid_targets_min": 546
},
{
"epoch": 2.188888888888889,
"grad_norm": 0.5020893394971628,
"learning_rate": 3.4751160355020034e-05,
"loss": 0.2094,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23120808601379395,
"step": 985,
"valid_targets_mean": 4243.1,
"valid_targets_min": 841
},
{
"epoch": 2.2,
"grad_norm": 0.48166947840338786,
"learning_rate": 3.467610303967829e-05,
"loss": 0.2195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18600469827651978,
"step": 990,
"valid_targets_mean": 5170.1,
"valid_targets_min": 1719
},
{
"epoch": 2.2111111111111112,
"grad_norm": 0.5583920558407086,
"learning_rate": 3.4600595174051496e-05,
"loss": 0.248,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27588558197021484,
"step": 995,
"valid_targets_mean": 4328.1,
"valid_targets_min": 874
},
{
"epoch": 2.2222222222222223,
"grad_norm": 0.5416793417738504,
"learning_rate": 3.452463907619986e-05,
"loss": 0.2202,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24462614953517914,
"step": 1000,
"valid_targets_mean": 4362.1,
"valid_targets_min": 722
},
{
"epoch": 2.2333333333333334,
"grad_norm": 0.5328060833599986,
"learning_rate": 3.444823707794414e-05,
"loss": 0.2321,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2523013949394226,
"step": 1005,
"valid_targets_mean": 4839.0,
"valid_targets_min": 352
},
{
"epoch": 2.2444444444444445,
"grad_norm": 0.563391734301446,
"learning_rate": 3.437139152479403e-05,
"loss": 0.2199,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19773298501968384,
"step": 1010,
"valid_targets_mean": 4385.2,
"valid_targets_min": 580
},
{
"epoch": 2.2555555555555555,
"grad_norm": 0.5074311551875781,
"learning_rate": 3.429410477587619e-05,
"loss": 0.2146,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2079458087682724,
"step": 1015,
"valid_targets_mean": 4402.8,
"valid_targets_min": 270
},
{
"epoch": 2.2666666666666666,
"grad_norm": 0.44755347145933255,
"learning_rate": 3.4216379203861785e-05,
"loss": 0.2155,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20723801851272583,
"step": 1020,
"valid_targets_mean": 6327.4,
"valid_targets_min": 2089
},
{
"epoch": 2.2777777777777777,
"grad_norm": 0.5637924379901195,
"learning_rate": 3.41382171948937e-05,
"loss": 0.2394,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2707464098930359,
"step": 1025,
"valid_targets_mean": 4543.5,
"valid_targets_min": 721
},
{
"epoch": 2.2888888888888888,
"grad_norm": 0.5493222833081342,
"learning_rate": 3.405962114851324e-05,
"loss": 0.2265,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23270747065544128,
"step": 1030,
"valid_targets_mean": 3841.1,
"valid_targets_min": 733
},
{
"epoch": 2.3,
"grad_norm": 0.5377176986106669,
"learning_rate": 3.398059347758647e-05,
"loss": 0.2384,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2036696970462799,
"step": 1035,
"valid_targets_mean": 3737.5,
"valid_targets_min": 443
},
{
"epoch": 2.311111111111111,
"grad_norm": 0.48575675539344104,
"learning_rate": 3.3901136608230166e-05,
"loss": 0.2424,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24168801307678223,
"step": 1040,
"valid_targets_mean": 5109.9,
"valid_targets_min": 623
},
{
"epoch": 2.3222222222222224,
"grad_norm": 0.5389208075547236,
"learning_rate": 3.38212529797373e-05,
"loss": 0.2259,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2333628237247467,
"step": 1045,
"valid_targets_mean": 4541.2,
"valid_targets_min": 507
},
{
"epoch": 2.3333333333333335,
"grad_norm": 0.6380380157616352,
"learning_rate": 3.374094504450218e-05,
"loss": 0.2182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21096259355545044,
"step": 1050,
"valid_targets_mean": 2654.1,
"valid_targets_min": 218
},
{
"epoch": 2.3444444444444446,
"grad_norm": 0.5618636585728415,
"learning_rate": 3.366021526794517e-05,
"loss": 0.2289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22567206621170044,
"step": 1055,
"valid_targets_mean": 3951.7,
"valid_targets_min": 310
},
{
"epoch": 2.3555555555555556,
"grad_norm": 0.590035420993425,
"learning_rate": 3.357906612843697e-05,
"loss": 0.2224,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22020503878593445,
"step": 1060,
"valid_targets_mean": 3162.4,
"valid_targets_min": 248
},
{
"epoch": 2.3666666666666667,
"grad_norm": 0.5197847274605225,
"learning_rate": 3.349750011722256e-05,
"loss": 0.2298,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23694512248039246,
"step": 1065,
"valid_targets_mean": 4473.9,
"valid_targets_min": 539
},
{
"epoch": 2.3777777777777778,
"grad_norm": 0.5902730606680944,
"learning_rate": 3.3415519738344686e-05,
"loss": 0.2292,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23186247050762177,
"step": 1070,
"valid_targets_mean": 4213.4,
"valid_targets_min": 535
},
{
"epoch": 2.388888888888889,
"grad_norm": 0.48708079743882093,
"learning_rate": 3.333312750856703e-05,
"loss": 0.2184,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22630329430103302,
"step": 1075,
"valid_targets_mean": 5137.1,
"valid_targets_min": 485
},
{
"epoch": 2.4,
"grad_norm": 0.6201131687835197,
"learning_rate": 3.3250325957296936e-05,
"loss": 0.2296,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2439213991165161,
"step": 1080,
"valid_targets_mean": 4311.1,
"valid_targets_min": 337
},
{
"epoch": 2.411111111111111,
"grad_norm": 0.5910991737720884,
"learning_rate": 3.3167117626507726e-05,
"loss": 0.2226,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22350382804870605,
"step": 1085,
"valid_targets_mean": 4331.2,
"valid_targets_min": 923
},
{
"epoch": 2.422222222222222,
"grad_norm": 0.5745602273524396,
"learning_rate": 3.308350507066069e-05,
"loss": 0.2285,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2272610366344452,
"step": 1090,
"valid_targets_mean": 3677.5,
"valid_targets_min": 620
},
{
"epoch": 2.4333333333333336,
"grad_norm": 0.5154857981766388,
"learning_rate": 3.2999490856626674e-05,
"loss": 0.2117,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21586115658283234,
"step": 1095,
"valid_targets_mean": 4540.8,
"valid_targets_min": 449
},
{
"epoch": 2.4444444444444446,
"grad_norm": 0.49655172714458007,
"learning_rate": 3.291507756360725e-05,
"loss": 0.2399,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23002441227436066,
"step": 1100,
"valid_targets_mean": 4644.0,
"valid_targets_min": 328
},
{
"epoch": 2.4555555555555557,
"grad_norm": 0.6161023573158174,
"learning_rate": 3.283026778305554e-05,
"loss": 0.224,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24754559993743896,
"step": 1105,
"valid_targets_mean": 4383.9,
"valid_targets_min": 263
},
{
"epoch": 2.466666666666667,
"grad_norm": 0.47036375778212935,
"learning_rate": 3.2745064118596696e-05,
"loss": 0.2317,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20647308230400085,
"step": 1110,
"valid_targets_mean": 5761.9,
"valid_targets_min": 689
},
{
"epoch": 2.477777777777778,
"grad_norm": 0.5406137267974768,
"learning_rate": 3.265946918594793e-05,
"loss": 0.232,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23646476864814758,
"step": 1115,
"valid_targets_mean": 3537.1,
"valid_targets_min": 795
},
{
"epoch": 2.488888888888889,
"grad_norm": 0.4637714307748355,
"learning_rate": 3.257348561283822e-05,
"loss": 0.2217,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22905629873275757,
"step": 1120,
"valid_targets_mean": 5062.0,
"valid_targets_min": 736
},
{
"epoch": 2.5,
"grad_norm": 0.4800390566792728,
"learning_rate": 3.248711603892765e-05,
"loss": 0.2144,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2034926414489746,
"step": 1125,
"valid_targets_mean": 4869.9,
"valid_targets_min": 1248
},
{
"epoch": 2.511111111111111,
"grad_norm": 3.5144120907404144,
"learning_rate": 3.240036311572635e-05,
"loss": 0.2476,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3505418002605438,
"step": 1130,
"valid_targets_mean": 3852.2,
"valid_targets_min": 763
},
{
"epoch": 2.522222222222222,
"grad_norm": 0.6588418511183887,
"learning_rate": 3.2313229506513167e-05,
"loss": 0.2188,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24138295650482178,
"step": 1135,
"valid_targets_mean": 3041.4,
"valid_targets_min": 419
},
{
"epoch": 2.533333333333333,
"grad_norm": 0.4351804600110289,
"learning_rate": 3.22257178862538e-05,
"loss": 0.2431,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22207850217819214,
"step": 1140,
"valid_targets_mean": 5268.9,
"valid_targets_min": 1175
},
{
"epoch": 2.5444444444444443,
"grad_norm": 0.5924820635484583,
"learning_rate": 3.213783094151873e-05,
"loss": 0.2299,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24633681774139404,
"step": 1145,
"valid_targets_mean": 4402.8,
"valid_targets_min": 852
},
{
"epoch": 2.5555555555555554,
"grad_norm": 0.4637928251048832,
"learning_rate": 3.204957137040079e-05,
"loss": 0.2235,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22640839219093323,
"step": 1150,
"valid_targets_mean": 4871.2,
"valid_targets_min": 966
},
{
"epoch": 2.5666666666666664,
"grad_norm": 0.5791271707374432,
"learning_rate": 3.196094188243224e-05,
"loss": 0.2182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24356502294540405,
"step": 1155,
"valid_targets_mean": 4226.2,
"valid_targets_min": 795
},
{
"epoch": 2.5777777777777775,
"grad_norm": 0.544475419998541,
"learning_rate": 3.187194519850167e-05,
"loss": 0.2195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21199198067188263,
"step": 1160,
"valid_targets_mean": 4666.7,
"valid_targets_min": 555
},
{
"epoch": 2.588888888888889,
"grad_norm": 0.5117366718721723,
"learning_rate": 3.17825840507704e-05,
"loss": 0.2326,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2462947815656662,
"step": 1165,
"valid_targets_mean": 4306.7,
"valid_targets_min": 231
},
{
"epoch": 2.6,
"grad_norm": 0.5185736052483539,
"learning_rate": 3.169286118258867e-05,
"loss": 0.2191,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21428170800209045,
"step": 1170,
"valid_targets_mean": 4292.4,
"valid_targets_min": 374
},
{
"epoch": 2.611111111111111,
"grad_norm": 0.5124442481522459,
"learning_rate": 3.1602779348411354e-05,
"loss": 0.2194,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21586015820503235,
"step": 1175,
"valid_targets_mean": 4098.7,
"valid_targets_min": 284
},
{
"epoch": 2.6222222222222222,
"grad_norm": 0.5013956254301979,
"learning_rate": 3.151234131371348e-05,
"loss": 0.2224,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20073631405830383,
"step": 1180,
"valid_targets_mean": 3848.8,
"valid_targets_min": 473
},
{
"epoch": 2.6333333333333333,
"grad_norm": 0.5498700950427212,
"learning_rate": 3.142154985490523e-05,
"loss": 0.2231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22234992682933807,
"step": 1185,
"valid_targets_mean": 3493.1,
"valid_targets_min": 754
},
{
"epoch": 2.6444444444444444,
"grad_norm": 0.5491708841030193,
"learning_rate": 3.1330407759246805e-05,
"loss": 0.2079,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22136567533016205,
"step": 1190,
"valid_targets_mean": 3996.4,
"valid_targets_min": 251
},
{
"epoch": 2.6555555555555554,
"grad_norm": 0.48268790906327425,
"learning_rate": 3.1238917824762794e-05,
"loss": 0.2062,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2028353214263916,
"step": 1195,
"valid_targets_mean": 4584.0,
"valid_targets_min": 650
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.49007399842315175,
"learning_rate": 3.1147082860156275e-05,
"loss": 0.2228,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25679466128349304,
"step": 1200,
"valid_targets_mean": 5399.8,
"valid_targets_min": 982
},
{
"epoch": 2.677777777777778,
"grad_norm": 0.4633684064891278,
"learning_rate": 3.105490568472266e-05,
"loss": 0.2108,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2022610604763031,
"step": 1205,
"valid_targets_mean": 4171.5,
"valid_targets_min": 757
},
{
"epoch": 2.688888888888889,
"grad_norm": 0.4895671052558776,
"learning_rate": 3.0962389128263025e-05,
"loss": 0.2333,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19812658429145813,
"step": 1210,
"valid_targets_mean": 5454.3,
"valid_targets_min": 420
},
{
"epoch": 2.7,
"grad_norm": 0.5453665235834874,
"learning_rate": 3.086953603099736e-05,
"loss": 0.2199,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20646308362483978,
"step": 1215,
"valid_targets_mean": 4315.4,
"valid_targets_min": 465
},
{
"epoch": 2.7111111111111112,
"grad_norm": 0.5581759362267998,
"learning_rate": 3.077634924347728e-05,
"loss": 0.2182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22275254130363464,
"step": 1220,
"valid_targets_mean": 3761.5,
"valid_targets_min": 269
},
{
"epoch": 2.7222222222222223,
"grad_norm": 0.5153650133576909,
"learning_rate": 3.068283162649858e-05,
"loss": 0.2293,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21216610074043274,
"step": 1225,
"valid_targets_mean": 4164.8,
"valid_targets_min": 296
},
{
"epoch": 2.7333333333333334,
"grad_norm": 0.592596188705314,
"learning_rate": 3.0588986051013355e-05,
"loss": 0.216,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22181862592697144,
"step": 1230,
"valid_targets_mean": 4035.2,
"valid_targets_min": 283
},
{
"epoch": 2.7444444444444445,
"grad_norm": 0.5983613901546053,
"learning_rate": 3.049481539804192e-05,
"loss": 0.2231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22257715463638306,
"step": 1235,
"valid_targets_mean": 3023.5,
"valid_targets_min": 701
},
{
"epoch": 2.7555555555555555,
"grad_norm": 0.49305225026999655,
"learning_rate": 3.0400322558584308e-05,
"loss": 0.2201,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19669756293296814,
"step": 1240,
"valid_targets_mean": 4822.0,
"valid_targets_min": 1400
},
{
"epoch": 2.7666666666666666,
"grad_norm": 0.4752603478313126,
"learning_rate": 3.0305510433531568e-05,
"loss": 0.2444,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23400330543518066,
"step": 1245,
"valid_targets_mean": 4814.9,
"valid_targets_min": 300
},
{
"epoch": 2.7777777777777777,
"grad_norm": 0.5551934554676596,
"learning_rate": 3.0210381933576654e-05,
"loss": 0.2227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2083454132080078,
"step": 1250,
"valid_targets_mean": 3553.5,
"valid_targets_min": 348
},
{
"epoch": 2.7888888888888888,
"grad_norm": 0.7744675886957486,
"learning_rate": 3.0114939979125135e-05,
"loss": 0.2268,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2401510775089264,
"step": 1255,
"valid_targets_mean": 3016.8,
"valid_targets_min": 539
},
{
"epoch": 2.8,
"grad_norm": 0.5540461372381361,
"learning_rate": 3.001918750020547e-05,
"loss": 0.2216,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2751244008541107,
"step": 1260,
"valid_targets_mean": 3901.6,
"valid_targets_min": 539
},
{
"epoch": 2.811111111111111,
"grad_norm": 0.6851890356876696,
"learning_rate": 2.992312743637911e-05,
"loss": 0.2121,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.25260940194129944,
"step": 1265,
"valid_targets_mean": 4583.8,
"valid_targets_min": 668
},
{
"epoch": 2.822222222222222,
"grad_norm": 0.48363964794454084,
"learning_rate": 2.982676273665023e-05,
"loss": 0.2065,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2125380039215088,
"step": 1270,
"valid_targets_mean": 5045.1,
"valid_targets_min": 667
},
{
"epoch": 2.8333333333333335,
"grad_norm": 0.6033207649950939,
"learning_rate": 2.97300963593752e-05,
"loss": 0.2301,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22328922152519226,
"step": 1275,
"valid_targets_mean": 5039.6,
"valid_targets_min": 1597
},
{
"epoch": 2.8444444444444446,
"grad_norm": 0.5440870025040494,
"learning_rate": 2.9633131272171768e-05,
"loss": 0.2155,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21130454540252686,
"step": 1280,
"valid_targets_mean": 3933.2,
"valid_targets_min": 511
},
{
"epoch": 2.8555555555555556,
"grad_norm": 0.46685874495827057,
"learning_rate": 2.953587045182795e-05,
"loss": 0.2136,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2029666304588318,
"step": 1285,
"valid_targets_mean": 4932.2,
"valid_targets_min": 414
},
{
"epoch": 2.8666666666666667,
"grad_norm": 0.5083384300553782,
"learning_rate": 2.943831688421066e-05,
"loss": 0.2263,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20045515894889832,
"step": 1290,
"valid_targets_mean": 3839.1,
"valid_targets_min": 2102
},
{
"epoch": 2.8777777777777778,
"grad_norm": 0.5496306210874803,
"learning_rate": 2.9340473564174003e-05,
"loss": 0.2254,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2500072121620178,
"step": 1295,
"valid_targets_mean": 4098.8,
"valid_targets_min": 417
},
{
"epoch": 2.888888888888889,
"grad_norm": 0.5194734275085567,
"learning_rate": 2.9242343495467396e-05,
"loss": 0.2347,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22234457731246948,
"step": 1300,
"valid_targets_mean": 3903.9,
"valid_targets_min": 411
},
{
"epoch": 2.9,
"grad_norm": 0.504766986056304,
"learning_rate": 2.9143929690643292e-05,
"loss": 0.2228,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23288145661354065,
"step": 1305,
"valid_targets_mean": 5635.0,
"valid_targets_min": 1210
},
{
"epoch": 2.911111111111111,
"grad_norm": 0.6498390965909955,
"learning_rate": 2.9045235170964754e-05,
"loss": 0.2322,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23700065910816193,
"step": 1310,
"valid_targets_mean": 4147.7,
"valid_targets_min": 374
},
{
"epoch": 2.9222222222222225,
"grad_norm": 0.47655618469018296,
"learning_rate": 2.8946262966312652e-05,
"loss": 0.2247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20102891325950623,
"step": 1315,
"valid_targets_mean": 3986.3,
"valid_targets_min": 301
},
{
"epoch": 2.9333333333333336,
"grad_norm": 0.444313610842639,
"learning_rate": 2.8847016115092686e-05,
"loss": 0.2268,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20955491065979004,
"step": 1320,
"valid_targets_mean": 5599.8,
"valid_targets_min": 403
},
{
"epoch": 2.9444444444444446,
"grad_norm": 0.5345322272032264,
"learning_rate": 2.8747497664142075e-05,
"loss": 0.2085,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21730177104473114,
"step": 1325,
"valid_targets_mean": 3947.6,
"valid_targets_min": 646
},
{
"epoch": 2.9555555555555557,
"grad_norm": 0.48868872486689147,
"learning_rate": 2.8647710668636053e-05,
"loss": 0.2251,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21820557117462158,
"step": 1330,
"valid_targets_mean": 4328.9,
"valid_targets_min": 301
},
{
"epoch": 2.966666666666667,
"grad_norm": 0.5163666912550201,
"learning_rate": 2.8547658191994054e-05,
"loss": 0.2159,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2369096875190735,
"step": 1335,
"valid_targets_mean": 3999.9,
"valid_targets_min": 467
},
{
"epoch": 2.977777777777778,
"grad_norm": 0.4525486700692223,
"learning_rate": 2.844734330578567e-05,
"loss": 0.2165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22851812839508057,
"step": 1340,
"valid_targets_mean": 5680.0,
"valid_targets_min": 968
},
{
"epoch": 2.988888888888889,
"grad_norm": 0.5038511428892624,
"learning_rate": 2.834676908963636e-05,
"loss": 0.2223,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20257601141929626,
"step": 1345,
"valid_targets_mean": 4431.0,
"valid_targets_min": 604
},
{
"epoch": 3.0,
"grad_norm": 0.49413740091680286,
"learning_rate": 2.824593863113291e-05,
"loss": 0.2423,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2500663995742798,
"step": 1350,
"valid_targets_mean": 5082.1,
"valid_targets_min": 1029
},
{
"epoch": 3.011111111111111,
"grad_norm": 0.5890237248654047,
"learning_rate": 2.814485502572863e-05,
"loss": 0.1906,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18735849857330322,
"step": 1355,
"valid_targets_mean": 5022.9,
"valid_targets_min": 1330
},
{
"epoch": 3.022222222222222,
"grad_norm": 0.5397593530227527,
"learning_rate": 2.804352137664835e-05,
"loss": 0.2145,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2516242265701294,
"step": 1360,
"valid_targets_mean": 5099.1,
"valid_targets_min": 360
},
{
"epoch": 3.033333333333333,
"grad_norm": 0.568525348633587,
"learning_rate": 2.7941940794793122e-05,
"loss": 0.1982,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19268794357776642,
"step": 1365,
"valid_targets_mean": 3634.8,
"valid_targets_min": 294
},
{
"epoch": 3.0444444444444443,
"grad_norm": 0.504377586126035,
"learning_rate": 2.7840116398644742e-05,
"loss": 0.1881,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19320614635944366,
"step": 1370,
"valid_targets_mean": 4622.8,
"valid_targets_min": 599
},
{
"epoch": 3.0555555555555554,
"grad_norm": 0.6922737256269529,
"learning_rate": 2.7738051314169993e-05,
"loss": 0.195,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2083386778831482,
"step": 1375,
"valid_targets_mean": 5940.1,
"valid_targets_min": 2844
},
{
"epoch": 3.066666666666667,
"grad_norm": 0.45807741860705403,
"learning_rate": 2.7635748674724702e-05,
"loss": 0.1936,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17992693185806274,
"step": 1380,
"valid_targets_mean": 5056.9,
"valid_targets_min": 581
},
{
"epoch": 3.077777777777778,
"grad_norm": 0.49380418579141977,
"learning_rate": 2.7533211620957532e-05,
"loss": 0.1928,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20595912635326385,
"step": 1385,
"valid_targets_mean": 5148.5,
"valid_targets_min": 2209
},
{
"epoch": 3.088888888888889,
"grad_norm": 0.6287045912836227,
"learning_rate": 2.743044330071356e-05,
"loss": 0.2061,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2235325574874878,
"step": 1390,
"valid_targets_mean": 3636.7,
"valid_targets_min": 382
},
{
"epoch": 3.1,
"grad_norm": 0.5630479669186463,
"learning_rate": 2.7327446868937664e-05,
"loss": 0.1965,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19244463741779327,
"step": 1395,
"valid_targets_mean": 5513.7,
"valid_targets_min": 361
},
{
"epoch": 3.111111111111111,
"grad_norm": 0.47811337357814554,
"learning_rate": 2.7224225487577637e-05,
"loss": 0.1885,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1944870501756668,
"step": 1400,
"valid_targets_mean": 5109.3,
"valid_targets_min": 214
},
{
"epoch": 3.1222222222222222,
"grad_norm": 0.5608741822584281,
"learning_rate": 2.712078232548714e-05,
"loss": 0.2004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1881995052099228,
"step": 1405,
"valid_targets_mean": 4487.5,
"valid_targets_min": 650
},
{
"epoch": 3.1333333333333333,
"grad_norm": 0.5261157563058694,
"learning_rate": 2.7017120558328395e-05,
"loss": 0.2093,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1874210387468338,
"step": 1410,
"valid_targets_mean": 4228.4,
"valid_targets_min": 467
},
{
"epoch": 3.1444444444444444,
"grad_norm": 0.5496647265564715,
"learning_rate": 2.6913243368474734e-05,
"loss": 0.2034,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22300265729427338,
"step": 1415,
"valid_targets_mean": 4393.3,
"valid_targets_min": 642
},
{
"epoch": 3.1555555555555554,
"grad_norm": 0.49758618583607633,
"learning_rate": 2.680915394491286e-05,
"loss": 0.2028,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19458116590976715,
"step": 1420,
"valid_targets_mean": 4739.6,
"valid_targets_min": 542
},
{
"epoch": 3.1666666666666665,
"grad_norm": 0.5218888450001141,
"learning_rate": 2.6704855483144973e-05,
"loss": 0.2088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21135935187339783,
"step": 1425,
"valid_targets_mean": 4440.1,
"valid_targets_min": 992
},
{
"epoch": 3.1777777777777776,
"grad_norm": 0.5426640353608564,
"learning_rate": 2.6600351185090637e-05,
"loss": 0.211,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19701051712036133,
"step": 1430,
"valid_targets_mean": 3513.1,
"valid_targets_min": 291
},
{
"epoch": 3.188888888888889,
"grad_norm": 0.5662706793465802,
"learning_rate": 2.649564425898853e-05,
"loss": 0.1992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21139845252037048,
"step": 1435,
"valid_targets_mean": 3985.6,
"valid_targets_min": 699
},
{
"epoch": 3.2,
"grad_norm": 0.4718898918421046,
"learning_rate": 2.6390737919297925e-05,
"loss": 0.1933,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1822149157524109,
"step": 1440,
"valid_targets_mean": 5072.8,
"valid_targets_min": 903
},
{
"epoch": 3.2111111111111112,
"grad_norm": 0.5271902172821914,
"learning_rate": 2.6285635386599983e-05,
"loss": 0.2036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1844690442085266,
"step": 1445,
"valid_targets_mean": 4660.9,
"valid_targets_min": 942
},
{
"epoch": 3.2222222222222223,
"grad_norm": 0.7537646783410501,
"learning_rate": 2.618033988749895e-05,
"loss": 0.2114,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20052474737167358,
"step": 1450,
"valid_targets_mean": 3193.3,
"valid_targets_min": 263
},
{
"epoch": 3.2333333333333334,
"grad_norm": 0.4690363852812957,
"learning_rate": 2.6074854654523023e-05,
"loss": 0.2106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16611334681510925,
"step": 1455,
"valid_targets_mean": 4196.4,
"valid_targets_min": 469
},
{
"epoch": 3.2444444444444445,
"grad_norm": 0.4572908827805106,
"learning_rate": 2.596918292602518e-05,
"loss": 0.199,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18506214022636414,
"step": 1460,
"valid_targets_mean": 5220.0,
"valid_targets_min": 507
},
{
"epoch": 3.2555555555555555,
"grad_norm": 0.5395419499985299,
"learning_rate": 2.586332794608371e-05,
"loss": 0.1997,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18608039617538452,
"step": 1465,
"valid_targets_mean": 3894.8,
"valid_targets_min": 1041
},
{
"epoch": 3.2666666666666666,
"grad_norm": 0.8552816175037915,
"learning_rate": 2.5757292964402653e-05,
"loss": 0.1956,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18753841519355774,
"step": 1470,
"valid_targets_mean": 4177.2,
"valid_targets_min": 357
},
{
"epoch": 3.2777777777777777,
"grad_norm": 0.6502074224948179,
"learning_rate": 2.5651081236212045e-05,
"loss": 0.193,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1877031922340393,
"step": 1475,
"valid_targets_mean": 3714.6,
"valid_targets_min": 587
},
{
"epoch": 3.2888888888888888,
"grad_norm": 0.504138623747214,
"learning_rate": 2.5544696022167945e-05,
"loss": 0.1966,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19270633161067963,
"step": 1480,
"valid_targets_mean": 5232.9,
"valid_targets_min": 621
},
{
"epoch": 3.3,
"grad_norm": 0.5562353529549702,
"learning_rate": 2.5438140588252367e-05,
"loss": 0.2198,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23178471624851227,
"step": 1485,
"valid_targets_mean": 4443.0,
"valid_targets_min": 1115
},
{
"epoch": 3.311111111111111,
"grad_norm": 0.6990935382908224,
"learning_rate": 2.5331418205672988e-05,
"loss": 0.1837,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1817629337310791,
"step": 1490,
"valid_targets_mean": 3285.0,
"valid_targets_min": 387
},
{
"epoch": 3.3222222222222224,
"grad_norm": 0.5161517693720137,
"learning_rate": 2.522453215076277e-05,
"loss": 0.208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21749022603034973,
"step": 1495,
"valid_targets_mean": 5134.9,
"valid_targets_min": 839
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.546904151431328,
"learning_rate": 2.511748570487932e-05,
"loss": 0.2122,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23374782502651215,
"step": 1500,
"valid_targets_mean": 4250.7,
"valid_targets_min": 588
},
{
"epoch": 3.3444444444444446,
"grad_norm": 0.47277844481370324,
"learning_rate": 2.5010282154304193e-05,
"loss": 0.1975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20504996180534363,
"step": 1505,
"valid_targets_mean": 5340.2,
"valid_targets_min": 586
},
{
"epoch": 3.3555555555555556,
"grad_norm": 0.5412015372324224,
"learning_rate": 2.4902924790142004e-05,
"loss": 0.2019,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20246189832687378,
"step": 1510,
"valid_targets_mean": 3491.1,
"valid_targets_min": 218
},
{
"epoch": 3.3666666666666667,
"grad_norm": 0.5638774958328592,
"learning_rate": 2.479541690821935e-05,
"loss": 0.2145,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22698086500167847,
"step": 1515,
"valid_targets_mean": 3940.7,
"valid_targets_min": 568
},
{
"epoch": 3.3777777777777778,
"grad_norm": 0.4561709138055978,
"learning_rate": 2.4687761808983693e-05,
"loss": 0.1977,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1643875539302826,
"step": 1520,
"valid_targets_mean": 5342.7,
"valid_targets_min": 1439
},
{
"epoch": 3.388888888888889,
"grad_norm": 0.5234462175070752,
"learning_rate": 2.457996279740199e-05,
"loss": 0.1974,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19301840662956238,
"step": 1525,
"valid_targets_mean": 4052.8,
"valid_targets_min": 636
},
{
"epoch": 3.4,
"grad_norm": 0.5228201850030787,
"learning_rate": 2.4472023182859257e-05,
"loss": 0.1913,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18041759729385376,
"step": 1530,
"valid_targets_mean": 4235.0,
"valid_targets_min": 1815
},
{
"epoch": 3.411111111111111,
"grad_norm": 0.5751004806856855,
"learning_rate": 2.4363946279056947e-05,
"loss": 0.199,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20007622241973877,
"step": 1535,
"valid_targets_mean": 5299.6,
"valid_targets_min": 1225
},
{
"epoch": 3.422222222222222,
"grad_norm": 0.5955197652307113,
"learning_rate": 2.4255735403911243e-05,
"loss": 0.215,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19078674912452698,
"step": 1540,
"valid_targets_mean": 3658.2,
"valid_targets_min": 808
},
{
"epoch": 3.4333333333333336,
"grad_norm": 0.5223462323520682,
"learning_rate": 2.4147393879451205e-05,
"loss": 0.215,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22115221619606018,
"step": 1545,
"valid_targets_mean": 4941.4,
"valid_targets_min": 530
},
{
"epoch": 3.4444444444444446,
"grad_norm": 0.5358147295716755,
"learning_rate": 2.4038925031716755e-05,
"loss": 0.2036,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1802097111940384,
"step": 1550,
"valid_targets_mean": 3892.2,
"valid_targets_min": 376
},
{
"epoch": 3.4555555555555557,
"grad_norm": 0.5061336217509301,
"learning_rate": 2.3930332190656604e-05,
"loss": 0.2151,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1854897439479828,
"step": 1555,
"valid_targets_mean": 4797.3,
"valid_targets_min": 545
},
{
"epoch": 3.466666666666667,
"grad_norm": 0.6266246922904652,
"learning_rate": 2.382161869002599e-05,
"loss": 0.1986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2148759663105011,
"step": 1560,
"valid_targets_mean": 3841.4,
"valid_targets_min": 822
},
{
"epoch": 3.477777777777778,
"grad_norm": 0.7044788267096704,
"learning_rate": 2.371278786728436e-05,
"loss": 0.2006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24996237456798553,
"step": 1565,
"valid_targets_mean": 2695.3,
"valid_targets_min": 332
},
{
"epoch": 3.488888888888889,
"grad_norm": 0.49301848581474994,
"learning_rate": 2.3603843063492892e-05,
"loss": 0.1948,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2028578519821167,
"step": 1570,
"valid_targets_mean": 5528.7,
"valid_targets_min": 2284
},
{
"epoch": 3.5,
"grad_norm": 0.5882627059324441,
"learning_rate": 2.3494787623211954e-05,
"loss": 0.1965,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19135794043540955,
"step": 1575,
"valid_targets_mean": 3645.6,
"valid_targets_min": 1006
},
{
"epoch": 3.511111111111111,
"grad_norm": 0.4963690972784803,
"learning_rate": 2.3385624894398387e-05,
"loss": 0.1973,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20491212606430054,
"step": 1580,
"valid_targets_mean": 5376.0,
"valid_targets_min": 769
},
{
"epoch": 3.522222222222222,
"grad_norm": 0.538419155290378,
"learning_rate": 2.3276358228302757e-05,
"loss": 0.2111,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20943906903266907,
"step": 1585,
"valid_targets_mean": 3945.6,
"valid_targets_min": 590
},
{
"epoch": 3.533333333333333,
"grad_norm": 0.6038918409459312,
"learning_rate": 2.316699097936646e-05,
"loss": 0.191,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1826208233833313,
"step": 1590,
"valid_targets_mean": 2921.8,
"valid_targets_min": 247
},
{
"epoch": 3.5444444444444443,
"grad_norm": 0.48461226325428114,
"learning_rate": 2.305752650511874e-05,
"loss": 0.1971,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23485904932022095,
"step": 1595,
"valid_targets_mean": 5048.4,
"valid_targets_min": 550
},
{
"epoch": 3.5555555555555554,
"grad_norm": 0.4544047818243079,
"learning_rate": 2.2947968166073627e-05,
"loss": 0.2015,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17371800541877747,
"step": 1600,
"valid_targets_mean": 4993.6,
"valid_targets_min": 503
},
{
"epoch": 3.5666666666666664,
"grad_norm": 0.4627984476791086,
"learning_rate": 2.2838319325626746e-05,
"loss": 0.1917,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17911431193351746,
"step": 1605,
"valid_targets_mean": 5385.9,
"valid_targets_min": 2454
},
{
"epoch": 3.5777777777777775,
"grad_norm": 0.5655196781582149,
"learning_rate": 2.2728583349952094e-05,
"loss": 0.1995,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18993490934371948,
"step": 1610,
"valid_targets_mean": 4732.7,
"valid_targets_min": 774
},
{
"epoch": 3.588888888888889,
"grad_norm": 0.5352167184955361,
"learning_rate": 2.2618763607898666e-05,
"loss": 0.2028,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1996089518070221,
"step": 1615,
"valid_targets_mean": 4169.6,
"valid_targets_min": 391
},
{
"epoch": 3.6,
"grad_norm": 0.533764539999318,
"learning_rate": 2.250886347088707e-05,
"loss": 0.1992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1746983379125595,
"step": 1620,
"valid_targets_mean": 4142.2,
"valid_targets_min": 402
},
{
"epoch": 3.611111111111111,
"grad_norm": 0.510040722808607,
"learning_rate": 2.2398886312805996e-05,
"loss": 0.1865,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19001047313213348,
"step": 1625,
"valid_targets_mean": 5082.9,
"valid_targets_min": 636
},
{
"epoch": 3.6222222222222222,
"grad_norm": 0.5755748579267672,
"learning_rate": 2.228883550990864e-05,
"loss": 0.213,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21132859587669373,
"step": 1630,
"valid_targets_mean": 4745.2,
"valid_targets_min": 537
},
{
"epoch": 3.6333333333333333,
"grad_norm": 0.4921775589074553,
"learning_rate": 2.2178714440709084e-05,
"loss": 0.203,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1739899069070816,
"step": 1635,
"valid_targets_mean": 4681.7,
"valid_targets_min": 750
},
{
"epoch": 3.6444444444444444,
"grad_norm": 0.4640557234030302,
"learning_rate": 2.206852648587853e-05,
"loss": 0.2106,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20348605513572693,
"step": 1640,
"valid_targets_mean": 5669.5,
"valid_targets_min": 932
},
{
"epoch": 3.6555555555555554,
"grad_norm": 0.5916842012997149,
"learning_rate": 2.1958275028141566e-05,
"loss": 0.2239,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.27221259474754333,
"step": 1645,
"valid_targets_mean": 4139.3,
"valid_targets_min": 510
},
{
"epoch": 3.6666666666666665,
"grad_norm": 0.5390040585860258,
"learning_rate": 2.1847963452172283e-05,
"loss": 0.192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1900901198387146,
"step": 1650,
"valid_targets_mean": 3726.5,
"valid_targets_min": 825
},
{
"epoch": 3.677777777777778,
"grad_norm": 0.5164729678261945,
"learning_rate": 2.173759514449037e-05,
"loss": 0.2014,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19901810586452484,
"step": 1655,
"valid_targets_mean": 4287.4,
"valid_targets_min": 651
},
{
"epoch": 3.688888888888889,
"grad_norm": 0.5897377625071923,
"learning_rate": 2.1627173493357167e-05,
"loss": 0.1981,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2279503494501114,
"step": 1660,
"valid_targets_mean": 4376.0,
"valid_targets_min": 460
},
{
"epoch": 3.7,
"grad_norm": 0.5386462237457673,
"learning_rate": 2.1516701888671633e-05,
"loss": 0.2009,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19399359822273254,
"step": 1665,
"valid_targets_mean": 4516.8,
"valid_targets_min": 1662
},
{
"epoch": 3.7111111111111112,
"grad_norm": 0.6495411852106558,
"learning_rate": 2.1406183721866274e-05,
"loss": 0.2019,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21747617423534393,
"step": 1670,
"valid_targets_mean": 3540.1,
"valid_targets_min": 859
},
{
"epoch": 3.7222222222222223,
"grad_norm": 0.6306749469465672,
"learning_rate": 2.1295622385803036e-05,
"loss": 0.2024,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2108190804719925,
"step": 1675,
"valid_targets_mean": 3204.0,
"valid_targets_min": 394
},
{
"epoch": 3.7333333333333334,
"grad_norm": 0.5431455240184233,
"learning_rate": 2.118502127466916e-05,
"loss": 0.1978,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17692044377326965,
"step": 1680,
"valid_targets_mean": 4319.1,
"valid_targets_min": 897
},
{
"epoch": 3.7444444444444445,
"grad_norm": 0.6628241987227439,
"learning_rate": 2.1074383783872932e-05,
"loss": 0.198,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21650046110153198,
"step": 1685,
"valid_targets_mean": 4163.4,
"valid_targets_min": 597
},
{
"epoch": 3.7555555555555555,
"grad_norm": 0.5030160452803907,
"learning_rate": 2.0963713309939516e-05,
"loss": 0.1994,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18289291858673096,
"step": 1690,
"valid_targets_mean": 4629.3,
"valid_targets_min": 1126
},
{
"epoch": 3.7666666666666666,
"grad_norm": 0.4851965144847549,
"learning_rate": 2.0853013250406616e-05,
"loss": 0.2008,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18697203695774078,
"step": 1695,
"valid_targets_mean": 4800.1,
"valid_targets_min": 449
},
{
"epoch": 3.7777777777777777,
"grad_norm": 0.5785969302326881,
"learning_rate": 2.0742287003720207e-05,
"loss": 0.2006,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21332597732543945,
"step": 1700,
"valid_targets_mean": 3694.1,
"valid_targets_min": 786
},
{
"epoch": 3.7888888888888888,
"grad_norm": 0.5232277659642465,
"learning_rate": 2.063153796913022e-05,
"loss": 0.1906,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18892058730125427,
"step": 1705,
"valid_targets_mean": 4132.3,
"valid_targets_min": 915
},
{
"epoch": 3.8,
"grad_norm": 0.4794174421487984,
"learning_rate": 2.0520769546586133e-05,
"loss": 0.2013,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17793911695480347,
"step": 1710,
"valid_targets_mean": 4531.4,
"valid_targets_min": 629
},
{
"epoch": 3.811111111111111,
"grad_norm": 0.6049071480792642,
"learning_rate": 2.040998513663265e-05,
"loss": 0.2127,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1857706606388092,
"step": 1715,
"valid_targets_mean": 3913.2,
"valid_targets_min": 760
},
{
"epoch": 3.822222222222222,
"grad_norm": 0.5173798800043088,
"learning_rate": 2.0299188140305276e-05,
"loss": 0.2227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23684772849082947,
"step": 1720,
"valid_targets_mean": 4988.8,
"valid_targets_min": 907
},
{
"epoch": 3.8333333333333335,
"grad_norm": 0.475948457370568,
"learning_rate": 2.0188381959025905e-05,
"loss": 0.2102,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21646377444267273,
"step": 1725,
"valid_targets_mean": 5030.8,
"valid_targets_min": 893
},
{
"epoch": 3.8444444444444446,
"grad_norm": 0.5604528558221183,
"learning_rate": 2.007756999449841e-05,
"loss": 0.2154,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21436989307403564,
"step": 1730,
"valid_targets_mean": 4397.3,
"valid_targets_min": 436
},
{
"epoch": 3.8555555555555556,
"grad_norm": 0.5570797649957374,
"learning_rate": 1.9966755648604214e-05,
"loss": 0.2082,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24257858097553253,
"step": 1735,
"valid_targets_mean": 4071.1,
"valid_targets_min": 727
},
{
"epoch": 3.8666666666666667,
"grad_norm": 0.46080018957317304,
"learning_rate": 1.985594232329783e-05,
"loss": 0.1974,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18925702571868896,
"step": 1740,
"valid_targets_mean": 5059.1,
"valid_targets_min": 701
},
{
"epoch": 3.8777777777777778,
"grad_norm": 0.4812829548781769,
"learning_rate": 1.9745133420502465e-05,
"loss": 0.2088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1966492235660553,
"step": 1745,
"valid_targets_mean": 5313.2,
"valid_targets_min": 356
},
{
"epoch": 3.888888888888889,
"grad_norm": 0.5203906670242995,
"learning_rate": 1.963433234200553e-05,
"loss": 0.2074,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21304510533809662,
"step": 1750,
"valid_targets_mean": 5512.0,
"valid_targets_min": 2531
},
{
"epoch": 3.9,
"grad_norm": 0.6126769424919433,
"learning_rate": 1.9523542489354256e-05,
"loss": 0.2047,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22729770839214325,
"step": 1755,
"valid_targets_mean": 5147.4,
"valid_targets_min": 806
},
{
"epoch": 3.911111111111111,
"grad_norm": 0.7274046140099543,
"learning_rate": 1.941276726375122e-05,
"loss": 0.1936,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21173256635665894,
"step": 1760,
"valid_targets_mean": 3680.5,
"valid_targets_min": 341
},
{
"epoch": 3.9222222222222225,
"grad_norm": 0.5066040807104453,
"learning_rate": 1.930201006594999e-05,
"loss": 0.2063,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1861998438835144,
"step": 1765,
"valid_targets_mean": 4067.4,
"valid_targets_min": 679
},
{
"epoch": 3.9333333333333336,
"grad_norm": 0.5122320443909304,
"learning_rate": 1.9191274296150636e-05,
"loss": 0.1942,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1726941615343094,
"step": 1770,
"valid_targets_mean": 5269.1,
"valid_targets_min": 2317
},
{
"epoch": 3.9444444444444446,
"grad_norm": 0.49916849396763285,
"learning_rate": 1.9080563353895468e-05,
"loss": 0.2072,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22421357035636902,
"step": 1775,
"valid_targets_mean": 5178.7,
"valid_targets_min": 1286
},
{
"epoch": 3.9555555555555557,
"grad_norm": 0.5416199282424495,
"learning_rate": 1.8969880637964523e-05,
"loss": 0.2059,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20803487300872803,
"step": 1780,
"valid_targets_mean": 4335.1,
"valid_targets_min": 558
},
{
"epoch": 3.966666666666667,
"grad_norm": 0.5193931663669582,
"learning_rate": 1.885922954627137e-05,
"loss": 0.1871,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.189504474401474,
"step": 1785,
"valid_targets_mean": 4079.9,
"valid_targets_min": 238
},
{
"epoch": 3.977777777777778,
"grad_norm": 0.5499095062300164,
"learning_rate": 1.874861347575867e-05,
"loss": 0.1983,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20028920471668243,
"step": 1790,
"valid_targets_mean": 3822.6,
"valid_targets_min": 289
},
{
"epoch": 3.988888888888889,
"grad_norm": 0.8033600112574325,
"learning_rate": 1.8638035822294e-05,
"loss": 0.2081,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1908196657896042,
"step": 1795,
"valid_targets_mean": 3417.7,
"valid_targets_min": 579
},
{
"epoch": 4.0,
"grad_norm": 0.5699774855020878,
"learning_rate": 1.8527499980565505e-05,
"loss": 0.21,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20389728248119354,
"step": 1800,
"valid_targets_mean": 3703.6,
"valid_targets_min": 626
},
{
"epoch": 4.011111111111111,
"grad_norm": 0.5684718820432649,
"learning_rate": 1.841700934397776e-05,
"loss": 0.1853,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16114360094070435,
"step": 1805,
"valid_targets_mean": 4019.9,
"valid_targets_min": 359
},
{
"epoch": 4.022222222222222,
"grad_norm": 0.8806388269505611,
"learning_rate": 1.8306567304547537e-05,
"loss": 0.1936,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19996638596057892,
"step": 1810,
"valid_targets_mean": 4715.6,
"valid_targets_min": 626
},
{
"epoch": 4.033333333333333,
"grad_norm": 0.51053756371864,
"learning_rate": 1.8196177252799715e-05,
"loss": 0.1789,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17319592833518982,
"step": 1815,
"valid_targets_mean": 4569.9,
"valid_targets_min": 310
},
{
"epoch": 4.044444444444444,
"grad_norm": 0.5334838424279315,
"learning_rate": 1.8085842577663152e-05,
"loss": 0.1887,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17766398191452026,
"step": 1820,
"valid_targets_mean": 4956.6,
"valid_targets_min": 1254
},
{
"epoch": 4.055555555555555,
"grad_norm": 0.5972352034988178,
"learning_rate": 1.797556666636669e-05,
"loss": 0.1874,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21527108550071716,
"step": 1825,
"valid_targets_mean": 4466.4,
"valid_targets_min": 752
},
{
"epoch": 4.066666666666666,
"grad_norm": 0.6680425414614198,
"learning_rate": 1.786535290433512e-05,
"loss": 0.2079,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18751859664916992,
"step": 1830,
"valid_targets_mean": 3271.0,
"valid_targets_min": 257
},
{
"epoch": 4.0777777777777775,
"grad_norm": 0.6531224124175032,
"learning_rate": 1.775520467508531e-05,
"loss": 0.1778,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17578667402267456,
"step": 1835,
"valid_targets_mean": 4452.1,
"valid_targets_min": 272
},
{
"epoch": 4.088888888888889,
"grad_norm": 0.5260657108991496,
"learning_rate": 1.7645125360122254e-05,
"loss": 0.1968,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1891654133796692,
"step": 1840,
"valid_targets_mean": 4629.7,
"valid_targets_min": 449
},
{
"epoch": 4.1,
"grad_norm": 0.6623628808793622,
"learning_rate": 1.7535118338835358e-05,
"loss": 0.1791,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20977142453193665,
"step": 1845,
"valid_targets_mean": 4678.6,
"valid_targets_min": 706
},
{
"epoch": 4.111111111111111,
"grad_norm": 0.49817831354909214,
"learning_rate": 1.7425186988394586e-05,
"loss": 0.1687,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15036168694496155,
"step": 1850,
"valid_targets_mean": 4100.6,
"valid_targets_min": 460
},
{
"epoch": 4.122222222222222,
"grad_norm": 0.7692282976104367,
"learning_rate": 1.7315334683646898e-05,
"loss": 0.1957,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2357831746339798,
"step": 1855,
"valid_targets_mean": 3612.0,
"valid_targets_min": 469
},
{
"epoch": 4.133333333333334,
"grad_norm": 0.7604226917728939,
"learning_rate": 1.7205564797012523e-05,
"loss": 0.1756,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18661850690841675,
"step": 1860,
"valid_targets_mean": 2620.3,
"valid_targets_min": 350
},
{
"epoch": 4.144444444444445,
"grad_norm": 2.0101196160603068,
"learning_rate": 1.709588069838154e-05,
"loss": 0.1809,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1724318563938141,
"step": 1865,
"valid_targets_mean": 5017.0,
"valid_targets_min": 1569
},
{
"epoch": 4.155555555555556,
"grad_norm": 0.5176081748220945,
"learning_rate": 1.698628575501034e-05,
"loss": 0.1936,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19876284897327423,
"step": 1870,
"valid_targets_mean": 5018.9,
"valid_targets_min": 263
},
{
"epoch": 4.166666666666667,
"grad_norm": 0.9808635975742276,
"learning_rate": 1.6876783331418298e-05,
"loss": 0.1717,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1666911095380783,
"step": 1875,
"valid_targets_mean": 4702.4,
"valid_targets_min": 407
},
{
"epoch": 4.177777777777778,
"grad_norm": 0.5808524542461861,
"learning_rate": 1.6767376789284463e-05,
"loss": 0.1774,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1758621335029602,
"step": 1880,
"valid_targets_mean": 4182.8,
"valid_targets_min": 784
},
{
"epoch": 4.188888888888889,
"grad_norm": 0.6204999195476525,
"learning_rate": 1.6658069487344375e-05,
"loss": 0.1899,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2005249559879303,
"step": 1885,
"valid_targets_mean": 3792.5,
"valid_targets_min": 494
},
{
"epoch": 4.2,
"grad_norm": 0.5137903997287736,
"learning_rate": 1.6548864781286922e-05,
"loss": 0.1972,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2340593785047531,
"step": 1890,
"valid_targets_mean": 5812.1,
"valid_targets_min": 524
},
{
"epoch": 4.211111111111111,
"grad_norm": 0.6084912042277302,
"learning_rate": 1.643976602365136e-05,
"loss": 0.1757,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16975033283233643,
"step": 1895,
"valid_targets_mean": 4331.1,
"valid_targets_min": 331
},
{
"epoch": 4.222222222222222,
"grad_norm": 0.6075386711012212,
"learning_rate": 1.6330776563724354e-05,
"loss": 0.185,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22459827363491058,
"step": 1900,
"valid_targets_mean": 4126.8,
"valid_targets_min": 795
},
{
"epoch": 4.233333333333333,
"grad_norm": 0.5755129056791891,
"learning_rate": 1.62218997474372e-05,
"loss": 0.1838,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21275658905506134,
"step": 1905,
"valid_targets_mean": 4578.6,
"valid_targets_min": 1108
},
{
"epoch": 4.2444444444444445,
"grad_norm": 0.48496618553325505,
"learning_rate": 1.6113138917263048e-05,
"loss": 0.1772,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19767220318317413,
"step": 1910,
"valid_targets_mean": 5804.9,
"valid_targets_min": 2846
},
{
"epoch": 4.2555555555555555,
"grad_norm": 0.4549668072241525,
"learning_rate": 1.6004497412114354e-05,
"loss": 0.1867,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17819060385227203,
"step": 1915,
"valid_targets_mean": 5614.6,
"valid_targets_min": 1831
},
{
"epoch": 4.266666666666667,
"grad_norm": 0.6183406372685394,
"learning_rate": 1.5895978567240314e-05,
"loss": 0.1749,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15784139931201935,
"step": 1920,
"valid_targets_mean": 4810.9,
"valid_targets_min": 773
},
{
"epoch": 4.277777777777778,
"grad_norm": 0.8322895904947307,
"learning_rate": 1.578758571412455e-05,
"loss": 0.1762,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18652507662773132,
"step": 1925,
"valid_targets_mean": 4329.2,
"valid_targets_min": 386
},
{
"epoch": 4.288888888888889,
"grad_norm": 0.5770135845347438,
"learning_rate": 1.5679322180382725e-05,
"loss": 0.1811,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16608160734176636,
"step": 1930,
"valid_targets_mean": 4621.5,
"valid_targets_min": 311
},
{
"epoch": 4.3,
"grad_norm": 0.5340562181628422,
"learning_rate": 1.5571191289660517e-05,
"loss": 0.1788,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19259527325630188,
"step": 1935,
"valid_targets_mean": 4542.4,
"valid_targets_min": 260
},
{
"epoch": 4.311111111111111,
"grad_norm": 0.5261906980551986,
"learning_rate": 1.5463196361531463e-05,
"loss": 0.1761,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1652071177959442,
"step": 1940,
"valid_targets_mean": 4197.6,
"valid_targets_min": 351
},
{
"epoch": 4.322222222222222,
"grad_norm": 0.592935992602908,
"learning_rate": 1.5355340711395154e-05,
"loss": 0.1777,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.173154816031456,
"step": 1945,
"valid_targets_mean": 3825.3,
"valid_targets_min": 362
},
{
"epoch": 4.333333333333333,
"grad_norm": 0.6272334939060681,
"learning_rate": 1.5247627650375356e-05,
"loss": 0.1931,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2008039653301239,
"step": 1950,
"valid_targets_mean": 3113.1,
"valid_targets_min": 322
},
{
"epoch": 4.344444444444444,
"grad_norm": 0.555228561708398,
"learning_rate": 1.5140060485218448e-05,
"loss": 0.2096,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21200916171073914,
"step": 1955,
"valid_targets_mean": 4375.6,
"valid_targets_min": 310
},
{
"epoch": 4.355555555555555,
"grad_norm": 0.6835598461585742,
"learning_rate": 1.5032642518191842e-05,
"loss": 0.1768,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14425814151763916,
"step": 1960,
"valid_targets_mean": 3954.5,
"valid_targets_min": 330
},
{
"epoch": 4.366666666666666,
"grad_norm": 0.6035952467820439,
"learning_rate": 1.4925377046982642e-05,
"loss": 0.2015,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21049894392490387,
"step": 1965,
"valid_targets_mean": 4949.8,
"valid_targets_min": 355
},
{
"epoch": 4.377777777777778,
"grad_norm": 0.5857625986334895,
"learning_rate": 1.4818267364596382e-05,
"loss": 0.1876,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18046881258487701,
"step": 1970,
"valid_targets_mean": 4697.1,
"valid_targets_min": 299
},
{
"epoch": 4.388888888888889,
"grad_norm": 0.5718012391776226,
"learning_rate": 1.4711316759255963e-05,
"loss": 0.1753,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1622503101825714,
"step": 1975,
"valid_targets_mean": 3431.1,
"valid_targets_min": 354
},
{
"epoch": 4.4,
"grad_norm": 0.43582007281524776,
"learning_rate": 1.4604528514300657e-05,
"loss": 0.1748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15850958228111267,
"step": 1980,
"valid_targets_mean": 5733.1,
"valid_targets_min": 910
},
{
"epoch": 4.411111111111111,
"grad_norm": 0.49374637349645484,
"learning_rate": 1.449790590808537e-05,
"loss": 0.1876,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18013733625411987,
"step": 1985,
"valid_targets_mean": 5131.2,
"valid_targets_min": 337
},
{
"epoch": 4.4222222222222225,
"grad_norm": 0.572851756273217,
"learning_rate": 1.4391452213879949e-05,
"loss": 0.193,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20474226772785187,
"step": 1990,
"valid_targets_mean": 4462.7,
"valid_targets_min": 325
},
{
"epoch": 4.433333333333334,
"grad_norm": 0.5793355474922025,
"learning_rate": 1.428517069976872e-05,
"loss": 0.1718,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16491413116455078,
"step": 1995,
"valid_targets_mean": 3528.5,
"valid_targets_min": 552
},
{
"epoch": 4.444444444444445,
"grad_norm": 0.5903074938236061,
"learning_rate": 1.4179064628550139e-05,
"loss": 0.1935,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1816416233778,
"step": 2000,
"valid_targets_mean": 3910.4,
"valid_targets_min": 763
},
{
"epoch": 4.455555555555556,
"grad_norm": 0.6305819902409494,
"learning_rate": 1.4073137257636664e-05,
"loss": 0.1766,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19318446516990662,
"step": 2005,
"valid_targets_mean": 3550.6,
"valid_targets_min": 302
},
{
"epoch": 4.466666666666667,
"grad_norm": 0.4779759860608762,
"learning_rate": 1.3967391838954692e-05,
"loss": 0.1843,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18327465653419495,
"step": 2010,
"valid_targets_mean": 5106.4,
"valid_targets_min": 500
},
{
"epoch": 4.477777777777778,
"grad_norm": 0.5305939803137136,
"learning_rate": 1.3861831618844797e-05,
"loss": 0.1783,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1693880707025528,
"step": 2015,
"valid_targets_mean": 4351.6,
"valid_targets_min": 229
},
{
"epoch": 4.488888888888889,
"grad_norm": 0.556324424618143,
"learning_rate": 1.3756459837962006e-05,
"loss": 0.1791,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20899684727191925,
"step": 2020,
"valid_targets_mean": 4319.3,
"valid_targets_min": 549
},
{
"epoch": 4.5,
"grad_norm": 0.511876429216537,
"learning_rate": 1.3651279731176364e-05,
"loss": 0.1829,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17796722054481506,
"step": 2025,
"valid_targets_mean": 4701.2,
"valid_targets_min": 288
},
{
"epoch": 4.511111111111111,
"grad_norm": 0.483714945780867,
"learning_rate": 1.354629452747357e-05,
"loss": 0.1912,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20415997505187988,
"step": 2030,
"valid_targets_mean": 5413.3,
"valid_targets_min": 828
},
{
"epoch": 4.522222222222222,
"grad_norm": 0.550003762747203,
"learning_rate": 1.3441507449855914e-05,
"loss": 0.1752,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19250717759132385,
"step": 2035,
"valid_targets_mean": 4369.6,
"valid_targets_min": 479
},
{
"epoch": 4.533333333333333,
"grad_norm": 0.6783462987349089,
"learning_rate": 1.3336921715243269e-05,
"loss": 0.1881,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19246190786361694,
"step": 2040,
"valid_targets_mean": 3528.1,
"valid_targets_min": 374
},
{
"epoch": 4.544444444444444,
"grad_norm": 0.5408235091774686,
"learning_rate": 1.323254053437438e-05,
"loss": 0.1904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17241522669792175,
"step": 2045,
"valid_targets_mean": 4255.3,
"valid_targets_min": 369
},
{
"epoch": 4.555555555555555,
"grad_norm": 0.6039322559430673,
"learning_rate": 1.3128367111708263e-05,
"loss": 0.1996,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2341468334197998,
"step": 2050,
"valid_targets_mean": 3956.6,
"valid_targets_min": 394
},
{
"epoch": 4.566666666666666,
"grad_norm": 0.5210903518867492,
"learning_rate": 1.3024404645325852e-05,
"loss": 0.1724,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15793342888355255,
"step": 2055,
"valid_targets_mean": 4055.9,
"valid_targets_min": 445
},
{
"epoch": 4.5777777777777775,
"grad_norm": 0.5833872344585873,
"learning_rate": 1.2920656326831802e-05,
"loss": 0.1794,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1832209825515747,
"step": 2060,
"valid_targets_mean": 4347.2,
"valid_targets_min": 821
},
{
"epoch": 4.588888888888889,
"grad_norm": 0.6031674137203222,
"learning_rate": 1.2817125341256533e-05,
"loss": 0.1744,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1834879219532013,
"step": 2065,
"valid_targets_mean": 4529.2,
"valid_targets_min": 599
},
{
"epoch": 4.6,
"grad_norm": 0.5606753056516752,
"learning_rate": 1.271381486695841e-05,
"loss": 0.1768,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19605132937431335,
"step": 2070,
"valid_targets_mean": 4212.9,
"valid_targets_min": 787
},
{
"epoch": 4.611111111111111,
"grad_norm": 0.5776621511116948,
"learning_rate": 1.2610728075526226e-05,
"loss": 0.1756,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18587753176689148,
"step": 2075,
"valid_targets_mean": 4053.1,
"valid_targets_min": 691
},
{
"epoch": 4.622222222222222,
"grad_norm": 0.5798129503418713,
"learning_rate": 1.250786813168176e-05,
"loss": 0.1832,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20402874052524567,
"step": 2080,
"valid_targets_mean": 3672.6,
"valid_targets_min": 344
},
{
"epoch": 4.633333333333333,
"grad_norm": 0.6919963067401517,
"learning_rate": 1.2405238193182711e-05,
"loss": 0.1854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17745724320411682,
"step": 2085,
"valid_targets_mean": 4872.3,
"valid_targets_min": 893
},
{
"epoch": 4.644444444444445,
"grad_norm": 0.5410119427822812,
"learning_rate": 1.2302841410725664e-05,
"loss": 0.1766,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15331071615219116,
"step": 2090,
"valid_targets_mean": 3988.9,
"valid_targets_min": 428
},
{
"epoch": 4.655555555555556,
"grad_norm": 0.7216545479667651,
"learning_rate": 1.2200680927849447e-05,
"loss": 0.1981,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1797543466091156,
"step": 2095,
"valid_targets_mean": 2614.6,
"valid_targets_min": 251
},
{
"epoch": 4.666666666666667,
"grad_norm": 0.5002180832671941,
"learning_rate": 1.2098759880838562e-05,
"loss": 0.1958,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18461236357688904,
"step": 2100,
"valid_targets_mean": 4813.4,
"valid_targets_min": 435
},
{
"epoch": 4.677777777777778,
"grad_norm": 0.6182929611883429,
"learning_rate": 1.1997081398626951e-05,
"loss": 0.1859,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2105226218700409,
"step": 2105,
"valid_targets_mean": 4094.2,
"valid_targets_min": 323
},
{
"epoch": 4.688888888888889,
"grad_norm": 0.4881507674799763,
"learning_rate": 1.18956486027019e-05,
"loss": 0.1842,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19194287061691284,
"step": 2110,
"valid_targets_mean": 5929.9,
"valid_targets_min": 393
},
{
"epoch": 4.7,
"grad_norm": 0.5040091679866664,
"learning_rate": 1.179446460700824e-05,
"loss": 0.1725,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16895370185375214,
"step": 2115,
"valid_targets_mean": 4929.5,
"valid_targets_min": 707
},
{
"epoch": 4.711111111111111,
"grad_norm": 0.6224311426866647,
"learning_rate": 1.1693532517852723e-05,
"loss": 0.1854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19463957846164703,
"step": 2120,
"valid_targets_mean": 3470.4,
"valid_targets_min": 274
},
{
"epoch": 4.722222222222222,
"grad_norm": 0.5006685355462409,
"learning_rate": 1.1592855433808694e-05,
"loss": 0.1834,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18951840698719025,
"step": 2125,
"valid_targets_mean": 4650.5,
"valid_targets_min": 472
},
{
"epoch": 4.733333333333333,
"grad_norm": 0.5583432290318547,
"learning_rate": 1.1492436445620925e-05,
"loss": 0.2116,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2058507204055786,
"step": 2130,
"valid_targets_mean": 4445.6,
"valid_targets_min": 507
},
{
"epoch": 4.7444444444444445,
"grad_norm": 0.6115562244324856,
"learning_rate": 1.1392278636110779e-05,
"loss": 0.1792,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1816525161266327,
"step": 2135,
"valid_targets_mean": 3926.6,
"valid_targets_min": 317
},
{
"epoch": 4.7555555555555555,
"grad_norm": 0.6799289643246074,
"learning_rate": 1.1292385080081517e-05,
"loss": 0.1839,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18920482695102692,
"step": 2140,
"valid_targets_mean": 3852.1,
"valid_targets_min": 664
},
{
"epoch": 4.766666666666667,
"grad_norm": 0.5372580596002592,
"learning_rate": 1.1192758844223936e-05,
"loss": 0.1866,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19938793778419495,
"step": 2145,
"valid_targets_mean": 4286.1,
"valid_targets_min": 1315
},
{
"epoch": 4.777777777777778,
"grad_norm": 0.6680811833814686,
"learning_rate": 1.1093402987022213e-05,
"loss": 0.181,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1695014089345932,
"step": 2150,
"valid_targets_mean": 3701.4,
"valid_targets_min": 490
},
{
"epoch": 4.788888888888889,
"grad_norm": 0.532047483958881,
"learning_rate": 1.0994320558660027e-05,
"loss": 0.1763,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17513462901115417,
"step": 2155,
"valid_targets_mean": 4425.6,
"valid_targets_min": 433
},
{
"epoch": 4.8,
"grad_norm": 0.48955367777151076,
"learning_rate": 1.0895514600926885e-05,
"loss": 0.1867,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17399895191192627,
"step": 2160,
"valid_targets_mean": 5218.5,
"valid_targets_min": 214
},
{
"epoch": 4.811111111111111,
"grad_norm": 0.5389514735096081,
"learning_rate": 1.0796988147124767e-05,
"loss": 0.1975,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17753294110298157,
"step": 2165,
"valid_targets_mean": 4527.4,
"valid_targets_min": 503
},
{
"epoch": 4.822222222222222,
"grad_norm": 0.9507144410317093,
"learning_rate": 1.0698744221974992e-05,
"loss": 0.1794,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16774040460586548,
"step": 2170,
"valid_targets_mean": 4067.5,
"valid_targets_min": 326
},
{
"epoch": 4.833333333333333,
"grad_norm": 0.7577965149578579,
"learning_rate": 1.0600785841525387e-05,
"loss": 0.1856,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.171237975358963,
"step": 2175,
"valid_targets_mean": 2212.1,
"valid_targets_min": 244
},
{
"epoch": 4.844444444444444,
"grad_norm": 0.6346378233034229,
"learning_rate": 1.050311601305765e-05,
"loss": 0.1986,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19538968801498413,
"step": 2180,
"valid_targets_mean": 3583.8,
"valid_targets_min": 516
},
{
"epoch": 4.855555555555555,
"grad_norm": 0.4998326129850529,
"learning_rate": 1.0405737734995083e-05,
"loss": 0.1882,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18201160430908203,
"step": 2185,
"valid_targets_mean": 5208.7,
"valid_targets_min": 532
},
{
"epoch": 4.866666666666667,
"grad_norm": 0.4912406558278592,
"learning_rate": 1.0308653996810464e-05,
"loss": 0.188,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1872013956308365,
"step": 2190,
"valid_targets_mean": 5092.2,
"valid_targets_min": 425
},
{
"epoch": 4.877777777777778,
"grad_norm": 0.49376056988072214,
"learning_rate": 1.0211867778934367e-05,
"loss": 0.1697,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1690564602613449,
"step": 2195,
"valid_targets_mean": 4442.9,
"valid_targets_min": 429
},
{
"epoch": 4.888888888888889,
"grad_norm": 0.508817245830961,
"learning_rate": 1.0115382052663585e-05,
"loss": 0.1694,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18353170156478882,
"step": 2200,
"valid_targets_mean": 4733.5,
"valid_targets_min": 771
},
{
"epoch": 4.9,
"grad_norm": 0.6203985436463371,
"learning_rate": 1.0019199780069964e-05,
"loss": 0.1868,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.186293363571167,
"step": 2205,
"valid_targets_mean": 5280.8,
"valid_targets_min": 1076
},
{
"epoch": 4.911111111111111,
"grad_norm": 0.5435822402870782,
"learning_rate": 9.923323913909432e-06,
"loss": 0.178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16027119755744934,
"step": 2210,
"valid_targets_mean": 4576.7,
"valid_targets_min": 715
},
{
"epoch": 4.9222222222222225,
"grad_norm": 0.4813208514339158,
"learning_rate": 9.827757397531373e-06,
"loss": 0.1885,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18010596930980682,
"step": 2215,
"valid_targets_mean": 4874.1,
"valid_targets_min": 514
},
{
"epoch": 4.933333333333334,
"grad_norm": 0.47571932658220617,
"learning_rate": 9.732503164788251e-06,
"loss": 0.1855,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2023318111896515,
"step": 2220,
"valid_targets_mean": 5265.2,
"valid_targets_min": 713
},
{
"epoch": 4.944444444444445,
"grad_norm": 0.5094329037736811,
"learning_rate": 9.637564139945576e-06,
"loss": 0.1875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17661988735198975,
"step": 2225,
"valid_targets_mean": 4558.8,
"valid_targets_min": 687
},
{
"epoch": 4.955555555555556,
"grad_norm": 0.4793807998595178,
"learning_rate": 9.542943237592087e-06,
"loss": 0.1849,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16008728742599487,
"step": 2230,
"valid_targets_mean": 4763.2,
"valid_targets_min": 322
},
{
"epoch": 4.966666666666667,
"grad_norm": 0.5198576392426981,
"learning_rate": 9.448643362550289e-06,
"loss": 0.1765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1592457890510559,
"step": 2235,
"valid_targets_mean": 4264.2,
"valid_targets_min": 533
},
{
"epoch": 4.977777777777778,
"grad_norm": 0.6619815493156004,
"learning_rate": 9.354667409787293e-06,
"loss": 0.187,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2216649353504181,
"step": 2240,
"valid_targets_mean": 3992.6,
"valid_targets_min": 507
},
{
"epoch": 4.988888888888889,
"grad_norm": 0.6140070608373793,
"learning_rate": 9.261018264325934e-06,
"loss": 0.1891,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19572074711322784,
"step": 2245,
"valid_targets_mean": 4591.1,
"valid_targets_min": 537
},
{
"epoch": 5.0,
"grad_norm": 0.501892983243998,
"learning_rate": 9.16769880115619e-06,
"loss": 0.1883,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1657697856426239,
"step": 2250,
"valid_targets_mean": 4708.9,
"valid_targets_min": 535
},
{
"epoch": 5.011111111111111,
"grad_norm": 0.5678698075193299,
"learning_rate": 9.074711885146928e-06,
"loss": 0.1815,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18971869349479675,
"step": 2255,
"valid_targets_mean": 4164.5,
"valid_targets_min": 473
},
{
"epoch": 5.022222222222222,
"grad_norm": 0.4976115195176415,
"learning_rate": 8.982060370957953e-06,
"loss": 0.1665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1750781536102295,
"step": 2260,
"valid_targets_mean": 5020.1,
"valid_targets_min": 290
},
{
"epoch": 5.033333333333333,
"grad_norm": 0.684873203009687,
"learning_rate": 8.889747102952388e-06,
"loss": 0.1649,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18005429208278656,
"step": 2265,
"valid_targets_mean": 3440.0,
"valid_targets_min": 307
},
{
"epoch": 5.044444444444444,
"grad_norm": 0.5033230409129855,
"learning_rate": 8.79777491510932e-06,
"loss": 0.1776,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18455460667610168,
"step": 2270,
"valid_targets_mean": 5328.9,
"valid_targets_min": 247
},
{
"epoch": 5.055555555555555,
"grad_norm": 0.5173477816105783,
"learning_rate": 8.706146630936833e-06,
"loss": 0.1773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16611988842487335,
"step": 2275,
"valid_targets_mean": 4557.4,
"valid_targets_min": 1456
},
{
"epoch": 5.066666666666666,
"grad_norm": 0.6042166823752133,
"learning_rate": 8.6148650633853e-06,
"loss": 0.1678,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16022253036499023,
"step": 2280,
"valid_targets_mean": 4060.4,
"valid_targets_min": 406
},
{
"epoch": 5.0777777777777775,
"grad_norm": 0.6312726838673673,
"learning_rate": 8.523933014761038e-06,
"loss": 0.1669,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1730879247188568,
"step": 2285,
"valid_targets_mean": 3444.7,
"valid_targets_min": 591
},
{
"epoch": 5.088888888888889,
"grad_norm": 0.5063988724236291,
"learning_rate": 8.43335327664027e-06,
"loss": 0.1639,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17903856933116913,
"step": 2290,
"valid_targets_mean": 4995.8,
"valid_targets_min": 362
},
{
"epoch": 5.1,
"grad_norm": 0.596851968168963,
"learning_rate": 8.343128629783457e-06,
"loss": 0.1781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1816779375076294,
"step": 2295,
"valid_targets_mean": 5093.5,
"valid_targets_min": 1031
},
{
"epoch": 5.111111111111111,
"grad_norm": 0.5710225280632183,
"learning_rate": 8.253261844049883e-06,
"loss": 0.1854,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.168840691447258,
"step": 2300,
"valid_targets_mean": 4315.1,
"valid_targets_min": 404
},
{
"epoch": 5.122222222222222,
"grad_norm": 0.4748229596313232,
"learning_rate": 8.163755678312651e-06,
"loss": 0.1897,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2132357954978943,
"step": 2305,
"valid_targets_mean": 5789.9,
"valid_targets_min": 283
},
{
"epoch": 5.133333333333334,
"grad_norm": 0.5287110732175844,
"learning_rate": 8.074612880373972e-06,
"loss": 0.156,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1418144404888153,
"step": 2310,
"valid_targets_mean": 4440.9,
"valid_targets_min": 820
},
{
"epoch": 5.144444444444445,
"grad_norm": 0.5936650959791397,
"learning_rate": 7.985836186880836e-06,
"loss": 0.1589,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1570664495229721,
"step": 2315,
"valid_targets_mean": 4190.9,
"valid_targets_min": 335
},
{
"epoch": 5.155555555555556,
"grad_norm": 0.5901961361581314,
"learning_rate": 7.897428323240961e-06,
"loss": 0.1688,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14300040900707245,
"step": 2320,
"valid_targets_mean": 4530.4,
"valid_targets_min": 244
},
{
"epoch": 5.166666666666667,
"grad_norm": 0.6401412212898742,
"learning_rate": 7.809392003539142e-06,
"loss": 0.1821,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18366971611976624,
"step": 2325,
"valid_targets_mean": 4497.3,
"valid_targets_min": 263
},
{
"epoch": 5.177777777777778,
"grad_norm": 0.5870360837488321,
"learning_rate": 7.72172993045393e-06,
"loss": 0.1841,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19780203700065613,
"step": 2330,
"valid_targets_mean": 4467.4,
"valid_targets_min": 229
},
{
"epoch": 5.188888888888889,
"grad_norm": 0.6222022824851804,
"learning_rate": 7.634444795174671e-06,
"loss": 0.1689,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1735324114561081,
"step": 2335,
"valid_targets_mean": 3150.3,
"valid_targets_min": 545
},
{
"epoch": 5.2,
"grad_norm": 0.612607191802424,
"learning_rate": 7.547539277318861e-06,
"loss": 0.1751,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17350366711616516,
"step": 2340,
"valid_targets_mean": 4619.4,
"valid_targets_min": 2673
},
{
"epoch": 5.211111111111111,
"grad_norm": 0.7049036474479938,
"learning_rate": 7.461016044849918e-06,
"loss": 0.1714,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1929350346326828,
"step": 2345,
"valid_targets_mean": 3115.6,
"valid_targets_min": 669
},
{
"epoch": 5.222222222222222,
"grad_norm": 0.5410958789958754,
"learning_rate": 7.374877753995224e-06,
"loss": 0.1691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1535416543483734,
"step": 2350,
"valid_targets_mean": 4524.3,
"valid_targets_min": 270
},
{
"epoch": 5.233333333333333,
"grad_norm": 0.6124896649786123,
"learning_rate": 7.289127049164648e-06,
"loss": 0.1797,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19674161076545715,
"step": 2355,
"valid_targets_mean": 4170.1,
"valid_targets_min": 330
},
{
"epoch": 5.2444444444444445,
"grad_norm": 0.4994651140821769,
"learning_rate": 7.203766562869303e-06,
"loss": 0.1611,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17009888589382172,
"step": 2360,
"valid_targets_mean": 5436.4,
"valid_targets_min": 231
},
{
"epoch": 5.2555555555555555,
"grad_norm": 0.6480412371115537,
"learning_rate": 7.118798915640779e-06,
"loss": 0.1703,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17472770810127258,
"step": 2365,
"valid_targets_mean": 3678.0,
"valid_targets_min": 550
},
{
"epoch": 5.266666666666667,
"grad_norm": 1.0624757859362757,
"learning_rate": 7.03422671595065e-06,
"loss": 0.1767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1649678647518158,
"step": 2370,
"valid_targets_mean": 4187.2,
"valid_targets_min": 576
},
{
"epoch": 5.277777777777778,
"grad_norm": 0.5370386672344407,
"learning_rate": 6.950052560130414e-06,
"loss": 0.1724,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16847020387649536,
"step": 2375,
"valid_targets_mean": 4683.0,
"valid_targets_min": 347
},
{
"epoch": 5.288888888888889,
"grad_norm": 0.6286328398732474,
"learning_rate": 6.866279032291792e-06,
"loss": 0.1574,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15064719319343567,
"step": 2380,
"valid_targets_mean": 3968.8,
"valid_targets_min": 860
},
{
"epoch": 5.3,
"grad_norm": 0.5874233077841997,
"learning_rate": 6.782908704247404e-06,
"loss": 0.1664,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1599939465522766,
"step": 2385,
"valid_targets_mean": 4385.2,
"valid_targets_min": 489
},
{
"epoch": 5.311111111111111,
"grad_norm": 1.3591182836953655,
"learning_rate": 6.699944135431788e-06,
"loss": 0.1673,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15932810306549072,
"step": 2390,
"valid_targets_mean": 3961.2,
"valid_targets_min": 312
},
{
"epoch": 5.322222222222222,
"grad_norm": 0.5070411136628474,
"learning_rate": 6.617387872822842e-06,
"loss": 0.1614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17544835805892944,
"step": 2395,
"valid_targets_mean": 5117.5,
"valid_targets_min": 760
},
{
"epoch": 5.333333333333333,
"grad_norm": 0.53150416040399,
"learning_rate": 6.535242450863632e-06,
"loss": 0.1807,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18131661415100098,
"step": 2400,
"valid_targets_mean": 4886.9,
"valid_targets_min": 950
},
{
"epoch": 5.344444444444444,
"grad_norm": 0.5740042351970523,
"learning_rate": 6.453510391384606e-06,
"loss": 0.1784,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17442232370376587,
"step": 2405,
"valid_targets_mean": 3791.2,
"valid_targets_min": 423
},
{
"epoch": 5.355555555555555,
"grad_norm": 0.46895250507011754,
"learning_rate": 6.372194203526121e-06,
"loss": 0.1619,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1439400017261505,
"step": 2410,
"valid_targets_mean": 5206.1,
"valid_targets_min": 546
},
{
"epoch": 5.366666666666666,
"grad_norm": 0.6356884401904439,
"learning_rate": 6.2912963836614916e-06,
"loss": 0.178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20238983631134033,
"step": 2415,
"valid_targets_mean": 3916.1,
"valid_targets_min": 259
},
{
"epoch": 5.377777777777778,
"grad_norm": 0.6215829652547431,
"learning_rate": 6.210819415320253e-06,
"loss": 0.1758,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14836809039115906,
"step": 2420,
"valid_targets_mean": 4363.0,
"valid_targets_min": 1439
},
{
"epoch": 5.388888888888889,
"grad_norm": 0.5563038744061828,
"learning_rate": 6.130765769112024e-06,
"loss": 0.1663,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17539286613464355,
"step": 2425,
"valid_targets_mean": 4587.9,
"valid_targets_min": 678
},
{
"epoch": 5.4,
"grad_norm": 0.5854838140260286,
"learning_rate": 6.051137902650575e-06,
"loss": 0.18,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1495419442653656,
"step": 2430,
"valid_targets_mean": 5041.7,
"valid_targets_min": 556
},
{
"epoch": 5.411111111111111,
"grad_norm": 0.5363335763908142,
"learning_rate": 5.9719382604784405e-06,
"loss": 0.171,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16383875906467438,
"step": 2435,
"valid_targets_mean": 4707.6,
"valid_targets_min": 403
},
{
"epoch": 5.4222222222222225,
"grad_norm": 0.6920747040184556,
"learning_rate": 5.893169273991825e-06,
"loss": 0.1698,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20277723670005798,
"step": 2440,
"valid_targets_mean": 3596.3,
"valid_targets_min": 322
},
{
"epoch": 5.433333333333334,
"grad_norm": 0.5506429671164328,
"learning_rate": 5.8148333613659945e-06,
"loss": 0.1745,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16613167524337769,
"step": 2445,
"valid_targets_mean": 4807.0,
"valid_targets_min": 322
},
{
"epoch": 5.444444444444445,
"grad_norm": 0.5643735346992593,
"learning_rate": 5.736932927481016e-06,
"loss": 0.172,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1531161069869995,
"step": 2450,
"valid_targets_mean": 4414.6,
"valid_targets_min": 820
},
{
"epoch": 5.455555555555556,
"grad_norm": 0.4966057762741589,
"learning_rate": 5.659470363847956e-06,
"loss": 0.1701,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15961554646492004,
"step": 2455,
"valid_targets_mean": 5181.2,
"valid_targets_min": 717
},
{
"epoch": 5.466666666666667,
"grad_norm": 0.597256774781567,
"learning_rate": 5.5824480485354315e-06,
"loss": 0.1689,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19598601758480072,
"step": 2460,
"valid_targets_mean": 5393.7,
"valid_targets_min": 377
},
{
"epoch": 5.477777777777778,
"grad_norm": 0.47416903570786223,
"learning_rate": 5.505868346096623e-06,
"loss": 0.1842,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17863276600837708,
"step": 2465,
"valid_targets_mean": 5649.1,
"valid_targets_min": 982
},
{
"epoch": 5.488888888888889,
"grad_norm": 0.656500574854733,
"learning_rate": 5.429733607496674e-06,
"loss": 0.161,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15013918280601501,
"step": 2470,
"valid_targets_mean": 3773.6,
"valid_targets_min": 442
},
{
"epoch": 5.5,
"grad_norm": 0.5765586477143745,
"learning_rate": 5.354046170040537e-06,
"loss": 0.1734,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18464767932891846,
"step": 2475,
"valid_targets_mean": 4310.9,
"valid_targets_min": 359
},
{
"epoch": 5.511111111111111,
"grad_norm": 0.513219195279251,
"learning_rate": 5.278808357301186e-06,
"loss": 0.1707,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16760313510894775,
"step": 2480,
"valid_targets_mean": 4913.3,
"valid_targets_min": 259
},
{
"epoch": 5.522222222222222,
"grad_norm": 0.6307469353575386,
"learning_rate": 5.204022479048325e-06,
"loss": 0.1756,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18006855249404907,
"step": 2485,
"valid_targets_mean": 3771.8,
"valid_targets_min": 419
},
{
"epoch": 5.533333333333333,
"grad_norm": 0.6063474311283815,
"learning_rate": 5.129690831177425e-06,
"loss": 0.1748,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1837840974330902,
"step": 2490,
"valid_targets_mean": 3647.8,
"valid_targets_min": 524
},
{
"epoch": 5.544444444444444,
"grad_norm": 0.5318846921681258,
"learning_rate": 5.055815695639303e-06,
"loss": 0.1791,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1946224868297577,
"step": 2495,
"valid_targets_mean": 4591.3,
"valid_targets_min": 433
},
{
"epoch": 5.555555555555555,
"grad_norm": 0.593321647760732,
"learning_rate": 4.982399340370017e-06,
"loss": 0.1607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18851664662361145,
"step": 2500,
"valid_targets_mean": 4363.1,
"valid_targets_min": 771
},
{
"epoch": 5.566666666666666,
"grad_norm": 0.5435021274855566,
"learning_rate": 4.909444019221274e-06,
"loss": 0.1647,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1688232570886612,
"step": 2505,
"valid_targets_mean": 4495.8,
"valid_targets_min": 328
},
{
"epoch": 5.5777777777777775,
"grad_norm": 0.4945201322111143,
"learning_rate": 4.836951971891215e-06,
"loss": 0.1613,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14214378595352173,
"step": 2510,
"valid_targets_mean": 5189.9,
"valid_targets_min": 1223
},
{
"epoch": 5.588888888888889,
"grad_norm": 0.5455726240398745,
"learning_rate": 4.764925423855669e-06,
"loss": 0.1777,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1754768192768097,
"step": 2515,
"valid_targets_mean": 4304.2,
"valid_targets_min": 479
},
{
"epoch": 5.6,
"grad_norm": 0.6294730140787744,
"learning_rate": 4.693366586299824e-06,
"loss": 0.1817,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15617169439792633,
"step": 2520,
"valid_targets_mean": 3736.4,
"valid_targets_min": 218
},
{
"epoch": 5.611111111111111,
"grad_norm": 0.5250062129007003,
"learning_rate": 4.622277656050369e-06,
"loss": 0.1782,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17956486344337463,
"step": 2525,
"valid_targets_mean": 5230.8,
"valid_targets_min": 2431
},
{
"epoch": 5.622222222222222,
"grad_norm": 0.7086051651918763,
"learning_rate": 4.551660815508012e-06,
"loss": 0.1786,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1606767177581787,
"step": 2530,
"valid_targets_mean": 4573.8,
"valid_targets_min": 285
},
{
"epoch": 5.633333333333333,
"grad_norm": 0.48064688634395325,
"learning_rate": 4.481518232580515e-06,
"loss": 0.1732,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1643509864807129,
"step": 2535,
"valid_targets_mean": 5939.2,
"valid_targets_min": 2884
},
{
"epoch": 5.644444444444445,
"grad_norm": 0.5324700467948121,
"learning_rate": 4.411852060616115e-06,
"loss": 0.182,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1770201474428177,
"step": 2540,
"valid_targets_mean": 5397.2,
"valid_targets_min": 283
},
{
"epoch": 5.655555555555556,
"grad_norm": 0.5436206325195655,
"learning_rate": 4.342664438337447e-06,
"loss": 0.1792,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14190086722373962,
"step": 2545,
"valid_targets_mean": 4431.3,
"valid_targets_min": 850
},
{
"epoch": 5.666666666666667,
"grad_norm": 0.4773318240610541,
"learning_rate": 4.273957489775862e-06,
"loss": 0.1726,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15811610221862793,
"step": 2550,
"valid_targets_mean": 4552.0,
"valid_targets_min": 873
},
{
"epoch": 5.677777777777778,
"grad_norm": 0.5208734691655355,
"learning_rate": 4.205733324206216e-06,
"loss": 0.1761,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16764241456985474,
"step": 2555,
"valid_targets_mean": 4251.2,
"valid_targets_min": 777
},
{
"epoch": 5.688888888888889,
"grad_norm": 0.4928403449962443,
"learning_rate": 4.137994036082138e-06,
"loss": 0.1731,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.175938218832016,
"step": 2560,
"valid_targets_mean": 5055.7,
"valid_targets_min": 365
},
{
"epoch": 5.7,
"grad_norm": 0.5244262215031825,
"learning_rate": 4.070741704971726e-06,
"loss": 0.1744,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2002006471157074,
"step": 2565,
"valid_targets_mean": 4992.8,
"valid_targets_min": 377
},
{
"epoch": 5.711111111111111,
"grad_norm": 0.661490029880272,
"learning_rate": 4.003978395493682e-06,
"loss": 0.1849,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1974271535873413,
"step": 2570,
"valid_targets_mean": 3547.4,
"valid_targets_min": 490
},
{
"epoch": 5.722222222222222,
"grad_norm": 1.7114236198419897,
"learning_rate": 3.937706157253971e-06,
"loss": 0.1776,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20456838607788086,
"step": 2575,
"valid_targets_mean": 3842.5,
"valid_targets_min": 206
},
{
"epoch": 5.733333333333333,
"grad_norm": 0.6837291674161498,
"learning_rate": 3.871927024782838e-06,
"loss": 0.1722,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14637309312820435,
"step": 2580,
"valid_targets_mean": 4026.7,
"valid_targets_min": 469
},
{
"epoch": 5.7444444444444445,
"grad_norm": 0.7514815970637514,
"learning_rate": 3.80664301747242e-06,
"loss": 0.1705,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18412163853645325,
"step": 2585,
"valid_targets_mean": 2986.9,
"valid_targets_min": 295
},
{
"epoch": 5.7555555555555555,
"grad_norm": 0.5388048398379085,
"learning_rate": 3.741856139514706e-06,
"loss": 0.1747,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16334733366966248,
"step": 2590,
"valid_targets_mean": 4902.1,
"valid_targets_min": 997
},
{
"epoch": 5.766666666666667,
"grad_norm": 0.5233222496715474,
"learning_rate": 3.677568379840011e-06,
"loss": 0.1621,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16272562742233276,
"step": 2595,
"valid_targets_mean": 4434.1,
"valid_targets_min": 462
},
{
"epoch": 5.777777777777778,
"grad_norm": 0.5705709741002777,
"learning_rate": 3.613781712055935e-06,
"loss": 0.1793,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17690132558345795,
"step": 2600,
"valid_targets_mean": 4264.3,
"valid_targets_min": 325
},
{
"epoch": 5.788888888888889,
"grad_norm": 0.5479756129178118,
"learning_rate": 3.5504980943867538e-06,
"loss": 0.1806,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16624002158641815,
"step": 2605,
"valid_targets_mean": 5080.0,
"valid_targets_min": 530
},
{
"epoch": 5.8,
"grad_norm": 0.6635653151891525,
"learning_rate": 3.487719469613331e-06,
"loss": 0.1811,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2028333991765976,
"step": 2610,
"valid_targets_mean": 3572.5,
"valid_targets_min": 367
},
{
"epoch": 5.811111111111111,
"grad_norm": 0.44665022968089857,
"learning_rate": 3.4254477650134367e-06,
"loss": 0.1714,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1494394838809967,
"step": 2615,
"valid_targets_mean": 5941.5,
"valid_targets_min": 755
},
{
"epoch": 5.822222222222222,
"grad_norm": 0.661754201811149,
"learning_rate": 3.3636848923026257e-06,
"loss": 0.17,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18834251165390015,
"step": 2620,
"valid_targets_mean": 2994.5,
"valid_targets_min": 393
},
{
"epoch": 5.833333333333333,
"grad_norm": 0.6849647049787121,
"learning_rate": 3.30243274757549e-06,
"loss": 0.1702,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16348251700401306,
"step": 2625,
"valid_targets_mean": 2972.6,
"valid_targets_min": 997
},
{
"epoch": 5.844444444444444,
"grad_norm": 0.6231070446255984,
"learning_rate": 3.2416932112475207e-06,
"loss": 0.1661,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15320156514644623,
"step": 2630,
"valid_targets_mean": 4418.4,
"valid_targets_min": 685
},
{
"epoch": 5.855555555555555,
"grad_norm": 0.5713999743603262,
"learning_rate": 3.1814681479973154e-06,
"loss": 0.1538,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16935516893863678,
"step": 2635,
"valid_targets_mean": 4568.5,
"valid_targets_min": 761
},
{
"epoch": 5.866666666666667,
"grad_norm": 1.2779543648973395,
"learning_rate": 3.121759406709386e-06,
"loss": 0.1756,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18024687469005585,
"step": 2640,
"valid_targets_mean": 4624.9,
"valid_targets_min": 310
},
{
"epoch": 5.877777777777778,
"grad_norm": 0.527748772279261,
"learning_rate": 3.062568820417353e-06,
"loss": 0.1785,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19535987079143524,
"step": 2645,
"valid_targets_mean": 5047.5,
"valid_targets_min": 733
},
{
"epoch": 5.888888888888889,
"grad_norm": 0.5667121076341758,
"learning_rate": 3.003898206247704e-06,
"loss": 0.1781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1724199652671814,
"step": 2650,
"valid_targets_mean": 4225.3,
"valid_targets_min": 957
},
{
"epoch": 5.9,
"grad_norm": 0.5448499086146226,
"learning_rate": 2.9457493653639856e-06,
"loss": 0.1633,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16028910875320435,
"step": 2655,
"valid_targets_mean": 4420.4,
"valid_targets_min": 2211
},
{
"epoch": 5.911111111111111,
"grad_norm": 0.5833160229189241,
"learning_rate": 2.8881240829115453e-06,
"loss": 0.1798,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.173763245344162,
"step": 2660,
"valid_targets_mean": 3861.1,
"valid_targets_min": 549
},
{
"epoch": 5.9222222222222225,
"grad_norm": 0.5633050658584287,
"learning_rate": 2.8310241279626784e-06,
"loss": 0.1798,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18552720546722412,
"step": 2665,
"valid_targets_mean": 4328.1,
"valid_targets_min": 406
},
{
"epoch": 5.933333333333334,
"grad_norm": 0.6139575137201317,
"learning_rate": 2.774451253462356e-06,
"loss": 0.1904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17860981822013855,
"step": 2670,
"valid_targets_mean": 3716.5,
"valid_targets_min": 668
},
{
"epoch": 5.944444444444445,
"grad_norm": 0.5214170522074714,
"learning_rate": 2.718407196174391e-06,
"loss": 0.1661,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18364687263965607,
"step": 2675,
"valid_targets_mean": 5053.6,
"valid_targets_min": 238
},
{
"epoch": 5.955555555555556,
"grad_norm": 0.6469395857790785,
"learning_rate": 2.6628936766281375e-06,
"loss": 0.1681,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19230781495571136,
"step": 2680,
"valid_targets_mean": 3862.8,
"valid_targets_min": 742
},
{
"epoch": 5.966666666666667,
"grad_norm": 0.5114626457093362,
"learning_rate": 2.607912399065646e-06,
"loss": 0.1678,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1596902310848236,
"step": 2685,
"valid_targets_mean": 5641.2,
"valid_targets_min": 306
},
{
"epoch": 5.977777777777778,
"grad_norm": 0.5327080602996671,
"learning_rate": 2.5534650513893787e-06,
"loss": 0.1679,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14958997070789337,
"step": 2690,
"valid_targets_mean": 5039.9,
"valid_targets_min": 792
},
{
"epoch": 5.988888888888889,
"grad_norm": 0.6076035884975858,
"learning_rate": 2.4995533051103448e-06,
"loss": 0.1566,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15890420973300934,
"step": 2695,
"valid_targets_mean": 3991.2,
"valid_targets_min": 300
},
{
"epoch": 6.0,
"grad_norm": 0.5398509197397203,
"learning_rate": 2.446178815296838e-06,
"loss": 0.1719,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.159483402967453,
"step": 2700,
"valid_targets_mean": 4043.2,
"valid_targets_min": 300
},
{
"epoch": 6.011111111111111,
"grad_norm": 0.5913571273369431,
"learning_rate": 2.393343220523581e-06,
"loss": 0.1704,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17935341596603394,
"step": 2705,
"valid_targets_mean": 4581.9,
"valid_targets_min": 1063
},
{
"epoch": 6.022222222222222,
"grad_norm": 0.66752029438305,
"learning_rate": 2.3410481428214602e-06,
"loss": 0.1615,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1461348831653595,
"step": 2710,
"valid_targets_mean": 3675.8,
"valid_targets_min": 489
},
{
"epoch": 6.033333333333333,
"grad_norm": 0.5056815928149037,
"learning_rate": 2.2892951876276983e-06,
"loss": 0.1599,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17451688647270203,
"step": 2715,
"valid_targets_mean": 4982.4,
"valid_targets_min": 193
},
{
"epoch": 6.044444444444444,
"grad_norm": 0.5787722267858811,
"learning_rate": 2.2380859437365855e-06,
"loss": 0.168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1561371088027954,
"step": 2720,
"valid_targets_mean": 4155.3,
"valid_targets_min": 467
},
{
"epoch": 6.055555555555555,
"grad_norm": 0.5915320981288767,
"learning_rate": 2.187421983250695e-06,
"loss": 0.1584,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16373848915100098,
"step": 2725,
"valid_targets_mean": 3869.1,
"valid_targets_min": 291
},
{
"epoch": 6.066666666666666,
"grad_norm": 0.6317568161952004,
"learning_rate": 2.1373048615326385e-06,
"loss": 0.156,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1501501202583313,
"step": 2730,
"valid_targets_mean": 3157.3,
"valid_targets_min": 657
},
{
"epoch": 6.0777777777777775,
"grad_norm": 0.5928916539006913,
"learning_rate": 2.0877361171572953e-06,
"loss": 0.1625,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16607831418514252,
"step": 2735,
"valid_targets_mean": 5381.1,
"valid_targets_min": 366
},
{
"epoch": 6.088888888888889,
"grad_norm": 0.5449211144644974,
"learning_rate": 2.0387172718645853e-06,
"loss": 0.1607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14824213087558746,
"step": 2740,
"valid_targets_mean": 4073.2,
"valid_targets_min": 901
},
{
"epoch": 6.1,
"grad_norm": 0.5683455633442686,
"learning_rate": 1.990249830512756e-06,
"loss": 0.153,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16184581816196442,
"step": 2745,
"valid_targets_mean": 4750.2,
"valid_targets_min": 325
},
{
"epoch": 6.111111111111111,
"grad_norm": 0.42667335878897045,
"learning_rate": 1.942335281032188e-06,
"loss": 0.1524,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12961864471435547,
"step": 2750,
"valid_targets_mean": 6349.4,
"valid_targets_min": 2625
},
{
"epoch": 6.122222222222222,
"grad_norm": 0.5436823867806183,
"learning_rate": 1.8949750943797051e-06,
"loss": 0.1562,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14522790908813477,
"step": 2755,
"valid_targets_mean": 4269.0,
"valid_targets_min": 324
},
{
"epoch": 6.133333333333334,
"grad_norm": 0.6448799255936731,
"learning_rate": 1.8481707244934232e-06,
"loss": 0.1582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15584322810173035,
"step": 2760,
"valid_targets_mean": 4016.6,
"valid_targets_min": 445
},
{
"epoch": 6.144444444444445,
"grad_norm": 0.5421598615393723,
"learning_rate": 1.8019236082481063e-06,
"loss": 0.1642,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1767968237400055,
"step": 2765,
"valid_targets_mean": 4966.3,
"valid_targets_min": 734
},
{
"epoch": 6.155555555555556,
"grad_norm": 0.5685111593837506,
"learning_rate": 1.7562351654110776e-06,
"loss": 0.1602,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1697826236486435,
"step": 2770,
"valid_targets_mean": 4455.9,
"valid_targets_min": 263
},
{
"epoch": 6.166666666666667,
"grad_norm": 0.5013193289509864,
"learning_rate": 1.711106798598603e-06,
"loss": 0.1553,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14647771418094635,
"step": 2775,
"valid_targets_mean": 4696.4,
"valid_targets_min": 1329
},
{
"epoch": 6.177777777777778,
"grad_norm": 0.5765680957861777,
"learning_rate": 1.6665398932328615e-06,
"loss": 0.1843,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21099118888378143,
"step": 2780,
"valid_targets_mean": 4673.8,
"valid_targets_min": 328
},
{
"epoch": 6.188888888888889,
"grad_norm": 0.6071624204964592,
"learning_rate": 1.6225358174993866e-06,
"loss": 0.1653,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15273860096931458,
"step": 2785,
"valid_targets_mean": 4286.6,
"valid_targets_min": 537
},
{
"epoch": 6.2,
"grad_norm": 0.6182352698209433,
"learning_rate": 1.5790959223050761e-06,
"loss": 0.1695,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19237971305847168,
"step": 2790,
"valid_targets_mean": 3695.6,
"valid_targets_min": 344
},
{
"epoch": 6.211111111111111,
"grad_norm": 0.588213406617714,
"learning_rate": 1.5362215412367198e-06,
"loss": 0.1601,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15135380625724792,
"step": 2795,
"valid_targets_mean": 3756.7,
"valid_targets_min": 1214
},
{
"epoch": 6.222222222222222,
"grad_norm": 0.6346814814248117,
"learning_rate": 1.493913990520066e-06,
"loss": 0.1853,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20901688933372498,
"step": 2800,
"valid_targets_mean": 4734.8,
"valid_targets_min": 360
},
{
"epoch": 6.233333333333333,
"grad_norm": 0.6257271615557687,
"learning_rate": 1.4521745689793942e-06,
"loss": 0.1676,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15938270092010498,
"step": 2805,
"valid_targets_mean": 5421.2,
"valid_targets_min": 635
},
{
"epoch": 6.2444444444444445,
"grad_norm": 0.559553595120576,
"learning_rate": 1.4110045579976638e-06,
"loss": 0.1707,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16130048036575317,
"step": 2810,
"valid_targets_mean": 4333.5,
"valid_targets_min": 586
},
{
"epoch": 6.2555555555555555,
"grad_norm": 0.5866773758858824,
"learning_rate": 1.3704052214771513e-06,
"loss": 0.1773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20442438125610352,
"step": 2815,
"valid_targets_mean": 4559.4,
"valid_targets_min": 491
},
{
"epoch": 6.266666666666667,
"grad_norm": 0.5344071459257823,
"learning_rate": 1.3303778058006844e-06,
"loss": 0.171,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1532367765903473,
"step": 2820,
"valid_targets_mean": 4964.8,
"valid_targets_min": 1225
},
{
"epoch": 6.277777777777778,
"grad_norm": 0.4690750876544389,
"learning_rate": 1.2909235397933429e-06,
"loss": 0.1814,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18918469548225403,
"step": 2825,
"valid_targets_mean": 6092.4,
"valid_targets_min": 809
},
{
"epoch": 6.288888888888889,
"grad_norm": 0.5452380646504179,
"learning_rate": 1.2520436346847498e-06,
"loss": 0.1513,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1750721037387848,
"step": 2830,
"valid_targets_mean": 4615.1,
"valid_targets_min": 231
},
{
"epoch": 6.3,
"grad_norm": 0.7863745243804952,
"learning_rate": 1.213739284071891e-06,
"loss": 0.158,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1903843879699707,
"step": 2835,
"valid_targets_mean": 3657.9,
"valid_targets_min": 783
},
{
"epoch": 6.311111111111111,
"grad_norm": 0.5931382218715537,
"learning_rate": 1.176011663882466e-06,
"loss": 0.1702,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17225001752376556,
"step": 2840,
"valid_targets_mean": 3728.6,
"valid_targets_min": 394
},
{
"epoch": 6.322222222222222,
"grad_norm": 0.5189346871656337,
"learning_rate": 1.1388619323387884e-06,
"loss": 0.1691,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17915725708007812,
"step": 2845,
"valid_targets_mean": 5191.2,
"valid_targets_min": 647
},
{
"epoch": 6.333333333333333,
"grad_norm": 0.5685337335007038,
"learning_rate": 1.1022912299222387e-06,
"loss": 0.1712,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16851413249969482,
"step": 2850,
"valid_targets_mean": 4485.8,
"valid_targets_min": 855
},
{
"epoch": 6.344444444444444,
"grad_norm": 0.7195861370160256,
"learning_rate": 1.0663006793382214e-06,
"loss": 0.1607,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1574927270412445,
"step": 2855,
"valid_targets_mean": 3559.8,
"valid_targets_min": 576
},
{
"epoch": 6.355555555555555,
"grad_norm": 0.5762158096256034,
"learning_rate": 1.0308913854817425e-06,
"loss": 0.1641,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1543298363685608,
"step": 2860,
"valid_targets_mean": 4227.9,
"valid_targets_min": 450
},
{
"epoch": 6.366666666666666,
"grad_norm": 0.581243646224178,
"learning_rate": 9.960644354034544e-07,
"loss": 0.1729,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16603925824165344,
"step": 2865,
"valid_targets_mean": 3848.9,
"valid_targets_min": 539
},
{
"epoch": 6.377777777777778,
"grad_norm": 0.5860189314743979,
"learning_rate": 9.618208982763045e-07,
"loss": 0.168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17424118518829346,
"step": 2870,
"valid_targets_mean": 4603.6,
"valid_targets_min": 820
},
{
"epoch": 6.388888888888889,
"grad_norm": 0.5260553051504206,
"learning_rate": 9.281618253626967e-07,
"loss": 0.1904,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1587425321340561,
"step": 2875,
"valid_targets_mean": 4868.8,
"valid_targets_min": 623
},
{
"epoch": 6.4,
"grad_norm": 0.5709882539258184,
"learning_rate": 8.950882499822322e-07,
"loss": 0.157,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18391573429107666,
"step": 2880,
"valid_targets_mean": 4826.9,
"valid_targets_min": 955
},
{
"epoch": 6.411111111111111,
"grad_norm": 0.5644788262306717,
"learning_rate": 8.626011874799723e-07,
"loss": 0.1604,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18057206273078918,
"step": 2885,
"valid_targets_mean": 4512.6,
"valid_targets_min": 219
},
{
"epoch": 6.4222222222222225,
"grad_norm": 0.576346831291659,
"learning_rate": 8.307016351952857e-07,
"loss": 0.1563,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.166375070810318,
"step": 2890,
"valid_targets_mean": 4210.4,
"valid_targets_min": 1171
},
{
"epoch": 6.433333333333334,
"grad_norm": 0.5757462663579878,
"learning_rate": 7.993905724312156e-07,
"loss": 0.1768,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1599673181772232,
"step": 2895,
"valid_targets_mean": 4139.4,
"valid_targets_min": 566
},
{
"epoch": 6.444444444444445,
"grad_norm": 0.5623689294928453,
"learning_rate": 7.686689604244191e-07,
"loss": 0.1773,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19040706753730774,
"step": 2900,
"valid_targets_mean": 4728.7,
"valid_targets_min": 992
},
{
"epoch": 6.455555555555556,
"grad_norm": 0.598246014236927,
"learning_rate": 7.385377423156592e-07,
"loss": 0.1605,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1700592339038849,
"step": 2905,
"valid_targets_mean": 3927.4,
"valid_targets_min": 507
},
{
"epoch": 6.466666666666667,
"grad_norm": 0.6203480477245517,
"learning_rate": 7.0899784312086e-07,
"loss": 0.1654,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1759098470211029,
"step": 2910,
"valid_targets_mean": 3698.8,
"valid_targets_min": 923
},
{
"epoch": 6.477777777777778,
"grad_norm": 0.7017472734434613,
"learning_rate": 6.800501697026817e-07,
"loss": 0.1767,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1844363808631897,
"step": 2915,
"valid_targets_mean": 3652.4,
"valid_targets_min": 745
},
{
"epoch": 6.488888888888889,
"grad_norm": 0.8080719060022912,
"learning_rate": 6.516956107427241e-07,
"loss": 0.169,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1818864643573761,
"step": 2920,
"valid_targets_mean": 3598.4,
"valid_targets_min": 322
},
{
"epoch": 6.5,
"grad_norm": 0.6080050661997464,
"learning_rate": 6.239350367141872e-07,
"loss": 0.1661,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1631316989660263,
"step": 2925,
"valid_targets_mean": 3850.2,
"valid_targets_min": 577
},
{
"epoch": 6.511111111111111,
"grad_norm": 0.5218209158596074,
"learning_rate": 5.967692998552088e-07,
"loss": 0.1734,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1616230010986328,
"step": 2930,
"valid_targets_mean": 4195.7,
"valid_targets_min": 214
},
{
"epoch": 6.522222222222222,
"grad_norm": 0.583948409979544,
"learning_rate": 5.701992341426499e-07,
"loss": 0.1708,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16667166352272034,
"step": 2935,
"valid_targets_mean": 4227.6,
"valid_targets_min": 443
},
{
"epoch": 6.533333333333333,
"grad_norm": 0.6357686417725928,
"learning_rate": 5.442256552665326e-07,
"loss": 0.1612,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1789218783378601,
"step": 2940,
"valid_targets_mean": 4223.6,
"valid_targets_min": 549
},
{
"epoch": 6.544444444444444,
"grad_norm": 0.5545850268570705,
"learning_rate": 5.188493606049672e-07,
"loss": 0.15,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16391915082931519,
"step": 2945,
"valid_targets_mean": 4468.6,
"valid_targets_min": 306
},
{
"epoch": 6.555555555555555,
"grad_norm": 0.5370700566695868,
"learning_rate": 4.940711291996891e-07,
"loss": 0.1615,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1655001938343048,
"step": 2950,
"valid_targets_mean": 4653.1,
"valid_targets_min": 524
},
{
"epoch": 6.566666666666666,
"grad_norm": 0.5816674164537832,
"learning_rate": 4.698917217321408e-07,
"loss": 0.1593,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16322006285190582,
"step": 2955,
"valid_targets_mean": 4279.8,
"valid_targets_min": 791
},
{
"epoch": 6.5777777777777775,
"grad_norm": 0.591268448436149,
"learning_rate": 4.4631188050011654e-07,
"loss": 0.1577,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15823054313659668,
"step": 2960,
"valid_targets_mean": 4439.2,
"valid_targets_min": 299
},
{
"epoch": 6.588888888888889,
"grad_norm": 0.5923470499206334,
"learning_rate": 4.2333232939498094e-07,
"loss": 0.163,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15806478261947632,
"step": 2965,
"valid_targets_mean": 4299.8,
"valid_targets_min": 557
},
{
"epoch": 6.6,
"grad_norm": 0.604171037099629,
"learning_rate": 4.009537738794289e-07,
"loss": 0.1624,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18534332513809204,
"step": 2970,
"valid_targets_mean": 3794.3,
"valid_targets_min": 1163
},
{
"epoch": 6.611111111111111,
"grad_norm": 0.5959118836475591,
"learning_rate": 3.791769009658497e-07,
"loss": 0.1744,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17430004477500916,
"step": 2975,
"valid_targets_mean": 4548.9,
"valid_targets_min": 689
},
{
"epoch": 6.622222222222222,
"grad_norm": 0.7602527640872502,
"learning_rate": 3.5800237919522363e-07,
"loss": 0.1696,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1884760558605194,
"step": 2980,
"valid_targets_mean": 4584.7,
"valid_targets_min": 1115
},
{
"epoch": 6.633333333333333,
"grad_norm": 0.5214769514587413,
"learning_rate": 3.3743085861659643e-07,
"loss": 0.1768,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16529062390327454,
"step": 2985,
"valid_targets_mean": 4972.1,
"valid_targets_min": 662
},
{
"epoch": 6.644444444444445,
"grad_norm": 0.5344722245636476,
"learning_rate": 3.1746297076713504e-07,
"loss": 0.164,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14969675242900848,
"step": 2990,
"valid_targets_mean": 4986.3,
"valid_targets_min": 1685
},
{
"epoch": 6.655555555555556,
"grad_norm": 0.5351628577936627,
"learning_rate": 2.9809932865271893e-07,
"loss": 0.1715,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16871917247772217,
"step": 2995,
"valid_targets_mean": 4784.9,
"valid_targets_min": 691
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.5030921817708136,
"learning_rate": 2.793405267291505e-07,
"loss": 0.1637,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15890008211135864,
"step": 3000,
"valid_targets_mean": 5473.4,
"valid_targets_min": 1108
},
{
"epoch": 6.677777777777778,
"grad_norm": 0.6571885910780609,
"learning_rate": 2.6118714088386954e-07,
"loss": 0.168,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18136438727378845,
"step": 3005,
"valid_targets_mean": 3401.2,
"valid_targets_min": 257
},
{
"epoch": 6.688888888888889,
"grad_norm": 0.6046227631736789,
"learning_rate": 2.436397284183123e-07,
"loss": 0.166,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1641228199005127,
"step": 3010,
"valid_targets_mean": 3765.2,
"valid_targets_min": 695
},
{
"epoch": 6.7,
"grad_norm": 0.5844047544930174,
"learning_rate": 2.2669882803076916e-07,
"loss": 0.164,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15143552422523499,
"step": 3015,
"valid_targets_mean": 4498.2,
"valid_targets_min": 672
},
{
"epoch": 6.711111111111111,
"grad_norm": 0.6149713586123504,
"learning_rate": 2.1036495979986692e-07,
"loss": 0.158,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15604856610298157,
"step": 3020,
"valid_targets_mean": 3861.8,
"valid_targets_min": 225
},
{
"epoch": 6.722222222222222,
"grad_norm": 0.5723560396309636,
"learning_rate": 1.9463862516859277e-07,
"loss": 0.1565,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12534162402153015,
"step": 3025,
"valid_targets_mean": 3602.2,
"valid_targets_min": 401
},
{
"epoch": 6.733333333333333,
"grad_norm": 0.4850032616094909,
"learning_rate": 1.7952030692891086e-07,
"loss": 0.165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1640266627073288,
"step": 3030,
"valid_targets_mean": 4796.8,
"valid_targets_min": 802
},
{
"epoch": 6.7444444444444445,
"grad_norm": 0.5571942927801525,
"learning_rate": 1.6501046920692986e-07,
"loss": 0.1747,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18630409240722656,
"step": 3035,
"valid_targets_mean": 4979.9,
"valid_targets_min": 374
},
{
"epoch": 6.7555555555555555,
"grad_norm": 0.6261160646072778,
"learning_rate": 1.511095574486543e-07,
"loss": 0.1509,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13854405283927917,
"step": 3040,
"valid_targets_mean": 3824.3,
"valid_targets_min": 449
},
{
"epoch": 6.766666666666667,
"grad_norm": 0.6318415495546549,
"learning_rate": 1.378179984063177e-07,
"loss": 0.1729,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.186091810464859,
"step": 3045,
"valid_targets_mean": 4292.3,
"valid_targets_min": 389
},
{
"epoch": 6.777777777777778,
"grad_norm": 0.6057679446128716,
"learning_rate": 1.2513620012528427e-07,
"loss": 0.1819,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18621055781841278,
"step": 3050,
"valid_targets_mean": 4053.4,
"valid_targets_min": 289
},
{
"epoch": 6.788888888888889,
"grad_norm": 0.8744644549026661,
"learning_rate": 1.1306455193150323e-07,
"loss": 0.1764,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20955529808998108,
"step": 3055,
"valid_targets_mean": 3671.4,
"valid_targets_min": 482
},
{
"epoch": 6.8,
"grad_norm": 0.6599223412568986,
"learning_rate": 1.0160342441957626e-07,
"loss": 0.1801,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19978420436382294,
"step": 3060,
"valid_targets_mean": 3478.9,
"valid_targets_min": 300
},
{
"epoch": 6.811111111111111,
"grad_norm": 0.5322339408865758,
"learning_rate": 9.07531694413688e-08,
"loss": 0.1676,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14194843173027039,
"step": 3065,
"valid_targets_mean": 5484.3,
"valid_targets_min": 2467
},
{
"epoch": 6.822222222222222,
"grad_norm": 0.5448297735203173,
"learning_rate": 8.051412009521864e-08,
"loss": 0.1707,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15910744667053223,
"step": 3070,
"valid_targets_mean": 4950.7,
"valid_targets_min": 750
},
{
"epoch": 6.833333333333333,
"grad_norm": 0.5207228811956566,
"learning_rate": 7.08865907156997e-08,
"loss": 0.165,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15996085107326508,
"step": 3075,
"valid_targets_mean": 5696.6,
"valid_targets_min": 1801
},
{
"epoch": 6.844444444444444,
"grad_norm": 0.588737424620841,
"learning_rate": 6.187087686397641e-08,
"loss": 0.1729,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16847629845142365,
"step": 3080,
"valid_targets_mean": 3962.7,
"valid_targets_min": 243
},
{
"epoch": 6.855555555555555,
"grad_norm": 0.8953529078704728,
"learning_rate": 5.3467255318726544e-08,
"loss": 0.1649,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1842920035123825,
"step": 3085,
"valid_targets_mean": 3766.0,
"valid_targets_min": 436
},
{
"epoch": 6.866666666666667,
"grad_norm": 0.6312232512655556,
"learning_rate": 4.567598406765461e-08,
"loss": 0.1566,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14916302263736725,
"step": 3090,
"valid_targets_mean": 4468.3,
"valid_targets_min": 292
},
{
"epoch": 6.877777777777778,
"grad_norm": 0.5616637532565867,
"learning_rate": 3.84973022995605e-08,
"loss": 0.1783,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13993391394615173,
"step": 3095,
"valid_targets_mean": 4598.1,
"valid_targets_min": 479
},
{
"epoch": 6.888888888888889,
"grad_norm": 0.5833668585034318,
"learning_rate": 3.193143039700086e-08,
"loss": 0.1655,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1864895224571228,
"step": 3100,
"valid_targets_mean": 4208.8,
"valid_targets_min": 731
},
{
"epoch": 6.9,
"grad_norm": 0.571580775093832,
"learning_rate": 2.597856992952341e-08,
"loss": 0.1659,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16089646518230438,
"step": 3105,
"valid_targets_mean": 5259.4,
"valid_targets_min": 1133
},
{
"epoch": 6.911111111111111,
"grad_norm": 0.5133595187461827,
"learning_rate": 2.063890364748078e-08,
"loss": 0.1642,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1508500874042511,
"step": 3110,
"valid_targets_mean": 5170.9,
"valid_targets_min": 1010
},
{
"epoch": 6.9222222222222225,
"grad_norm": 0.5585279295675786,
"learning_rate": 1.5912595476414993e-08,
"loss": 0.169,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17111481726169586,
"step": 3115,
"valid_targets_mean": 4617.8,
"valid_targets_min": 589
},
{
"epoch": 6.933333333333334,
"grad_norm": 0.5333435138003515,
"learning_rate": 1.1799790512030395e-08,
"loss": 0.1611,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14488770067691803,
"step": 3120,
"valid_targets_mean": 4679.4,
"valid_targets_min": 835
},
{
"epoch": 6.944444444444445,
"grad_norm": 0.5560785893229921,
"learning_rate": 8.300615015734981e-09,
"loss": 0.1635,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14593376219272614,
"step": 3125,
"valid_targets_mean": 4246.2,
"valid_targets_min": 616
},
{
"epoch": 6.955555555555556,
"grad_norm": 0.7126140858082262,
"learning_rate": 5.415176410765721e-09,
"loss": 0.1727,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18622633814811707,
"step": 3130,
"valid_targets_mean": 3116.1,
"valid_targets_min": 263
},
{
"epoch": 6.966666666666667,
"grad_norm": 0.5293643056538982,
"learning_rate": 3.1435632788956448e-09,
"loss": 0.1711,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14810457825660706,
"step": 3135,
"valid_targets_mean": 4385.9,
"valid_targets_min": 369
},
{
"epoch": 6.977777777777778,
"grad_norm": 0.6565261395938169,
"learning_rate": 1.4858453577071275e-09,
"loss": 0.1593,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16837093234062195,
"step": 3140,
"valid_targets_mean": 3301.8,
"valid_targets_min": 361
},
{
"epoch": 6.988888888888889,
"grad_norm": 0.53850752681232,
"learning_rate": 4.4207353845360234e-10,
"loss": 0.1676,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17763689160346985,
"step": 3145,
"valid_targets_mean": 5262.4,
"valid_targets_min": 2341
},
{
"epoch": 7.0,
"grad_norm": 0.6203043399932429,
"learning_rate": 1.2279864494146865e-11,
"loss": 0.1667,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19432786107063293,
"step": 3150,
"valid_targets_mean": 4280.6,
"valid_targets_min": 315
},
{
"epoch": 7.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19432786107063293,
"step": 3150,
"total_flos": 1055465597173760.0,
"train_loss": 0.21853318925887819,
"train_runtime": 17876.6321,
"train_samples_per_second": 2.818,
"train_steps_per_second": 0.176,
"valid_targets_mean": 4280.6,
"valid_targets_min": 315
}
],
"logging_steps": 5,
"max_steps": 3150,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 1500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1055465597173760.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}