diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6977 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 3150, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.011111111111111112, + "grad_norm": 13.408544798581621, + "learning_rate": 5.07936507936508e-07, + "loss": 0.6676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5751245617866516, + "step": 5, + "valid_targets_mean": 5385.7, + "valid_targets_min": 310 + }, + { + "epoch": 0.022222222222222223, + "grad_norm": 16.21108259140317, + "learning_rate": 1.142857142857143e-06, + "loss": 0.658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.687087893486023, + "step": 10, + "valid_targets_mean": 3831.0, + "valid_targets_min": 293 + }, + { + "epoch": 0.03333333333333333, + "grad_norm": 12.338215088436316, + "learning_rate": 1.777777777777778e-06, + "loss": 0.6536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6238576173782349, + "step": 15, + "valid_targets_mean": 4839.5, + "valid_targets_min": 752 + }, + { + "epoch": 0.044444444444444446, + "grad_norm": 9.017393522713887, + "learning_rate": 2.412698412698413e-06, + "loss": 0.6353, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5550223588943481, + "step": 20, + "valid_targets_mean": 4619.8, + "valid_targets_min": 599 + }, + { + "epoch": 0.05555555555555555, + "grad_norm": 5.309166517130694, + "learning_rate": 3.047619047619048e-06, + "loss": 0.5386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5435296893119812, + "step": 25, + "valid_targets_mean": 4110.9, + "valid_targets_min": 723 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 2.792780616604098, + "learning_rate": 3.6825396825396833e-06, + "loss": 0.5029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.46670812368392944, + "step": 30, + "valid_targets_mean": 3455.2, + "valid_targets_min": 295 + }, + { + "epoch": 0.07777777777777778, + "grad_norm": 1.5959020052891244, + "learning_rate": 4.317460317460318e-06, + "loss": 0.4468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4236889183521271, + "step": 35, + "valid_targets_mean": 5468.6, + "valid_targets_min": 376 + }, + { + "epoch": 0.08888888888888889, + "grad_norm": 1.0408106474791263, + "learning_rate": 4.952380952380953e-06, + "loss": 0.4331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41928115487098694, + "step": 40, + "valid_targets_mean": 5478.6, + "valid_targets_min": 1395 + }, + { + "epoch": 0.1, + "grad_norm": 0.871196479469939, + "learning_rate": 5.5873015873015876e-06, + "loss": 0.4438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40255558490753174, + "step": 45, + "valid_targets_mean": 5138.5, + "valid_targets_min": 1811 + }, + { + "epoch": 0.1111111111111111, + "grad_norm": 0.8029816404229763, + "learning_rate": 6.222222222222223e-06, + "loss": 0.4316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3899801969528198, + "step": 50, + "valid_targets_mean": 4666.9, + "valid_targets_min": 555 + }, + { + "epoch": 0.12222222222222222, + "grad_norm": 0.7515486206211541, + "learning_rate": 6.857142857142858e-06, + "loss": 0.4538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4362230896949768, + "step": 55, + "valid_targets_mean": 4455.1, + "valid_targets_min": 397 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 0.5887418945104569, + "learning_rate": 7.492063492063493e-06, + "loss": 0.3969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3481084108352661, + "step": 60, + "valid_targets_mean": 5121.1, + "valid_targets_min": 322 + }, + { + "epoch": 0.14444444444444443, + "grad_norm": 0.6245277781771107, + "learning_rate": 8.126984126984128e-06, + "loss": 0.3905, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36860743165016174, + "step": 65, + "valid_targets_mean": 3914.1, + "valid_targets_min": 269 + }, + { + "epoch": 0.15555555555555556, + "grad_norm": 0.5810023779977105, + "learning_rate": 8.761904761904763e-06, + "loss": 0.3874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3317573368549347, + "step": 70, + "valid_targets_mean": 4313.6, + "valid_targets_min": 1168 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 1.0288706410047126, + "learning_rate": 9.396825396825398e-06, + "loss": 0.356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3530646562576294, + "step": 75, + "valid_targets_mean": 3485.5, + "valid_targets_min": 510 + }, + { + "epoch": 0.17777777777777778, + "grad_norm": 0.6056824865658866, + "learning_rate": 1.0031746031746033e-05, + "loss": 0.3536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3671146035194397, + "step": 80, + "valid_targets_mean": 3908.6, + "valid_targets_min": 257 + }, + { + "epoch": 0.18888888888888888, + "grad_norm": 0.6162370696114597, + "learning_rate": 1.0666666666666667e-05, + "loss": 0.3733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3743559718132019, + "step": 85, + "valid_targets_mean": 4694.6, + "valid_targets_min": 435 + }, + { + "epoch": 0.2, + "grad_norm": 0.5931909767383229, + "learning_rate": 1.1301587301587302e-05, + "loss": 0.34, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3645175099372864, + "step": 90, + "valid_targets_mean": 4516.5, + "valid_targets_min": 630 + }, + { + "epoch": 0.2111111111111111, + "grad_norm": 0.5852862045298343, + "learning_rate": 1.1936507936507937e-05, + "loss": 0.3612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33980345726013184, + "step": 95, + "valid_targets_mean": 4287.4, + "valid_targets_min": 377 + }, + { + "epoch": 0.2222222222222222, + "grad_norm": 0.591535838571353, + "learning_rate": 1.2571428571428572e-05, + "loss": 0.3496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3416215479373932, + "step": 100, + "valid_targets_mean": 3520.2, + "valid_targets_min": 322 + }, + { + "epoch": 0.23333333333333334, + "grad_norm": 0.5517118398063217, + "learning_rate": 1.3206349206349206e-05, + "loss": 0.3417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3045486807823181, + "step": 105, + "valid_targets_mean": 3936.1, + "valid_targets_min": 355 + }, + { + "epoch": 0.24444444444444444, + "grad_norm": 0.6036669845712245, + "learning_rate": 1.3841269841269843e-05, + "loss": 0.3631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.37737342715263367, + "step": 110, + "valid_targets_mean": 4156.3, + "valid_targets_min": 352 + }, + { + "epoch": 0.25555555555555554, + "grad_norm": 0.557851756149835, + "learning_rate": 1.4476190476190478e-05, + "loss": 0.33, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31270942091941833, + "step": 115, + "valid_targets_mean": 4740.2, + "valid_targets_min": 764 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.5061593555204397, + "learning_rate": 1.5111111111111112e-05, + "loss": 0.3383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3117508292198181, + "step": 120, + "valid_targets_mean": 5759.2, + "valid_targets_min": 836 + }, + { + "epoch": 0.2777777777777778, + "grad_norm": 0.5910898872360414, + "learning_rate": 1.5746031746031745e-05, + "loss": 0.3198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30763545632362366, + "step": 125, + "valid_targets_mean": 4890.5, + "valid_targets_min": 872 + }, + { + "epoch": 0.28888888888888886, + "grad_norm": 0.44647524167329333, + "learning_rate": 1.6380952380952384e-05, + "loss": 0.302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27717962861061096, + "step": 130, + "valid_targets_mean": 5430.8, + "valid_targets_min": 869 + }, + { + "epoch": 0.3, + "grad_norm": 0.5879317402321064, + "learning_rate": 1.7015873015873018e-05, + "loss": 0.3243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30784285068511963, + "step": 135, + "valid_targets_mean": 3904.9, + "valid_targets_min": 393 + }, + { + "epoch": 0.3111111111111111, + "grad_norm": 0.5605756164325479, + "learning_rate": 1.7650793650793653e-05, + "loss": 0.2979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2845287322998047, + "step": 140, + "valid_targets_mean": 4072.9, + "valid_targets_min": 517 + }, + { + "epoch": 0.32222222222222224, + "grad_norm": 0.5215552035079208, + "learning_rate": 1.8285714285714288e-05, + "loss": 0.3149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26664093136787415, + "step": 145, + "valid_targets_mean": 4416.2, + "valid_targets_min": 335 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.5674313964489882, + "learning_rate": 1.8920634920634923e-05, + "loss": 0.3226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3291376233100891, + "step": 150, + "valid_targets_mean": 5014.2, + "valid_targets_min": 251 + }, + { + "epoch": 0.34444444444444444, + "grad_norm": 0.5110719079947695, + "learning_rate": 1.9555555555555557e-05, + "loss": 0.3088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29836922883987427, + "step": 155, + "valid_targets_mean": 4633.4, + "valid_targets_min": 656 + }, + { + "epoch": 0.35555555555555557, + "grad_norm": 0.6271829324260068, + "learning_rate": 2.0190476190476192e-05, + "loss": 0.3173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.40676355361938477, + "step": 160, + "valid_targets_mean": 4353.1, + "valid_targets_min": 301 + }, + { + "epoch": 0.36666666666666664, + "grad_norm": 0.5661101605652876, + "learning_rate": 2.082539682539683e-05, + "loss": 0.3187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30450916290283203, + "step": 165, + "valid_targets_mean": 4222.2, + "valid_targets_min": 671 + }, + { + "epoch": 0.37777777777777777, + "grad_norm": 0.5792882504021095, + "learning_rate": 2.146031746031746e-05, + "loss": 0.2887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3169078230857849, + "step": 170, + "valid_targets_mean": 4392.9, + "valid_targets_min": 286 + }, + { + "epoch": 0.3888888888888889, + "grad_norm": 0.5061884693546234, + "learning_rate": 2.20952380952381e-05, + "loss": 0.3388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32877248525619507, + "step": 175, + "valid_targets_mean": 4587.4, + "valid_targets_min": 603 + }, + { + "epoch": 0.4, + "grad_norm": 0.55673810264919, + "learning_rate": 2.273015873015873e-05, + "loss": 0.3079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3139079511165619, + "step": 180, + "valid_targets_mean": 5031.8, + "valid_targets_min": 391 + }, + { + "epoch": 0.4111111111111111, + "grad_norm": 0.7128274083633475, + "learning_rate": 2.336507936507937e-05, + "loss": 0.3189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3152565360069275, + "step": 185, + "valid_targets_mean": 2480.9, + "valid_targets_min": 339 + }, + { + "epoch": 0.4222222222222222, + "grad_norm": 0.5610727534792233, + "learning_rate": 2.4e-05, + "loss": 0.3086, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3390665054321289, + "step": 190, + "valid_targets_mean": 4430.2, + "valid_targets_min": 591 + }, + { + "epoch": 0.43333333333333335, + "grad_norm": 0.8939323722899397, + "learning_rate": 2.463492063492064e-05, + "loss": 0.3016, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3454466760158539, + "step": 195, + "valid_targets_mean": 3394.8, + "valid_targets_min": 693 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 1.0429529339999148, + "learning_rate": 2.526984126984127e-05, + "loss": 0.3146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2936026453971863, + "step": 200, + "valid_targets_mean": 5333.9, + "valid_targets_min": 883 + }, + { + "epoch": 0.45555555555555555, + "grad_norm": 0.5853756503183534, + "learning_rate": 2.5904761904761908e-05, + "loss": 0.3152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3078632950782776, + "step": 205, + "valid_targets_mean": 3632.1, + "valid_targets_min": 636 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 0.5715675965569004, + "learning_rate": 2.653968253968254e-05, + "loss": 0.3122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3024342656135559, + "step": 210, + "valid_targets_mean": 4422.1, + "valid_targets_min": 328 + }, + { + "epoch": 0.4777777777777778, + "grad_norm": 0.6821942507785, + "learning_rate": 2.7174603174603178e-05, + "loss": 0.3154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36502134799957275, + "step": 215, + "valid_targets_mean": 3621.6, + "valid_targets_min": 239 + }, + { + "epoch": 0.4888888888888889, + "grad_norm": 0.6021391191144772, + "learning_rate": 2.780952380952381e-05, + "loss": 0.28, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25724563002586365, + "step": 220, + "valid_targets_mean": 4738.3, + "valid_targets_min": 656 + }, + { + "epoch": 0.5, + "grad_norm": 0.5234960619653491, + "learning_rate": 2.8444444444444447e-05, + "loss": 0.3075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33516725897789, + "step": 225, + "valid_targets_mean": 4865.0, + "valid_targets_min": 231 + }, + { + "epoch": 0.5111111111111111, + "grad_norm": 0.547550500956153, + "learning_rate": 2.9079365079365082e-05, + "loss": 0.2884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29283851385116577, + "step": 230, + "valid_targets_mean": 4564.4, + "valid_targets_min": 1213 + }, + { + "epoch": 0.5222222222222223, + "grad_norm": 0.5897013069621678, + "learning_rate": 2.9714285714285717e-05, + "loss": 0.2937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30815833806991577, + "step": 235, + "valid_targets_mean": 3828.2, + "valid_targets_min": 325 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 0.5735263579255174, + "learning_rate": 3.034920634920635e-05, + "loss": 0.2914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30891531705856323, + "step": 240, + "valid_targets_mean": 4256.2, + "valid_targets_min": 664 + }, + { + "epoch": 0.5444444444444444, + "grad_norm": 0.5249052908854333, + "learning_rate": 3.098412698412699e-05, + "loss": 0.308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26125839352607727, + "step": 245, + "valid_targets_mean": 4620.4, + "valid_targets_min": 995 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.5405962329977203, + "learning_rate": 3.161904761904762e-05, + "loss": 0.2811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26012593507766724, + "step": 250, + "valid_targets_mean": 4433.9, + "valid_targets_min": 629 + }, + { + "epoch": 0.5666666666666667, + "grad_norm": 0.8775775341101245, + "learning_rate": 3.225396825396826e-05, + "loss": 0.2898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23986881971359253, + "step": 255, + "valid_targets_mean": 3096.6, + "valid_targets_min": 606 + }, + { + "epoch": 0.5777777777777777, + "grad_norm": 0.540108337375447, + "learning_rate": 3.288888888888889e-05, + "loss": 0.2962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26411673426628113, + "step": 260, + "valid_targets_mean": 5320.4, + "valid_targets_min": 679 + }, + { + "epoch": 0.5888888888888889, + "grad_norm": 0.5089656627557569, + "learning_rate": 3.352380952380953e-05, + "loss": 0.298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2886936664581299, + "step": 265, + "valid_targets_mean": 5059.9, + "valid_targets_min": 2016 + }, + { + "epoch": 0.6, + "grad_norm": 0.8577322725171138, + "learning_rate": 3.415873015873016e-05, + "loss": 0.2903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2848944664001465, + "step": 270, + "valid_targets_mean": 4490.1, + "valid_targets_min": 369 + }, + { + "epoch": 0.6111111111111112, + "grad_norm": 0.7839943851011535, + "learning_rate": 3.47936507936508e-05, + "loss": 0.2766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2646746039390564, + "step": 275, + "valid_targets_mean": 4372.7, + "valid_targets_min": 347 + }, + { + "epoch": 0.6222222222222222, + "grad_norm": 0.6552099358054163, + "learning_rate": 3.542857142857143e-05, + "loss": 0.28, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.279890239238739, + "step": 280, + "valid_targets_mean": 3361.1, + "valid_targets_min": 173 + }, + { + "epoch": 0.6333333333333333, + "grad_norm": 0.6470809605687007, + "learning_rate": 3.606349206349207e-05, + "loss": 0.2958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3344566822052002, + "step": 285, + "valid_targets_mean": 3997.0, + "valid_targets_min": 263 + }, + { + "epoch": 0.6444444444444445, + "grad_norm": 0.6042965183251453, + "learning_rate": 3.66984126984127e-05, + "loss": 0.3008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29824304580688477, + "step": 290, + "valid_targets_mean": 4553.6, + "valid_targets_min": 285 + }, + { + "epoch": 0.6555555555555556, + "grad_norm": 0.9572004588809255, + "learning_rate": 3.733333333333334e-05, + "loss": 0.2685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2197730839252472, + "step": 295, + "valid_targets_mean": 4863.1, + "valid_targets_min": 719 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.613931117239092, + "learning_rate": 3.796825396825397e-05, + "loss": 0.2869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2734295129776001, + "step": 300, + "valid_targets_mean": 3048.4, + "valid_targets_min": 630 + }, + { + "epoch": 0.6777777777777778, + "grad_norm": 0.7225386302879916, + "learning_rate": 3.860317460317461e-05, + "loss": 0.2635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24375391006469727, + "step": 305, + "valid_targets_mean": 4168.9, + "valid_targets_min": 949 + }, + { + "epoch": 0.6888888888888889, + "grad_norm": 0.638212930897173, + "learning_rate": 3.923809523809524e-05, + "loss": 0.2679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2810541093349457, + "step": 310, + "valid_targets_mean": 3710.6, + "valid_targets_min": 275 + }, + { + "epoch": 0.7, + "grad_norm": 0.5816992650976073, + "learning_rate": 3.9873015873015876e-05, + "loss": 0.2778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26782992482185364, + "step": 315, + "valid_targets_mean": 4587.9, + "valid_targets_min": 923 + }, + { + "epoch": 0.7111111111111111, + "grad_norm": 0.5013466486109508, + "learning_rate": 3.999980352246968e-05, + "loss": 0.2757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24904786050319672, + "step": 320, + "valid_targets_mean": 4606.5, + "valid_targets_min": 402 + }, + { + "epoch": 0.7222222222222222, + "grad_norm": 0.713347061955866, + "learning_rate": 3.9999005339118864e-05, + "loss": 0.2866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30004629492759705, + "step": 325, + "valid_targets_mean": 3887.9, + "valid_targets_min": 605 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 0.47963747379037935, + "learning_rate": 3.9997593194586953e-05, + "loss": 0.2852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2432718276977539, + "step": 330, + "valid_targets_mean": 4275.9, + "valid_targets_min": 535 + }, + { + "epoch": 0.7444444444444445, + "grad_norm": 0.6005660039240017, + "learning_rate": 3.99955671322262e-05, + "loss": 0.2799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2737639248371124, + "step": 335, + "valid_targets_mean": 4195.4, + "valid_targets_min": 300 + }, + { + "epoch": 0.7555555555555555, + "grad_norm": 0.5205837688812412, + "learning_rate": 3.999292721423588e-05, + "loss": 0.2922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3177686035633087, + "step": 340, + "valid_targets_mean": 5234.1, + "valid_targets_min": 460 + }, + { + "epoch": 0.7666666666666667, + "grad_norm": 0.5202820975533874, + "learning_rate": 3.998967352166037e-05, + "loss": 0.2837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27241072058677673, + "step": 345, + "valid_targets_mean": 4445.9, + "valid_targets_min": 656 + }, + { + "epoch": 0.7777777777777778, + "grad_norm": 0.5301976190091423, + "learning_rate": 3.998580615438671e-05, + "loss": 0.2983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27598923444747925, + "step": 350, + "valid_targets_mean": 5029.9, + "valid_targets_min": 859 + }, + { + "epoch": 0.7888888888888889, + "grad_norm": 0.5033260846088292, + "learning_rate": 3.998132523114146e-05, + "loss": 0.2885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25926336646080017, + "step": 355, + "valid_targets_mean": 4720.8, + "valid_targets_min": 299 + }, + { + "epoch": 0.8, + "grad_norm": 0.5062497800182023, + "learning_rate": 3.9976230889487107e-05, + "loss": 0.2716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25432416796684265, + "step": 360, + "valid_targets_mean": 4968.1, + "valid_targets_min": 598 + }, + { + "epoch": 0.8111111111111111, + "grad_norm": 0.6022554597774316, + "learning_rate": 3.997052328581783e-05, + "loss": 0.2772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27107948064804077, + "step": 365, + "valid_targets_mean": 4174.4, + "valid_targets_min": 322 + }, + { + "epoch": 0.8222222222222222, + "grad_norm": 0.523430681701777, + "learning_rate": 3.99642025953547e-05, + "loss": 0.277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26425060629844666, + "step": 370, + "valid_targets_mean": 4664.8, + "valid_targets_min": 1464 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 0.5063947141064169, + "learning_rate": 3.9957269012140306e-05, + "loss": 0.2786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2735532522201538, + "step": 375, + "valid_targets_mean": 4143.1, + "valid_targets_min": 428 + }, + { + "epoch": 0.8444444444444444, + "grad_norm": 0.5680326007926829, + "learning_rate": 3.9949722749032755e-05, + "loss": 0.2705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24175962805747986, + "step": 380, + "valid_targets_mean": 3959.0, + "valid_targets_min": 530 + }, + { + "epoch": 0.8555555555555555, + "grad_norm": 0.5324488584110421, + "learning_rate": 3.994156403769922e-05, + "loss": 0.2908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25714313983917236, + "step": 385, + "valid_targets_mean": 4401.1, + "valid_targets_min": 907 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 0.5577161885914526, + "learning_rate": 3.993279312860876e-05, + "loss": 0.2549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27009209990501404, + "step": 390, + "valid_targets_mean": 4610.8, + "valid_targets_min": 436 + }, + { + "epoch": 0.8777777777777778, + "grad_norm": 0.4942352058414063, + "learning_rate": 3.9923410291024636e-05, + "loss": 0.2673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.271714985370636, + "step": 395, + "valid_targets_mean": 4242.4, + "valid_targets_min": 241 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 0.49807037133844223, + "learning_rate": 3.991341581299609e-05, + "loss": 0.2625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2850569486618042, + "step": 400, + "valid_targets_mean": 5568.6, + "valid_targets_min": 1111 + }, + { + "epoch": 0.9, + "grad_norm": 0.516015376982657, + "learning_rate": 3.990281000134946e-05, + "loss": 0.2851, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2795839309692383, + "step": 405, + "valid_targets_mean": 4242.3, + "valid_targets_min": 317 + }, + { + "epoch": 0.9111111111111111, + "grad_norm": 0.4858123801723977, + "learning_rate": 3.989159318167875e-05, + "loss": 0.281, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2678981423377991, + "step": 410, + "valid_targets_mean": 5212.5, + "valid_targets_min": 419 + }, + { + "epoch": 0.9222222222222223, + "grad_norm": 0.5792943773685142, + "learning_rate": 3.9879765698335705e-05, + "loss": 0.258, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24788373708724976, + "step": 415, + "valid_targets_mean": 4014.3, + "valid_targets_min": 1329 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 0.5323729594389564, + "learning_rate": 3.986732791441915e-05, + "loss": 0.2765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29175180196762085, + "step": 420, + "valid_targets_mean": 4264.8, + "valid_targets_min": 467 + }, + { + "epoch": 0.9444444444444444, + "grad_norm": 0.5230426930677612, + "learning_rate": 3.985428021176391e-05, + "loss": 0.2613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22425571084022522, + "step": 425, + "valid_targets_mean": 4607.9, + "valid_targets_min": 214 + }, + { + "epoch": 0.9555555555555556, + "grad_norm": 0.5587632060354308, + "learning_rate": 3.984062299092904e-05, + "loss": 0.2607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2455931007862091, + "step": 430, + "valid_targets_mean": 3599.4, + "valid_targets_min": 562 + }, + { + "epoch": 0.9666666666666667, + "grad_norm": 0.5416728906416407, + "learning_rate": 3.982635667118557e-05, + "loss": 0.2771, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2705487310886383, + "step": 435, + "valid_targets_mean": 4116.0, + "valid_targets_min": 795 + }, + { + "epoch": 0.9777777777777777, + "grad_norm": 0.5360254523319351, + "learning_rate": 3.981148169050361e-05, + "loss": 0.2749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24848337471485138, + "step": 440, + "valid_targets_mean": 3970.2, + "valid_targets_min": 499 + }, + { + "epoch": 0.9888888888888889, + "grad_norm": 0.5253145463662034, + "learning_rate": 3.97959985055389e-05, + "loss": 0.3032, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3328373432159424, + "step": 445, + "valid_targets_mean": 5336.6, + "valid_targets_min": 1847 + }, + { + "epoch": 1.0, + "grad_norm": 0.48196208890145337, + "learning_rate": 3.97799075916188e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27850598096847534, + "step": 450, + "valid_targets_mean": 5520.2, + "valid_targets_min": 1146 + }, + { + "epoch": 1.011111111111111, + "grad_norm": 0.6252822658746067, + "learning_rate": 3.976320944272773e-05, + "loss": 0.2498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2661787271499634, + "step": 455, + "valid_targets_mean": 4193.9, + "valid_targets_min": 381 + }, + { + "epoch": 1.0222222222222221, + "grad_norm": 0.4701033558777485, + "learning_rate": 3.9745904571491916e-05, + "loss": 0.2431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.252180814743042, + "step": 460, + "valid_targets_mean": 5736.6, + "valid_targets_min": 1675 + }, + { + "epoch": 1.0333333333333334, + "grad_norm": 0.8485213019287988, + "learning_rate": 3.972799350916375e-05, + "loss": 0.2631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27109605073928833, + "step": 465, + "valid_targets_mean": 4100.7, + "valid_targets_min": 841 + }, + { + "epoch": 1.0444444444444445, + "grad_norm": 0.5743499080820584, + "learning_rate": 3.970947680560543e-05, + "loss": 0.2568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26906871795654297, + "step": 470, + "valid_targets_mean": 3466.4, + "valid_targets_min": 357 + }, + { + "epoch": 1.0555555555555556, + "grad_norm": 0.5031824328141823, + "learning_rate": 3.969035502927208e-05, + "loss": 0.2522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2581416368484497, + "step": 475, + "valid_targets_mean": 4564.5, + "valid_targets_min": 740 + }, + { + "epoch": 1.0666666666666667, + "grad_norm": 0.5700693169245215, + "learning_rate": 3.967062876719433e-05, + "loss": 0.2625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2493741810321808, + "step": 480, + "valid_targets_mean": 4157.4, + "valid_targets_min": 322 + }, + { + "epoch": 1.0777777777777777, + "grad_norm": 0.7056940332930607, + "learning_rate": 3.965029862496023e-05, + "loss": 0.275, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24406126141548157, + "step": 485, + "valid_targets_mean": 3929.6, + "valid_targets_min": 513 + }, + { + "epoch": 1.0888888888888888, + "grad_norm": 0.6063202867573172, + "learning_rate": 3.962936522669674e-05, + "loss": 0.274, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3029976487159729, + "step": 490, + "valid_targets_mean": 4941.8, + "valid_targets_min": 173 + }, + { + "epoch": 1.1, + "grad_norm": 1.6432217580814634, + "learning_rate": 3.960782921505052e-05, + "loss": 0.2376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26101475954055786, + "step": 495, + "valid_targets_mean": 4462.2, + "valid_targets_min": 1231 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 0.45742604503694995, + "learning_rate": 3.9585691251168205e-05, + "loss": 0.2534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24371081590652466, + "step": 500, + "valid_targets_mean": 5317.8, + "valid_targets_min": 662 + }, + { + "epoch": 1.1222222222222222, + "grad_norm": 0.583691622746648, + "learning_rate": 3.9562952014676116e-05, + "loss": 0.246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2584843039512634, + "step": 505, + "valid_targets_mean": 3938.7, + "valid_targets_min": 242 + }, + { + "epoch": 1.1333333333333333, + "grad_norm": 0.5669510869721377, + "learning_rate": 3.95396122036594e-05, + "loss": 0.25, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24399635195732117, + "step": 510, + "valid_targets_mean": 3737.6, + "valid_targets_min": 769 + }, + { + "epoch": 1.1444444444444444, + "grad_norm": 0.5041490688945235, + "learning_rate": 3.951567253464058e-05, + "loss": 0.271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2624698877334595, + "step": 515, + "valid_targets_mean": 4929.1, + "valid_targets_min": 229 + }, + { + "epoch": 1.1555555555555554, + "grad_norm": 0.6300993096751855, + "learning_rate": 3.949113374255759e-05, + "loss": 0.2305, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.253865510225296, + "step": 520, + "valid_targets_mean": 3452.0, + "valid_targets_min": 350 + }, + { + "epoch": 1.1666666666666667, + "grad_norm": 0.6297449586516455, + "learning_rate": 3.946599658074117e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2540290951728821, + "step": 525, + "valid_targets_mean": 4043.3, + "valid_targets_min": 377 + }, + { + "epoch": 1.1777777777777778, + "grad_norm": 0.6443213997898508, + "learning_rate": 3.94402618208918e-05, + "loss": 0.2753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33983704447746277, + "step": 530, + "valid_targets_mean": 4571.4, + "valid_targets_min": 332 + }, + { + "epoch": 1.1888888888888889, + "grad_norm": 0.4851613432195201, + "learning_rate": 3.9413930253055925e-05, + "loss": 0.2515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2413058876991272, + "step": 535, + "valid_targets_mean": 5014.4, + "valid_targets_min": 1004 + }, + { + "epoch": 1.2, + "grad_norm": 0.5924543065212132, + "learning_rate": 3.938700268560179e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26164644956588745, + "step": 540, + "valid_targets_mean": 5537.7, + "valid_targets_min": 682 + }, + { + "epoch": 1.211111111111111, + "grad_norm": 0.5348562981348433, + "learning_rate": 3.935947994519455e-05, + "loss": 0.2473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22140653431415558, + "step": 545, + "valid_targets_mean": 3933.1, + "valid_targets_min": 326 + }, + { + "epoch": 1.2222222222222223, + "grad_norm": 1.1507409216365962, + "learning_rate": 3.933136287677095e-05, + "loss": 0.2797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2999425530433655, + "step": 550, + "valid_targets_mean": 4136.8, + "valid_targets_min": 393 + }, + { + "epoch": 1.2333333333333334, + "grad_norm": 0.6036157268361829, + "learning_rate": 3.9302652343513325e-05, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24191945791244507, + "step": 555, + "valid_targets_mean": 3355.1, + "valid_targets_min": 354 + }, + { + "epoch": 1.2444444444444445, + "grad_norm": 0.5594737690815887, + "learning_rate": 3.927334922682319e-05, + "loss": 0.2386, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2738354802131653, + "step": 560, + "valid_targets_mean": 4464.2, + "valid_targets_min": 341 + }, + { + "epoch": 1.2555555555555555, + "grad_norm": 0.60403245110363, + "learning_rate": 3.924345442629405e-05, + "loss": 0.2414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22148557007312775, + "step": 565, + "valid_targets_mean": 3713.0, + "valid_targets_min": 618 + }, + { + "epoch": 1.2666666666666666, + "grad_norm": 0.5326475987050822, + "learning_rate": 3.9212968859683924e-05, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2524668276309967, + "step": 570, + "valid_targets_mean": 4112.8, + "valid_targets_min": 998 + }, + { + "epoch": 1.2777777777777777, + "grad_norm": 0.5255254450372632, + "learning_rate": 3.918189346288708e-05, + "loss": 0.249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25053149461746216, + "step": 575, + "valid_targets_mean": 4201.0, + "valid_targets_min": 359 + }, + { + "epoch": 1.2888888888888888, + "grad_norm": 0.5460490711902419, + "learning_rate": 3.9150229189905325e-05, + "loss": 0.2513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2665417790412903, + "step": 580, + "valid_targets_mean": 4447.8, + "valid_targets_min": 384 + }, + { + "epoch": 1.3, + "grad_norm": 0.8958369211208952, + "learning_rate": 3.911797701281872e-05, + "loss": 0.2594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27293702960014343, + "step": 585, + "valid_targets_mean": 3628.9, + "valid_targets_min": 755 + }, + { + "epoch": 1.3111111111111111, + "grad_norm": 0.5988510933683479, + "learning_rate": 3.9085137921755765e-05, + "loss": 0.2598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2321304976940155, + "step": 590, + "valid_targets_mean": 3787.6, + "valid_targets_min": 500 + }, + { + "epoch": 1.3222222222222222, + "grad_norm": 0.5432482040922059, + "learning_rate": 3.9051712924862926e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26185327768325806, + "step": 595, + "valid_targets_mean": 5390.7, + "valid_targets_min": 670 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.5377831944060099, + "learning_rate": 3.901770304827379e-05, + "loss": 0.2529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22949066758155823, + "step": 600, + "valid_targets_mean": 4550.2, + "valid_targets_min": 214 + }, + { + "epoch": 1.3444444444444446, + "grad_norm": 0.5612439217393881, + "learning_rate": 3.898310933607746e-05, + "loss": 0.2753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2890508472919464, + "step": 605, + "valid_targets_mean": 4195.9, + "valid_targets_min": 826 + }, + { + "epoch": 1.3555555555555556, + "grad_norm": 0.5303766289315424, + "learning_rate": 3.8947932850286585e-05, + "loss": 0.2389, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.264137327671051, + "step": 610, + "valid_targets_mean": 4352.6, + "valid_targets_min": 335 + }, + { + "epoch": 1.3666666666666667, + "grad_norm": 0.5137510782550517, + "learning_rate": 3.891217467080472e-05, + "loss": 0.2505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23932068049907684, + "step": 615, + "valid_targets_mean": 4644.5, + "valid_targets_min": 1028 + }, + { + "epoch": 1.3777777777777778, + "grad_norm": 0.47391135683532554, + "learning_rate": 3.887583589539315e-05, + "loss": 0.2465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.255960613489151, + "step": 620, + "valid_targets_mean": 4816.4, + "valid_targets_min": 579 + }, + { + "epoch": 1.3888888888888888, + "grad_norm": 0.5572300753645717, + "learning_rate": 3.883891763963723e-05, + "loss": 0.2529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2373078167438507, + "step": 625, + "valid_targets_mean": 3751.8, + "valid_targets_min": 668 + }, + { + "epoch": 1.4, + "grad_norm": 0.49937213169450123, + "learning_rate": 3.880142103691213e-05, + "loss": 0.2341, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23268617689609528, + "step": 630, + "valid_targets_mean": 4873.1, + "valid_targets_min": 549 + }, + { + "epoch": 1.411111111111111, + "grad_norm": 0.5406692441975431, + "learning_rate": 3.876334723834802e-05, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2593476176261902, + "step": 635, + "valid_targets_mean": 4517.7, + "valid_targets_min": 647 + }, + { + "epoch": 1.4222222222222223, + "grad_norm": 0.553445249581852, + "learning_rate": 3.872469741279475e-05, + "loss": 0.2496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2700212597846985, + "step": 640, + "valid_targets_mean": 3890.1, + "valid_targets_min": 490 + }, + { + "epoch": 1.4333333333333333, + "grad_norm": 0.5301037730927833, + "learning_rate": 3.868547274678595e-05, + "loss": 0.2535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2927553057670593, + "step": 645, + "valid_targets_mean": 4392.2, + "valid_targets_min": 423 + }, + { + "epoch": 1.4444444444444444, + "grad_norm": 0.44259558229551654, + "learning_rate": 3.864567444450263e-05, + "loss": 0.2524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24536505341529846, + "step": 650, + "valid_targets_mean": 6347.3, + "valid_targets_min": 1877 + }, + { + "epoch": 1.4555555555555555, + "grad_norm": 0.47771864860111035, + "learning_rate": 3.8605303727736186e-05, + "loss": 0.2434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22662869095802307, + "step": 655, + "valid_targets_mean": 4389.6, + "valid_targets_min": 352 + }, + { + "epoch": 1.4666666666666668, + "grad_norm": 0.6247118916552183, + "learning_rate": 3.856436183585089e-05, + "loss": 0.2638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.283000111579895, + "step": 660, + "valid_targets_mean": 4078.9, + "valid_targets_min": 725 + }, + { + "epoch": 1.4777777777777779, + "grad_norm": 1.5070723708006228, + "learning_rate": 3.8522850025745885e-05, + "loss": 0.2584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25614166259765625, + "step": 665, + "valid_targets_mean": 5278.3, + "valid_targets_min": 495 + }, + { + "epoch": 1.488888888888889, + "grad_norm": 0.628570473741109, + "learning_rate": 3.8480769571816535e-05, + "loss": 0.2516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29474788904190063, + "step": 670, + "valid_targets_mean": 3874.1, + "valid_targets_min": 545 + }, + { + "epoch": 1.5, + "grad_norm": 0.8699623629541559, + "learning_rate": 3.843812176591535e-05, + "loss": 0.2382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22753040492534637, + "step": 675, + "valid_targets_mean": 3918.7, + "valid_targets_min": 309 + }, + { + "epoch": 1.511111111111111, + "grad_norm": 0.5112871185355921, + "learning_rate": 3.8394907917312314e-05, + "loss": 0.2391, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22634285688400269, + "step": 680, + "valid_targets_mean": 4564.2, + "valid_targets_min": 406 + }, + { + "epoch": 1.5222222222222221, + "grad_norm": 0.47954206990442566, + "learning_rate": 3.835112935265468e-05, + "loss": 0.2412, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2335192710161209, + "step": 685, + "valid_targets_mean": 4790.6, + "valid_targets_min": 337 + }, + { + "epoch": 1.5333333333333332, + "grad_norm": 0.48528209028878294, + "learning_rate": 3.830678741592625e-05, + "loss": 0.2358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23643608391284943, + "step": 690, + "valid_targets_mean": 4443.8, + "valid_targets_min": 548 + }, + { + "epoch": 1.5444444444444443, + "grad_norm": 0.6276984597487912, + "learning_rate": 3.826188346840611e-05, + "loss": 0.262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2579842805862427, + "step": 695, + "valid_targets_mean": 3282.2, + "valid_targets_min": 352 + }, + { + "epoch": 1.5555555555555556, + "grad_norm": 0.5476067080880181, + "learning_rate": 3.8216418888626864e-05, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24156105518341064, + "step": 700, + "valid_targets_mean": 4612.9, + "valid_targets_min": 555 + }, + { + "epoch": 1.5666666666666667, + "grad_norm": 0.5413141061562579, + "learning_rate": 3.817039507233227e-05, + "loss": 0.2569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2622939646244049, + "step": 705, + "valid_targets_mean": 4257.5, + "valid_targets_min": 286 + }, + { + "epoch": 1.5777777777777777, + "grad_norm": 0.5191592586122504, + "learning_rate": 3.812381343243444e-05, + "loss": 0.2376, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2551836371421814, + "step": 710, + "valid_targets_mean": 4099.2, + "valid_targets_min": 605 + }, + { + "epoch": 1.588888888888889, + "grad_norm": 0.6436243723923792, + "learning_rate": 3.807667539897041e-05, + "loss": 0.238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.227070152759552, + "step": 715, + "valid_targets_mean": 3246.0, + "valid_targets_min": 348 + }, + { + "epoch": 1.6, + "grad_norm": 0.6239016531037459, + "learning_rate": 3.8028982419058304e-05, + "loss": 0.2585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26668721437454224, + "step": 720, + "valid_targets_mean": 3396.3, + "valid_targets_min": 248 + }, + { + "epoch": 1.6111111111111112, + "grad_norm": 0.9192051442781469, + "learning_rate": 3.798073595685283e-05, + "loss": 0.2473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25195956230163574, + "step": 725, + "valid_targets_mean": 3665.6, + "valid_targets_min": 272 + }, + { + "epoch": 1.6222222222222222, + "grad_norm": 0.5623905152284274, + "learning_rate": 3.793193749350042e-05, + "loss": 0.2523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23393118381500244, + "step": 730, + "valid_targets_mean": 5211.4, + "valid_targets_min": 661 + }, + { + "epoch": 1.6333333333333333, + "grad_norm": 0.5429518143801225, + "learning_rate": 3.788258852709367e-05, + "loss": 0.2593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22861596941947937, + "step": 735, + "valid_targets_mean": 5047.1, + "valid_targets_min": 589 + }, + { + "epoch": 1.6444444444444444, + "grad_norm": 0.47826555516018626, + "learning_rate": 3.7832690572625417e-05, + "loss": 0.2193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21526125073432922, + "step": 740, + "valid_targets_mean": 4602.2, + "valid_targets_min": 839 + }, + { + "epoch": 1.6555555555555554, + "grad_norm": 0.5863707657883438, + "learning_rate": 3.77822451619422e-05, + "loss": 0.2603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2455175220966339, + "step": 745, + "valid_targets_mean": 4632.7, + "valid_targets_min": 413 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.5169118790673137, + "learning_rate": 3.773125384369723e-05, + "loss": 0.2438, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27260589599609375, + "step": 750, + "valid_targets_mean": 4461.6, + "valid_targets_min": 331 + }, + { + "epoch": 1.6777777777777778, + "grad_norm": 0.5608117782474311, + "learning_rate": 3.7679718183302856e-05, + "loss": 0.2519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25778332352638245, + "step": 755, + "valid_targets_mean": 4585.0, + "valid_targets_min": 616 + }, + { + "epoch": 1.6888888888888889, + "grad_norm": 0.5536343736249013, + "learning_rate": 3.762763976288252e-05, + "loss": 0.261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26178520917892456, + "step": 760, + "valid_targets_mean": 4168.1, + "valid_targets_min": 590 + }, + { + "epoch": 1.7, + "grad_norm": 0.4967067858273974, + "learning_rate": 3.757502018122215e-05, + "loss": 0.2604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23201864957809448, + "step": 765, + "valid_targets_mean": 4762.7, + "valid_targets_min": 656 + }, + { + "epoch": 1.7111111111111112, + "grad_norm": 0.6431920727054683, + "learning_rate": 3.7521861053721104e-05, + "loss": 0.2734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3084906339645386, + "step": 770, + "valid_targets_mean": 3282.1, + "valid_targets_min": 355 + }, + { + "epoch": 1.7222222222222223, + "grad_norm": 0.49539913031077976, + "learning_rate": 3.74681640123426e-05, + "loss": 0.2436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2623264789581299, + "step": 775, + "valid_targets_mean": 4743.1, + "valid_targets_min": 795 + }, + { + "epoch": 1.7333333333333334, + "grad_norm": 0.5255901704167366, + "learning_rate": 3.741393070556355e-05, + "loss": 0.2469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25359559059143066, + "step": 780, + "valid_targets_mean": 4230.9, + "valid_targets_min": 932 + }, + { + "epoch": 1.7444444444444445, + "grad_norm": 0.5202593209519554, + "learning_rate": 3.7359162798324015e-05, + "loss": 0.2451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3075648546218872, + "step": 785, + "valid_targets_mean": 4931.7, + "valid_targets_min": 426 + }, + { + "epoch": 1.7555555555555555, + "grad_norm": 0.6052760837991578, + "learning_rate": 3.7303861971976074e-05, + "loss": 0.2645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21487626433372498, + "step": 790, + "valid_targets_mean": 3999.6, + "valid_targets_min": 414 + }, + { + "epoch": 1.7666666666666666, + "grad_norm": 0.5902290421718297, + "learning_rate": 3.724802992423218e-05, + "loss": 0.2541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24608749151229858, + "step": 795, + "valid_targets_mean": 3589.3, + "valid_targets_min": 301 + }, + { + "epoch": 1.7777777777777777, + "grad_norm": 0.6368794220763241, + "learning_rate": 3.719166836911309e-05, + "loss": 0.2563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25677546858787537, + "step": 800, + "valid_targets_mean": 4686.3, + "valid_targets_min": 703 + }, + { + "epoch": 1.7888888888888888, + "grad_norm": 0.5323661851044132, + "learning_rate": 3.713477903689518e-05, + "loss": 0.2532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2390604317188263, + "step": 805, + "valid_targets_mean": 4084.9, + "valid_targets_min": 339 + }, + { + "epoch": 1.8, + "grad_norm": 0.5186885779998889, + "learning_rate": 3.707736367405741e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22952812910079956, + "step": 810, + "valid_targets_mean": 3964.5, + "valid_targets_min": 238 + }, + { + "epoch": 1.8111111111111111, + "grad_norm": 0.6011432502817368, + "learning_rate": 3.701942404322764e-05, + "loss": 0.2443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28318285942077637, + "step": 815, + "valid_targets_mean": 4504.8, + "valid_targets_min": 401 + }, + { + "epoch": 1.8222222222222222, + "grad_norm": 0.5057107246393668, + "learning_rate": 3.696096192312852e-05, + "loss": 0.2468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2750961482524872, + "step": 820, + "valid_targets_mean": 4184.1, + "valid_targets_min": 299 + }, + { + "epoch": 1.8333333333333335, + "grad_norm": 0.5674106572096047, + "learning_rate": 3.690197910852294e-05, + "loss": 0.2688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.284572958946228, + "step": 825, + "valid_targets_mean": 4159.1, + "valid_targets_min": 883 + }, + { + "epoch": 1.8444444444444446, + "grad_norm": 0.524428929125523, + "learning_rate": 3.684247741015888e-05, + "loss": 0.235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2377292364835739, + "step": 830, + "valid_targets_mean": 4545.5, + "valid_targets_min": 783 + }, + { + "epoch": 1.8555555555555556, + "grad_norm": 0.5197318284715093, + "learning_rate": 3.678245865471383e-05, + "loss": 0.2482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25039640069007874, + "step": 835, + "valid_targets_mean": 3829.6, + "valid_targets_min": 436 + }, + { + "epoch": 1.8666666666666667, + "grad_norm": 0.741122146720882, + "learning_rate": 3.672192468473872e-05, + "loss": 0.2437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25348931550979614, + "step": 840, + "valid_targets_mean": 3802.5, + "valid_targets_min": 494 + }, + { + "epoch": 1.8777777777777778, + "grad_norm": 0.5653997003973549, + "learning_rate": 3.666087735860138e-05, + "loss": 0.2422, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23701179027557373, + "step": 845, + "valid_targets_mean": 4565.3, + "valid_targets_min": 441 + }, + { + "epoch": 1.8888888888888888, + "grad_norm": 0.47961375383032556, + "learning_rate": 3.6599318550429415e-05, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28315088152885437, + "step": 850, + "valid_targets_mean": 4855.1, + "valid_targets_min": 356 + }, + { + "epoch": 1.9, + "grad_norm": 0.4960630224668749, + "learning_rate": 3.653725015005275e-05, + "loss": 0.2358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24288593232631683, + "step": 855, + "valid_targets_mean": 4795.5, + "valid_targets_min": 193 + }, + { + "epoch": 1.911111111111111, + "grad_norm": 0.5198260135949281, + "learning_rate": 3.6474674062945573e-05, + "loss": 0.2466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2727063298225403, + "step": 860, + "valid_targets_mean": 5094.9, + "valid_targets_min": 339 + }, + { + "epoch": 1.9222222222222223, + "grad_norm": 0.963664843983337, + "learning_rate": 3.6411592210167834e-05, + "loss": 0.242, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2163504958152771, + "step": 865, + "valid_targets_mean": 4430.9, + "valid_targets_min": 434 + }, + { + "epoch": 1.9333333333333333, + "grad_norm": 0.5387347139824947, + "learning_rate": 3.6348006528306295e-05, + "loss": 0.2441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24510449171066284, + "step": 870, + "valid_targets_mean": 4382.1, + "valid_targets_min": 503 + }, + { + "epoch": 1.9444444444444444, + "grad_norm": 0.499809810495946, + "learning_rate": 3.628391896941505e-05, + "loss": 0.2349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22553254663944244, + "step": 875, + "valid_targets_mean": 4035.4, + "valid_targets_min": 263 + }, + { + "epoch": 1.9555555555555557, + "grad_norm": 0.5218149844142983, + "learning_rate": 3.621933150095561e-05, + "loss": 0.2662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25849276781082153, + "step": 880, + "valid_targets_mean": 3824.7, + "valid_targets_min": 680 + }, + { + "epoch": 1.9666666666666668, + "grad_norm": 0.5123574858401871, + "learning_rate": 3.615424610573651e-05, + "loss": 0.2681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28296032547950745, + "step": 885, + "valid_targets_mean": 4234.2, + "valid_targets_min": 694 + }, + { + "epoch": 1.9777777777777779, + "grad_norm": 0.5240352817270689, + "learning_rate": 3.608866478185245e-05, + "loss": 0.2466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2798116207122803, + "step": 890, + "valid_targets_mean": 4745.1, + "valid_targets_min": 438 + }, + { + "epoch": 1.988888888888889, + "grad_norm": 0.47021296088948716, + "learning_rate": 3.602258954262287e-05, + "loss": 0.235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24416977167129517, + "step": 895, + "valid_targets_mean": 4577.2, + "valid_targets_min": 272 + }, + { + "epoch": 2.0, + "grad_norm": 0.5152852896564755, + "learning_rate": 3.595602241653028e-05, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.227662593126297, + "step": 900, + "valid_targets_mean": 3932.9, + "valid_targets_min": 701 + }, + { + "epoch": 2.011111111111111, + "grad_norm": 0.8269495388300478, + "learning_rate": 3.588896544715787e-05, + "loss": 0.2363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2364978790283203, + "step": 905, + "valid_targets_mean": 4779.8, + "valid_targets_min": 1630 + }, + { + "epoch": 2.022222222222222, + "grad_norm": 0.5822111147854571, + "learning_rate": 3.5821420693126834e-05, + "loss": 0.2403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2130419909954071, + "step": 910, + "valid_targets_mean": 3855.6, + "valid_targets_min": 311 + }, + { + "epoch": 2.033333333333333, + "grad_norm": 0.503530838011803, + "learning_rate": 3.575339022803313e-05, + "loss": 0.2336, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1974865049123764, + "step": 915, + "valid_targets_mean": 3615.4, + "valid_targets_min": 242 + }, + { + "epoch": 2.0444444444444443, + "grad_norm": 0.5881001614957597, + "learning_rate": 3.5684876140383875e-05, + "loss": 0.2199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2469121217727661, + "step": 920, + "valid_targets_mean": 4366.9, + "valid_targets_min": 450 + }, + { + "epoch": 2.0555555555555554, + "grad_norm": 0.5375060558594457, + "learning_rate": 3.561588053353319e-05, + "loss": 0.2174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21924357116222382, + "step": 925, + "valid_targets_mean": 4148.6, + "valid_targets_min": 1350 + }, + { + "epoch": 2.066666666666667, + "grad_norm": 0.5123963351563102, + "learning_rate": 3.554640552561761e-05, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21245072782039642, + "step": 930, + "valid_targets_mean": 3831.3, + "valid_targets_min": 441 + }, + { + "epoch": 2.077777777777778, + "grad_norm": 0.5890369925664793, + "learning_rate": 3.5476453249491125e-05, + "loss": 0.2297, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22979292273521423, + "step": 935, + "valid_targets_mean": 4769.4, + "valid_targets_min": 780 + }, + { + "epoch": 2.088888888888889, + "grad_norm": 0.5822543746194564, + "learning_rate": 3.5406025852659626e-05, + "loss": 0.2005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22272920608520508, + "step": 940, + "valid_targets_mean": 3913.2, + "valid_targets_min": 665 + }, + { + "epoch": 2.1, + "grad_norm": 0.5062356595758193, + "learning_rate": 3.533512549721503e-05, + "loss": 0.2129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19117265939712524, + "step": 945, + "valid_targets_mean": 4397.1, + "valid_targets_min": 379 + }, + { + "epoch": 2.111111111111111, + "grad_norm": 0.5053075095700746, + "learning_rate": 3.5263754359768896e-05, + "loss": 0.2176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2279876470565796, + "step": 950, + "valid_targets_mean": 4971.6, + "valid_targets_min": 1361 + }, + { + "epoch": 2.1222222222222222, + "grad_norm": 0.48516711141414565, + "learning_rate": 3.5191914631385565e-05, + "loss": 0.2184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2130102515220642, + "step": 955, + "valid_targets_mean": 4136.3, + "valid_targets_min": 967 + }, + { + "epoch": 2.1333333333333333, + "grad_norm": 0.5779110961074571, + "learning_rate": 3.511960851751496e-05, + "loss": 0.2139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2052951157093048, + "step": 960, + "valid_targets_mean": 3846.1, + "valid_targets_min": 629 + }, + { + "epoch": 2.1444444444444444, + "grad_norm": 0.5423358183039504, + "learning_rate": 3.504683823792483e-05, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20998695492744446, + "step": 965, + "valid_targets_mean": 3942.2, + "valid_targets_min": 283 + }, + { + "epoch": 2.1555555555555554, + "grad_norm": 0.4731164937857403, + "learning_rate": 3.49736060266326e-05, + "loss": 0.2194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20602105557918549, + "step": 970, + "valid_targets_mean": 4549.1, + "valid_targets_min": 825 + }, + { + "epoch": 2.1666666666666665, + "grad_norm": 0.5192629974349211, + "learning_rate": 3.489991413183686e-05, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1989196240901947, + "step": 975, + "valid_targets_mean": 4872.6, + "valid_targets_min": 556 + }, + { + "epoch": 2.1777777777777776, + "grad_norm": 0.5443326064321022, + "learning_rate": 3.482576481584824e-05, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2225218117237091, + "step": 980, + "valid_targets_mean": 3649.8, + "valid_targets_min": 546 + }, + { + "epoch": 2.188888888888889, + "grad_norm": 0.5020893394971628, + "learning_rate": 3.4751160355020034e-05, + "loss": 0.2094, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23120808601379395, + "step": 985, + "valid_targets_mean": 4243.1, + "valid_targets_min": 841 + }, + { + "epoch": 2.2, + "grad_norm": 0.48166947840338786, + "learning_rate": 3.467610303967829e-05, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18600469827651978, + "step": 990, + "valid_targets_mean": 5170.1, + "valid_targets_min": 1719 + }, + { + "epoch": 2.2111111111111112, + "grad_norm": 0.5583920558407086, + "learning_rate": 3.4600595174051496e-05, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27588558197021484, + "step": 995, + "valid_targets_mean": 4328.1, + "valid_targets_min": 874 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.5416793417738504, + "learning_rate": 3.452463907619986e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24462614953517914, + "step": 1000, + "valid_targets_mean": 4362.1, + "valid_targets_min": 722 + }, + { + "epoch": 2.2333333333333334, + "grad_norm": 0.5328060833599986, + "learning_rate": 3.444823707794414e-05, + "loss": 0.2321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2523013949394226, + "step": 1005, + "valid_targets_mean": 4839.0, + "valid_targets_min": 352 + }, + { + "epoch": 2.2444444444444445, + "grad_norm": 0.563391734301446, + "learning_rate": 3.437139152479403e-05, + "loss": 0.2199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19773298501968384, + "step": 1010, + "valid_targets_mean": 4385.2, + "valid_targets_min": 580 + }, + { + "epoch": 2.2555555555555555, + "grad_norm": 0.5074311551875781, + "learning_rate": 3.429410477587619e-05, + "loss": 0.2146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2079458087682724, + "step": 1015, + "valid_targets_mean": 4402.8, + "valid_targets_min": 270 + }, + { + "epoch": 2.2666666666666666, + "grad_norm": 0.44755347145933255, + "learning_rate": 3.4216379203861785e-05, + "loss": 0.2155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20723801851272583, + "step": 1020, + "valid_targets_mean": 6327.4, + "valid_targets_min": 2089 + }, + { + "epoch": 2.2777777777777777, + "grad_norm": 0.5637924379901195, + "learning_rate": 3.41382171948937e-05, + "loss": 0.2394, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2707464098930359, + "step": 1025, + "valid_targets_mean": 4543.5, + "valid_targets_min": 721 + }, + { + "epoch": 2.2888888888888888, + "grad_norm": 0.5493222833081342, + "learning_rate": 3.405962114851324e-05, + "loss": 0.2265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23270747065544128, + "step": 1030, + "valid_targets_mean": 3841.1, + "valid_targets_min": 733 + }, + { + "epoch": 2.3, + "grad_norm": 0.5377176986106669, + "learning_rate": 3.398059347758647e-05, + "loss": 0.2384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2036696970462799, + "step": 1035, + "valid_targets_mean": 3737.5, + "valid_targets_min": 443 + }, + { + "epoch": 2.311111111111111, + "grad_norm": 0.48575675539344104, + "learning_rate": 3.3901136608230166e-05, + "loss": 0.2424, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24168801307678223, + "step": 1040, + "valid_targets_mean": 5109.9, + "valid_targets_min": 623 + }, + { + "epoch": 2.3222222222222224, + "grad_norm": 0.5389208075547236, + "learning_rate": 3.38212529797373e-05, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2333628237247467, + "step": 1045, + "valid_targets_mean": 4541.2, + "valid_targets_min": 507 + }, + { + "epoch": 2.3333333333333335, + "grad_norm": 0.6380380157616352, + "learning_rate": 3.374094504450218e-05, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21096259355545044, + "step": 1050, + "valid_targets_mean": 2654.1, + "valid_targets_min": 218 + }, + { + "epoch": 2.3444444444444446, + "grad_norm": 0.5618636585728415, + "learning_rate": 3.366021526794517e-05, + "loss": 0.2289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22567206621170044, + "step": 1055, + "valid_targets_mean": 3951.7, + "valid_targets_min": 310 + }, + { + "epoch": 2.3555555555555556, + "grad_norm": 0.590035420993425, + "learning_rate": 3.357906612843697e-05, + "loss": 0.2224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22020503878593445, + "step": 1060, + "valid_targets_mean": 3162.4, + "valid_targets_min": 248 + }, + { + "epoch": 2.3666666666666667, + "grad_norm": 0.5197847274605225, + "learning_rate": 3.349750011722256e-05, + "loss": 0.2298, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23694512248039246, + "step": 1065, + "valid_targets_mean": 4473.9, + "valid_targets_min": 539 + }, + { + "epoch": 2.3777777777777778, + "grad_norm": 0.5902730606680944, + "learning_rate": 3.3415519738344686e-05, + "loss": 0.2292, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23186247050762177, + "step": 1070, + "valid_targets_mean": 4213.4, + "valid_targets_min": 535 + }, + { + "epoch": 2.388888888888889, + "grad_norm": 0.48708079743882093, + "learning_rate": 3.333312750856703e-05, + "loss": 0.2184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22630329430103302, + "step": 1075, + "valid_targets_mean": 5137.1, + "valid_targets_min": 485 + }, + { + "epoch": 2.4, + "grad_norm": 0.6201131687835197, + "learning_rate": 3.3250325957296936e-05, + "loss": 0.2296, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2439213991165161, + "step": 1080, + "valid_targets_mean": 4311.1, + "valid_targets_min": 337 + }, + { + "epoch": 2.411111111111111, + "grad_norm": 0.5910991737720884, + "learning_rate": 3.3167117626507726e-05, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22350382804870605, + "step": 1085, + "valid_targets_mean": 4331.2, + "valid_targets_min": 923 + }, + { + "epoch": 2.422222222222222, + "grad_norm": 0.5745602273524396, + "learning_rate": 3.308350507066069e-05, + "loss": 0.2285, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2272610366344452, + "step": 1090, + "valid_targets_mean": 3677.5, + "valid_targets_min": 620 + }, + { + "epoch": 2.4333333333333336, + "grad_norm": 0.5154857981766388, + "learning_rate": 3.2999490856626674e-05, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21586115658283234, + "step": 1095, + "valid_targets_mean": 4540.8, + "valid_targets_min": 449 + }, + { + "epoch": 2.4444444444444446, + "grad_norm": 0.49655172714458007, + "learning_rate": 3.291507756360725e-05, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23002441227436066, + "step": 1100, + "valid_targets_mean": 4644.0, + "valid_targets_min": 328 + }, + { + "epoch": 2.4555555555555557, + "grad_norm": 0.6161023573158174, + "learning_rate": 3.283026778305554e-05, + "loss": 0.224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24754559993743896, + "step": 1105, + "valid_targets_mean": 4383.9, + "valid_targets_min": 263 + }, + { + "epoch": 2.466666666666667, + "grad_norm": 0.47036375778212935, + "learning_rate": 3.2745064118596696e-05, + "loss": 0.2317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20647308230400085, + "step": 1110, + "valid_targets_mean": 5761.9, + "valid_targets_min": 689 + }, + { + "epoch": 2.477777777777778, + "grad_norm": 0.5406137267974768, + "learning_rate": 3.265946918594793e-05, + "loss": 0.232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23646476864814758, + "step": 1115, + "valid_targets_mean": 3537.1, + "valid_targets_min": 795 + }, + { + "epoch": 2.488888888888889, + "grad_norm": 0.4637714307748355, + "learning_rate": 3.257348561283822e-05, + "loss": 0.2217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22905629873275757, + "step": 1120, + "valid_targets_mean": 5062.0, + "valid_targets_min": 736 + }, + { + "epoch": 2.5, + "grad_norm": 0.4800390566792728, + "learning_rate": 3.248711603892765e-05, + "loss": 0.2144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2034926414489746, + "step": 1125, + "valid_targets_mean": 4869.9, + "valid_targets_min": 1248 + }, + { + "epoch": 2.511111111111111, + "grad_norm": 3.5144120907404144, + "learning_rate": 3.240036311572635e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3505418002605438, + "step": 1130, + "valid_targets_mean": 3852.2, + "valid_targets_min": 763 + }, + { + "epoch": 2.522222222222222, + "grad_norm": 0.6588418511183887, + "learning_rate": 3.2313229506513167e-05, + "loss": 0.2188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24138295650482178, + "step": 1135, + "valid_targets_mean": 3041.4, + "valid_targets_min": 419 + }, + { + "epoch": 2.533333333333333, + "grad_norm": 0.4351804600110289, + "learning_rate": 3.22257178862538e-05, + "loss": 0.2431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22207850217819214, + "step": 1140, + "valid_targets_mean": 5268.9, + "valid_targets_min": 1175 + }, + { + "epoch": 2.5444444444444443, + "grad_norm": 0.5924820635484583, + "learning_rate": 3.213783094151873e-05, + "loss": 0.2299, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24633681774139404, + "step": 1145, + "valid_targets_mean": 4402.8, + "valid_targets_min": 852 + }, + { + "epoch": 2.5555555555555554, + "grad_norm": 0.4637928251048832, + "learning_rate": 3.204957137040079e-05, + "loss": 0.2235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22640839219093323, + "step": 1150, + "valid_targets_mean": 4871.2, + "valid_targets_min": 966 + }, + { + "epoch": 2.5666666666666664, + "grad_norm": 0.5791271707374432, + "learning_rate": 3.196094188243224e-05, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24356502294540405, + "step": 1155, + "valid_targets_mean": 4226.2, + "valid_targets_min": 795 + }, + { + "epoch": 2.5777777777777775, + "grad_norm": 0.544475419998541, + "learning_rate": 3.187194519850167e-05, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21199198067188263, + "step": 1160, + "valid_targets_mean": 4666.7, + "valid_targets_min": 555 + }, + { + "epoch": 2.588888888888889, + "grad_norm": 0.5117366718721723, + "learning_rate": 3.17825840507704e-05, + "loss": 0.2326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2462947815656662, + "step": 1165, + "valid_targets_mean": 4306.7, + "valid_targets_min": 231 + }, + { + "epoch": 2.6, + "grad_norm": 0.5185736052483539, + "learning_rate": 3.169286118258867e-05, + "loss": 0.2191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21428170800209045, + "step": 1170, + "valid_targets_mean": 4292.4, + "valid_targets_min": 374 + }, + { + "epoch": 2.611111111111111, + "grad_norm": 0.5124442481522459, + "learning_rate": 3.1602779348411354e-05, + "loss": 0.2194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21586015820503235, + "step": 1175, + "valid_targets_mean": 4098.7, + "valid_targets_min": 284 + }, + { + "epoch": 2.6222222222222222, + "grad_norm": 0.5013956254301979, + "learning_rate": 3.151234131371348e-05, + "loss": 0.2224, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20073631405830383, + "step": 1180, + "valid_targets_mean": 3848.8, + "valid_targets_min": 473 + }, + { + "epoch": 2.6333333333333333, + "grad_norm": 0.5498700950427212, + "learning_rate": 3.142154985490523e-05, + "loss": 0.2231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22234992682933807, + "step": 1185, + "valid_targets_mean": 3493.1, + "valid_targets_min": 754 + }, + { + "epoch": 2.6444444444444444, + "grad_norm": 0.5491708841030193, + "learning_rate": 3.1330407759246805e-05, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22136567533016205, + "step": 1190, + "valid_targets_mean": 3996.4, + "valid_targets_min": 251 + }, + { + "epoch": 2.6555555555555554, + "grad_norm": 0.48268790906327425, + "learning_rate": 3.1238917824762794e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2028353214263916, + "step": 1195, + "valid_targets_mean": 4584.0, + "valid_targets_min": 650 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.49007399842315175, + "learning_rate": 3.1147082860156275e-05, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25679466128349304, + "step": 1200, + "valid_targets_mean": 5399.8, + "valid_targets_min": 982 + }, + { + "epoch": 2.677777777777778, + "grad_norm": 0.4633684064891278, + "learning_rate": 3.105490568472266e-05, + "loss": 0.2108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2022610604763031, + "step": 1205, + "valid_targets_mean": 4171.5, + "valid_targets_min": 757 + }, + { + "epoch": 2.688888888888889, + "grad_norm": 0.4895671052558776, + "learning_rate": 3.0962389128263025e-05, + "loss": 0.2333, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19812658429145813, + "step": 1210, + "valid_targets_mean": 5454.3, + "valid_targets_min": 420 + }, + { + "epoch": 2.7, + "grad_norm": 0.5453665235834874, + "learning_rate": 3.086953603099736e-05, + "loss": 0.2199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20646308362483978, + "step": 1215, + "valid_targets_mean": 4315.4, + "valid_targets_min": 465 + }, + { + "epoch": 2.7111111111111112, + "grad_norm": 0.5581759362267998, + "learning_rate": 3.077634924347728e-05, + "loss": 0.2182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22275254130363464, + "step": 1220, + "valid_targets_mean": 3761.5, + "valid_targets_min": 269 + }, + { + "epoch": 2.7222222222222223, + "grad_norm": 0.5153650133576909, + "learning_rate": 3.068283162649858e-05, + "loss": 0.2293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21216610074043274, + "step": 1225, + "valid_targets_mean": 4164.8, + "valid_targets_min": 296 + }, + { + "epoch": 2.7333333333333334, + "grad_norm": 0.592596188705314, + "learning_rate": 3.0588986051013355e-05, + "loss": 0.216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22181862592697144, + "step": 1230, + "valid_targets_mean": 4035.2, + "valid_targets_min": 283 + }, + { + "epoch": 2.7444444444444445, + "grad_norm": 0.5983613901546053, + "learning_rate": 3.049481539804192e-05, + "loss": 0.2231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22257715463638306, + "step": 1235, + "valid_targets_mean": 3023.5, + "valid_targets_min": 701 + }, + { + "epoch": 2.7555555555555555, + "grad_norm": 0.49305225026999655, + "learning_rate": 3.0400322558584308e-05, + "loss": 0.2201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19669756293296814, + "step": 1240, + "valid_targets_mean": 4822.0, + "valid_targets_min": 1400 + }, + { + "epoch": 2.7666666666666666, + "grad_norm": 0.4752603478313126, + "learning_rate": 3.0305510433531568e-05, + "loss": 0.2444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23400330543518066, + "step": 1245, + "valid_targets_mean": 4814.9, + "valid_targets_min": 300 + }, + { + "epoch": 2.7777777777777777, + "grad_norm": 0.5551934554676596, + "learning_rate": 3.0210381933576654e-05, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2083454132080078, + "step": 1250, + "valid_targets_mean": 3553.5, + "valid_targets_min": 348 + }, + { + "epoch": 2.7888888888888888, + "grad_norm": 0.7744675886957486, + "learning_rate": 3.0114939979125135e-05, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2401510775089264, + "step": 1255, + "valid_targets_mean": 3016.8, + "valid_targets_min": 539 + }, + { + "epoch": 2.8, + "grad_norm": 0.5540461372381361, + "learning_rate": 3.001918750020547e-05, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2751244008541107, + "step": 1260, + "valid_targets_mean": 3901.6, + "valid_targets_min": 539 + }, + { + "epoch": 2.811111111111111, + "grad_norm": 0.6851890356876696, + "learning_rate": 2.992312743637911e-05, + "loss": 0.2121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25260940194129944, + "step": 1265, + "valid_targets_mean": 4583.8, + "valid_targets_min": 668 + }, + { + "epoch": 2.822222222222222, + "grad_norm": 0.48363964794454084, + "learning_rate": 2.982676273665023e-05, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2125380039215088, + "step": 1270, + "valid_targets_mean": 5045.1, + "valid_targets_min": 667 + }, + { + "epoch": 2.8333333333333335, + "grad_norm": 0.6033207649950939, + "learning_rate": 2.97300963593752e-05, + "loss": 0.2301, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22328922152519226, + "step": 1275, + "valid_targets_mean": 5039.6, + "valid_targets_min": 1597 + }, + { + "epoch": 2.8444444444444446, + "grad_norm": 0.5440870025040494, + "learning_rate": 2.9633131272171768e-05, + "loss": 0.2155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21130454540252686, + "step": 1280, + "valid_targets_mean": 3933.2, + "valid_targets_min": 511 + }, + { + "epoch": 2.8555555555555556, + "grad_norm": 0.46685874495827057, + "learning_rate": 2.953587045182795e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2029666304588318, + "step": 1285, + "valid_targets_mean": 4932.2, + "valid_targets_min": 414 + }, + { + "epoch": 2.8666666666666667, + "grad_norm": 0.5083384300553782, + "learning_rate": 2.943831688421066e-05, + "loss": 0.2263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20045515894889832, + "step": 1290, + "valid_targets_mean": 3839.1, + "valid_targets_min": 2102 + }, + { + "epoch": 2.8777777777777778, + "grad_norm": 0.5496306210874803, + "learning_rate": 2.9340473564174003e-05, + "loss": 0.2254, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2500072121620178, + "step": 1295, + "valid_targets_mean": 4098.8, + "valid_targets_min": 417 + }, + { + "epoch": 2.888888888888889, + "grad_norm": 0.5194734275085567, + "learning_rate": 2.9242343495467396e-05, + "loss": 0.2347, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22234457731246948, + "step": 1300, + "valid_targets_mean": 3903.9, + "valid_targets_min": 411 + }, + { + "epoch": 2.9, + "grad_norm": 0.504766986056304, + "learning_rate": 2.9143929690643292e-05, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23288145661354065, + "step": 1305, + "valid_targets_mean": 5635.0, + "valid_targets_min": 1210 + }, + { + "epoch": 2.911111111111111, + "grad_norm": 0.6498390965909955, + "learning_rate": 2.9045235170964754e-05, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23700065910816193, + "step": 1310, + "valid_targets_mean": 4147.7, + "valid_targets_min": 374 + }, + { + "epoch": 2.9222222222222225, + "grad_norm": 0.47655618469018296, + "learning_rate": 2.8946262966312652e-05, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20102891325950623, + "step": 1315, + "valid_targets_mean": 3986.3, + "valid_targets_min": 301 + }, + { + "epoch": 2.9333333333333336, + "grad_norm": 0.444313610842639, + "learning_rate": 2.8847016115092686e-05, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20955491065979004, + "step": 1320, + "valid_targets_mean": 5599.8, + "valid_targets_min": 403 + }, + { + "epoch": 2.9444444444444446, + "grad_norm": 0.5345322272032264, + "learning_rate": 2.8747497664142075e-05, + "loss": 0.2085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21730177104473114, + "step": 1325, + "valid_targets_mean": 3947.6, + "valid_targets_min": 646 + }, + { + "epoch": 2.9555555555555557, + "grad_norm": 0.48868872486689147, + "learning_rate": 2.8647710668636053e-05, + "loss": 0.2251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21820557117462158, + "step": 1330, + "valid_targets_mean": 4328.9, + "valid_targets_min": 301 + }, + { + "epoch": 2.966666666666667, + "grad_norm": 0.5163666912550201, + "learning_rate": 2.8547658191994054e-05, + "loss": 0.2159, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2369096875190735, + "step": 1335, + "valid_targets_mean": 3999.9, + "valid_targets_min": 467 + }, + { + "epoch": 2.977777777777778, + "grad_norm": 0.4525486700692223, + "learning_rate": 2.844734330578567e-05, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22851812839508057, + "step": 1340, + "valid_targets_mean": 5680.0, + "valid_targets_min": 968 + }, + { + "epoch": 2.988888888888889, + "grad_norm": 0.5038511428892624, + "learning_rate": 2.834676908963636e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20257601141929626, + "step": 1345, + "valid_targets_mean": 4431.0, + "valid_targets_min": 604 + }, + { + "epoch": 3.0, + "grad_norm": 0.49413740091680286, + "learning_rate": 2.824593863113291e-05, + "loss": 0.2423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2500663995742798, + "step": 1350, + "valid_targets_mean": 5082.1, + "valid_targets_min": 1029 + }, + { + "epoch": 3.011111111111111, + "grad_norm": 0.5890237248654047, + "learning_rate": 2.814485502572863e-05, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18735849857330322, + "step": 1355, + "valid_targets_mean": 5022.9, + "valid_targets_min": 1330 + }, + { + "epoch": 3.022222222222222, + "grad_norm": 0.5397593530227527, + "learning_rate": 2.804352137664835e-05, + "loss": 0.2145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2516242265701294, + "step": 1360, + "valid_targets_mean": 5099.1, + "valid_targets_min": 360 + }, + { + "epoch": 3.033333333333333, + "grad_norm": 0.568525348633587, + "learning_rate": 2.7941940794793122e-05, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19268794357776642, + "step": 1365, + "valid_targets_mean": 3634.8, + "valid_targets_min": 294 + }, + { + "epoch": 3.0444444444444443, + "grad_norm": 0.504377586126035, + "learning_rate": 2.7840116398644742e-05, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19320614635944366, + "step": 1370, + "valid_targets_mean": 4622.8, + "valid_targets_min": 599 + }, + { + "epoch": 3.0555555555555554, + "grad_norm": 0.6922737256269529, + "learning_rate": 2.7738051314169993e-05, + "loss": 0.195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2083386778831482, + "step": 1375, + "valid_targets_mean": 5940.1, + "valid_targets_min": 2844 + }, + { + "epoch": 3.066666666666667, + "grad_norm": 0.45807741860705403, + "learning_rate": 2.7635748674724702e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17992693185806274, + "step": 1380, + "valid_targets_mean": 5056.9, + "valid_targets_min": 581 + }, + { + "epoch": 3.077777777777778, + "grad_norm": 0.49380418579141977, + "learning_rate": 2.7533211620957532e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20595912635326385, + "step": 1385, + "valid_targets_mean": 5148.5, + "valid_targets_min": 2209 + }, + { + "epoch": 3.088888888888889, + "grad_norm": 0.6287045912836227, + "learning_rate": 2.743044330071356e-05, + "loss": 0.2061, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2235325574874878, + "step": 1390, + "valid_targets_mean": 3636.7, + "valid_targets_min": 382 + }, + { + "epoch": 3.1, + "grad_norm": 0.5630479669186463, + "learning_rate": 2.7327446868937664e-05, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19244463741779327, + "step": 1395, + "valid_targets_mean": 5513.7, + "valid_targets_min": 361 + }, + { + "epoch": 3.111111111111111, + "grad_norm": 0.47811337357814554, + "learning_rate": 2.7224225487577637e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1944870501756668, + "step": 1400, + "valid_targets_mean": 5109.3, + "valid_targets_min": 214 + }, + { + "epoch": 3.1222222222222222, + "grad_norm": 0.5608741822584281, + "learning_rate": 2.712078232548714e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1881995052099228, + "step": 1405, + "valid_targets_mean": 4487.5, + "valid_targets_min": 650 + }, + { + "epoch": 3.1333333333333333, + "grad_norm": 0.5261157563058694, + "learning_rate": 2.7017120558328395e-05, + "loss": 0.2093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1874210387468338, + "step": 1410, + "valid_targets_mean": 4228.4, + "valid_targets_min": 467 + }, + { + "epoch": 3.1444444444444444, + "grad_norm": 0.5496647265564715, + "learning_rate": 2.6913243368474734e-05, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22300265729427338, + "step": 1415, + "valid_targets_mean": 4393.3, + "valid_targets_min": 642 + }, + { + "epoch": 3.1555555555555554, + "grad_norm": 0.49758618583607633, + "learning_rate": 2.680915394491286e-05, + "loss": 0.2028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19458116590976715, + "step": 1420, + "valid_targets_mean": 4739.6, + "valid_targets_min": 542 + }, + { + "epoch": 3.1666666666666665, + "grad_norm": 0.5218888450001141, + "learning_rate": 2.6704855483144973e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21135935187339783, + "step": 1425, + "valid_targets_mean": 4440.1, + "valid_targets_min": 992 + }, + { + "epoch": 3.1777777777777776, + "grad_norm": 0.5426640353608564, + "learning_rate": 2.6600351185090637e-05, + "loss": 0.211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19701051712036133, + "step": 1430, + "valid_targets_mean": 3513.1, + "valid_targets_min": 291 + }, + { + "epoch": 3.188888888888889, + "grad_norm": 0.5662706793465802, + "learning_rate": 2.649564425898853e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21139845252037048, + "step": 1435, + "valid_targets_mean": 3985.6, + "valid_targets_min": 699 + }, + { + "epoch": 3.2, + "grad_norm": 0.4718898918421046, + "learning_rate": 2.6390737919297925e-05, + "loss": 0.1933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1822149157524109, + "step": 1440, + "valid_targets_mean": 5072.8, + "valid_targets_min": 903 + }, + { + "epoch": 3.2111111111111112, + "grad_norm": 0.5271902172821914, + "learning_rate": 2.6285635386599983e-05, + "loss": 0.2036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1844690442085266, + "step": 1445, + "valid_targets_mean": 4660.9, + "valid_targets_min": 942 + }, + { + "epoch": 3.2222222222222223, + "grad_norm": 0.7537646783410501, + "learning_rate": 2.618033988749895e-05, + "loss": 0.2114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20052474737167358, + "step": 1450, + "valid_targets_mean": 3193.3, + "valid_targets_min": 263 + }, + { + "epoch": 3.2333333333333334, + "grad_norm": 0.4690363852812957, + "learning_rate": 2.6074854654523023e-05, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16611334681510925, + "step": 1455, + "valid_targets_mean": 4196.4, + "valid_targets_min": 469 + }, + { + "epoch": 3.2444444444444445, + "grad_norm": 0.4572908827805106, + "learning_rate": 2.596918292602518e-05, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18506214022636414, + "step": 1460, + "valid_targets_mean": 5220.0, + "valid_targets_min": 507 + }, + { + "epoch": 3.2555555555555555, + "grad_norm": 0.5395419499985299, + "learning_rate": 2.586332794608371e-05, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18608039617538452, + "step": 1465, + "valid_targets_mean": 3894.8, + "valid_targets_min": 1041 + }, + { + "epoch": 3.2666666666666666, + "grad_norm": 0.8552816175037915, + "learning_rate": 2.5757292964402653e-05, + "loss": 0.1956, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18753841519355774, + "step": 1470, + "valid_targets_mean": 4177.2, + "valid_targets_min": 357 + }, + { + "epoch": 3.2777777777777777, + "grad_norm": 0.6502074224948179, + "learning_rate": 2.5651081236212045e-05, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1877031922340393, + "step": 1475, + "valid_targets_mean": 3714.6, + "valid_targets_min": 587 + }, + { + "epoch": 3.2888888888888888, + "grad_norm": 0.504138623747214, + "learning_rate": 2.5544696022167945e-05, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19270633161067963, + "step": 1480, + "valid_targets_mean": 5232.9, + "valid_targets_min": 621 + }, + { + "epoch": 3.3, + "grad_norm": 0.5562353529549702, + "learning_rate": 2.5438140588252367e-05, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23178471624851227, + "step": 1485, + "valid_targets_mean": 4443.0, + "valid_targets_min": 1115 + }, + { + "epoch": 3.311111111111111, + "grad_norm": 0.6990935382908224, + "learning_rate": 2.5331418205672988e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1817629337310791, + "step": 1490, + "valid_targets_mean": 3285.0, + "valid_targets_min": 387 + }, + { + "epoch": 3.3222222222222224, + "grad_norm": 0.5161517693720137, + "learning_rate": 2.522453215076277e-05, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21749022603034973, + "step": 1495, + "valid_targets_mean": 5134.9, + "valid_targets_min": 839 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.546904151431328, + "learning_rate": 2.511748570487932e-05, + "loss": 0.2122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23374782502651215, + "step": 1500, + "valid_targets_mean": 4250.7, + "valid_targets_min": 588 + }, + { + "epoch": 3.3444444444444446, + "grad_norm": 0.47277844481370324, + "learning_rate": 2.5010282154304193e-05, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20504996180534363, + "step": 1505, + "valid_targets_mean": 5340.2, + "valid_targets_min": 586 + }, + { + "epoch": 3.3555555555555556, + "grad_norm": 0.5412015372324224, + "learning_rate": 2.4902924790142004e-05, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20246189832687378, + "step": 1510, + "valid_targets_mean": 3491.1, + "valid_targets_min": 218 + }, + { + "epoch": 3.3666666666666667, + "grad_norm": 0.5638774958328592, + "learning_rate": 2.479541690821935e-05, + "loss": 0.2145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22698086500167847, + "step": 1515, + "valid_targets_mean": 3940.7, + "valid_targets_min": 568 + }, + { + "epoch": 3.3777777777777778, + "grad_norm": 0.4561709138055978, + "learning_rate": 2.4687761808983693e-05, + "loss": 0.1977, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1643875539302826, + "step": 1520, + "valid_targets_mean": 5342.7, + "valid_targets_min": 1439 + }, + { + "epoch": 3.388888888888889, + "grad_norm": 0.5234462175070752, + "learning_rate": 2.457996279740199e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19301840662956238, + "step": 1525, + "valid_targets_mean": 4052.8, + "valid_targets_min": 636 + }, + { + "epoch": 3.4, + "grad_norm": 0.5228201850030787, + "learning_rate": 2.4472023182859257e-05, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18041759729385376, + "step": 1530, + "valid_targets_mean": 4235.0, + "valid_targets_min": 1815 + }, + { + "epoch": 3.411111111111111, + "grad_norm": 0.5751004806856855, + "learning_rate": 2.4363946279056947e-05, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20007622241973877, + "step": 1535, + "valid_targets_mean": 5299.6, + "valid_targets_min": 1225 + }, + { + "epoch": 3.422222222222222, + "grad_norm": 0.5955197652307113, + "learning_rate": 2.4255735403911243e-05, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19078674912452698, + "step": 1540, + "valid_targets_mean": 3658.2, + "valid_targets_min": 808 + }, + { + "epoch": 3.4333333333333336, + "grad_norm": 0.5223462323520682, + "learning_rate": 2.4147393879451205e-05, + "loss": 0.215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22115221619606018, + "step": 1545, + "valid_targets_mean": 4941.4, + "valid_targets_min": 530 + }, + { + "epoch": 3.4444444444444446, + "grad_norm": 0.5358147295716755, + "learning_rate": 2.4038925031716755e-05, + "loss": 0.2036, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1802097111940384, + "step": 1550, + "valid_targets_mean": 3892.2, + "valid_targets_min": 376 + }, + { + "epoch": 3.4555555555555557, + "grad_norm": 0.5061336217509301, + "learning_rate": 2.3930332190656604e-05, + "loss": 0.2151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1854897439479828, + "step": 1555, + "valid_targets_mean": 4797.3, + "valid_targets_min": 545 + }, + { + "epoch": 3.466666666666667, + "grad_norm": 0.6266246922904652, + "learning_rate": 2.382161869002599e-05, + "loss": 0.1986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2148759663105011, + "step": 1560, + "valid_targets_mean": 3841.4, + "valid_targets_min": 822 + }, + { + "epoch": 3.477777777777778, + "grad_norm": 0.7044788267096704, + "learning_rate": 2.371278786728436e-05, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24996237456798553, + "step": 1565, + "valid_targets_mean": 2695.3, + "valid_targets_min": 332 + }, + { + "epoch": 3.488888888888889, + "grad_norm": 0.49301848581474994, + "learning_rate": 2.3603843063492892e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2028578519821167, + "step": 1570, + "valid_targets_mean": 5528.7, + "valid_targets_min": 2284 + }, + { + "epoch": 3.5, + "grad_norm": 0.5882627059324441, + "learning_rate": 2.3494787623211954e-05, + "loss": 0.1965, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19135794043540955, + "step": 1575, + "valid_targets_mean": 3645.6, + "valid_targets_min": 1006 + }, + { + "epoch": 3.511111111111111, + "grad_norm": 0.4963690972784803, + "learning_rate": 2.3385624894398387e-05, + "loss": 0.1973, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20491212606430054, + "step": 1580, + "valid_targets_mean": 5376.0, + "valid_targets_min": 769 + }, + { + "epoch": 3.522222222222222, + "grad_norm": 0.538419155290378, + "learning_rate": 2.3276358228302757e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20943906903266907, + "step": 1585, + "valid_targets_mean": 3945.6, + "valid_targets_min": 590 + }, + { + "epoch": 3.533333333333333, + "grad_norm": 0.6038918409459312, + "learning_rate": 2.316699097936646e-05, + "loss": 0.191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1826208233833313, + "step": 1590, + "valid_targets_mean": 2921.8, + "valid_targets_min": 247 + }, + { + "epoch": 3.5444444444444443, + "grad_norm": 0.48461226325428114, + "learning_rate": 2.305752650511874e-05, + "loss": 0.1971, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23485904932022095, + "step": 1595, + "valid_targets_mean": 5048.4, + "valid_targets_min": 550 + }, + { + "epoch": 3.5555555555555554, + "grad_norm": 0.4544047818243079, + "learning_rate": 2.2947968166073627e-05, + "loss": 0.2015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17371800541877747, + "step": 1600, + "valid_targets_mean": 4993.6, + "valid_targets_min": 503 + }, + { + "epoch": 3.5666666666666664, + "grad_norm": 0.4627984476791086, + "learning_rate": 2.2838319325626746e-05, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17911431193351746, + "step": 1605, + "valid_targets_mean": 5385.9, + "valid_targets_min": 2454 + }, + { + "epoch": 3.5777777777777775, + "grad_norm": 0.5655196781582149, + "learning_rate": 2.2728583349952094e-05, + "loss": 0.1995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18993490934371948, + "step": 1610, + "valid_targets_mean": 4732.7, + "valid_targets_min": 774 + }, + { + "epoch": 3.588888888888889, + "grad_norm": 0.5352167184955361, + "learning_rate": 2.2618763607898666e-05, + "loss": 0.2028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1996089518070221, + "step": 1615, + "valid_targets_mean": 4169.6, + "valid_targets_min": 391 + }, + { + "epoch": 3.6, + "grad_norm": 0.533764539999318, + "learning_rate": 2.250886347088707e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1746983379125595, + "step": 1620, + "valid_targets_mean": 4142.2, + "valid_targets_min": 402 + }, + { + "epoch": 3.611111111111111, + "grad_norm": 0.510040722808607, + "learning_rate": 2.2398886312805996e-05, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19001047313213348, + "step": 1625, + "valid_targets_mean": 5082.9, + "valid_targets_min": 636 + }, + { + "epoch": 3.6222222222222222, + "grad_norm": 0.5755748579267672, + "learning_rate": 2.228883550990864e-05, + "loss": 0.213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21132859587669373, + "step": 1630, + "valid_targets_mean": 4745.2, + "valid_targets_min": 537 + }, + { + "epoch": 3.6333333333333333, + "grad_norm": 0.4921775589074553, + "learning_rate": 2.2178714440709084e-05, + "loss": 0.203, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1739899069070816, + "step": 1635, + "valid_targets_mean": 4681.7, + "valid_targets_min": 750 + }, + { + "epoch": 3.6444444444444444, + "grad_norm": 0.4640557234030302, + "learning_rate": 2.206852648587853e-05, + "loss": 0.2106, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20348605513572693, + "step": 1640, + "valid_targets_mean": 5669.5, + "valid_targets_min": 932 + }, + { + "epoch": 3.6555555555555554, + "grad_norm": 0.5916842012997149, + "learning_rate": 2.1958275028141566e-05, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27221259474754333, + "step": 1645, + "valid_targets_mean": 4139.3, + "valid_targets_min": 510 + }, + { + "epoch": 3.6666666666666665, + "grad_norm": 0.5390040585860258, + "learning_rate": 2.1847963452172283e-05, + "loss": 0.192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1900901198387146, + "step": 1650, + "valid_targets_mean": 3726.5, + "valid_targets_min": 825 + }, + { + "epoch": 3.677777777777778, + "grad_norm": 0.5164729678261945, + "learning_rate": 2.173759514449037e-05, + "loss": 0.2014, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19901810586452484, + "step": 1655, + "valid_targets_mean": 4287.4, + "valid_targets_min": 651 + }, + { + "epoch": 3.688888888888889, + "grad_norm": 0.5897377625071923, + "learning_rate": 2.1627173493357167e-05, + "loss": 0.1981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2279503494501114, + "step": 1660, + "valid_targets_mean": 4376.0, + "valid_targets_min": 460 + }, + { + "epoch": 3.7, + "grad_norm": 0.5386462237457673, + "learning_rate": 2.1516701888671633e-05, + "loss": 0.2009, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19399359822273254, + "step": 1665, + "valid_targets_mean": 4516.8, + "valid_targets_min": 1662 + }, + { + "epoch": 3.7111111111111112, + "grad_norm": 0.6495411852106558, + "learning_rate": 2.1406183721866274e-05, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21747617423534393, + "step": 1670, + "valid_targets_mean": 3540.1, + "valid_targets_min": 859 + }, + { + "epoch": 3.7222222222222223, + "grad_norm": 0.6306749469465672, + "learning_rate": 2.1295622385803036e-05, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2108190804719925, + "step": 1675, + "valid_targets_mean": 3204.0, + "valid_targets_min": 394 + }, + { + "epoch": 3.7333333333333334, + "grad_norm": 0.5431455240184233, + "learning_rate": 2.118502127466916e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17692044377326965, + "step": 1680, + "valid_targets_mean": 4319.1, + "valid_targets_min": 897 + }, + { + "epoch": 3.7444444444444445, + "grad_norm": 0.6628241987227439, + "learning_rate": 2.1074383783872932e-05, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21650046110153198, + "step": 1685, + "valid_targets_mean": 4163.4, + "valid_targets_min": 597 + }, + { + "epoch": 3.7555555555555555, + "grad_norm": 0.5030160452803907, + "learning_rate": 2.0963713309939516e-05, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18289291858673096, + "step": 1690, + "valid_targets_mean": 4629.3, + "valid_targets_min": 1126 + }, + { + "epoch": 3.7666666666666666, + "grad_norm": 0.4851965144847549, + "learning_rate": 2.0853013250406616e-05, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18697203695774078, + "step": 1695, + "valid_targets_mean": 4800.1, + "valid_targets_min": 449 + }, + { + "epoch": 3.7777777777777777, + "grad_norm": 0.5785969302326881, + "learning_rate": 2.0742287003720207e-05, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21332597732543945, + "step": 1700, + "valid_targets_mean": 3694.1, + "valid_targets_min": 786 + }, + { + "epoch": 3.7888888888888888, + "grad_norm": 0.5232277659642465, + "learning_rate": 2.063153796913022e-05, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18892058730125427, + "step": 1705, + "valid_targets_mean": 4132.3, + "valid_targets_min": 915 + }, + { + "epoch": 3.8, + "grad_norm": 0.4794174421487984, + "learning_rate": 2.0520769546586133e-05, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17793911695480347, + "step": 1710, + "valid_targets_mean": 4531.4, + "valid_targets_min": 629 + }, + { + "epoch": 3.811111111111111, + "grad_norm": 0.6049071480792642, + "learning_rate": 2.040998513663265e-05, + "loss": 0.2127, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1857706606388092, + "step": 1715, + "valid_targets_mean": 3913.2, + "valid_targets_min": 760 + }, + { + "epoch": 3.822222222222222, + "grad_norm": 0.5173798800043088, + "learning_rate": 2.0299188140305276e-05, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23684772849082947, + "step": 1720, + "valid_targets_mean": 4988.8, + "valid_targets_min": 907 + }, + { + "epoch": 3.8333333333333335, + "grad_norm": 0.475948457370568, + "learning_rate": 2.0188381959025905e-05, + "loss": 0.2102, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21646377444267273, + "step": 1725, + "valid_targets_mean": 5030.8, + "valid_targets_min": 893 + }, + { + "epoch": 3.8444444444444446, + "grad_norm": 0.5604528558221183, + "learning_rate": 2.007756999449841e-05, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21436989307403564, + "step": 1730, + "valid_targets_mean": 4397.3, + "valid_targets_min": 436 + }, + { + "epoch": 3.8555555555555556, + "grad_norm": 0.5570797649957374, + "learning_rate": 1.9966755648604214e-05, + "loss": 0.2082, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24257858097553253, + "step": 1735, + "valid_targets_mean": 4071.1, + "valid_targets_min": 727 + }, + { + "epoch": 3.8666666666666667, + "grad_norm": 0.46080018957317304, + "learning_rate": 1.985594232329783e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18925702571868896, + "step": 1740, + "valid_targets_mean": 5059.1, + "valid_targets_min": 701 + }, + { + "epoch": 3.8777777777777778, + "grad_norm": 0.4812829548781769, + "learning_rate": 1.9745133420502465e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1966492235660553, + "step": 1745, + "valid_targets_mean": 5313.2, + "valid_targets_min": 356 + }, + { + "epoch": 3.888888888888889, + "grad_norm": 0.5203906670242995, + "learning_rate": 1.963433234200553e-05, + "loss": 0.2074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21304510533809662, + "step": 1750, + "valid_targets_mean": 5512.0, + "valid_targets_min": 2531 + }, + { + "epoch": 3.9, + "grad_norm": 0.6126769424919433, + "learning_rate": 1.9523542489354256e-05, + "loss": 0.2047, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22729770839214325, + "step": 1755, + "valid_targets_mean": 5147.4, + "valid_targets_min": 806 + }, + { + "epoch": 3.911111111111111, + "grad_norm": 0.7274046140099543, + "learning_rate": 1.941276726375122e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21173256635665894, + "step": 1760, + "valid_targets_mean": 3680.5, + "valid_targets_min": 341 + }, + { + "epoch": 3.9222222222222225, + "grad_norm": 0.5066040807104453, + "learning_rate": 1.930201006594999e-05, + "loss": 0.2063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1861998438835144, + "step": 1765, + "valid_targets_mean": 4067.4, + "valid_targets_min": 679 + }, + { + "epoch": 3.9333333333333336, + "grad_norm": 0.5122320443909304, + "learning_rate": 1.9191274296150636e-05, + "loss": 0.1942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1726941615343094, + "step": 1770, + "valid_targets_mean": 5269.1, + "valid_targets_min": 2317 + }, + { + "epoch": 3.9444444444444446, + "grad_norm": 0.49916849396763285, + "learning_rate": 1.9080563353895468e-05, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22421357035636902, + "step": 1775, + "valid_targets_mean": 5178.7, + "valid_targets_min": 1286 + }, + { + "epoch": 3.9555555555555557, + "grad_norm": 0.5416199282424495, + "learning_rate": 1.8969880637964523e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20803487300872803, + "step": 1780, + "valid_targets_mean": 4335.1, + "valid_targets_min": 558 + }, + { + "epoch": 3.966666666666667, + "grad_norm": 0.5193931663669582, + "learning_rate": 1.885922954627137e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.189504474401474, + "step": 1785, + "valid_targets_mean": 4079.9, + "valid_targets_min": 238 + }, + { + "epoch": 3.977777777777778, + "grad_norm": 0.5499095062300164, + "learning_rate": 1.874861347575867e-05, + "loss": 0.1983, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20028920471668243, + "step": 1790, + "valid_targets_mean": 3822.6, + "valid_targets_min": 289 + }, + { + "epoch": 3.988888888888889, + "grad_norm": 0.8033600112574325, + "learning_rate": 1.8638035822294e-05, + "loss": 0.2081, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1908196657896042, + "step": 1795, + "valid_targets_mean": 3417.7, + "valid_targets_min": 579 + }, + { + "epoch": 4.0, + "grad_norm": 0.5699774855020878, + "learning_rate": 1.8527499980565505e-05, + "loss": 0.21, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20389728248119354, + "step": 1800, + "valid_targets_mean": 3703.6, + "valid_targets_min": 626 + }, + { + "epoch": 4.011111111111111, + "grad_norm": 0.5684718820432649, + "learning_rate": 1.841700934397776e-05, + "loss": 0.1853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16114360094070435, + "step": 1805, + "valid_targets_mean": 4019.9, + "valid_targets_min": 359 + }, + { + "epoch": 4.022222222222222, + "grad_norm": 0.8806388269505611, + "learning_rate": 1.8306567304547537e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19996638596057892, + "step": 1810, + "valid_targets_mean": 4715.6, + "valid_targets_min": 626 + }, + { + "epoch": 4.033333333333333, + "grad_norm": 0.51053756371864, + "learning_rate": 1.8196177252799715e-05, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17319592833518982, + "step": 1815, + "valid_targets_mean": 4569.9, + "valid_targets_min": 310 + }, + { + "epoch": 4.044444444444444, + "grad_norm": 0.5334838424279315, + "learning_rate": 1.8085842577663152e-05, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17766398191452026, + "step": 1820, + "valid_targets_mean": 4956.6, + "valid_targets_min": 1254 + }, + { + "epoch": 4.055555555555555, + "grad_norm": 0.5972352034988178, + "learning_rate": 1.797556666636669e-05, + "loss": 0.1874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21527108550071716, + "step": 1825, + "valid_targets_mean": 4466.4, + "valid_targets_min": 752 + }, + { + "epoch": 4.066666666666666, + "grad_norm": 0.6680425414614198, + "learning_rate": 1.786535290433512e-05, + "loss": 0.2079, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18751859664916992, + "step": 1830, + "valid_targets_mean": 3271.0, + "valid_targets_min": 257 + }, + { + "epoch": 4.0777777777777775, + "grad_norm": 0.6531224124175032, + "learning_rate": 1.775520467508531e-05, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17578667402267456, + "step": 1835, + "valid_targets_mean": 4452.1, + "valid_targets_min": 272 + }, + { + "epoch": 4.088888888888889, + "grad_norm": 0.5260657108991496, + "learning_rate": 1.7645125360122254e-05, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1891654133796692, + "step": 1840, + "valid_targets_mean": 4629.7, + "valid_targets_min": 449 + }, + { + "epoch": 4.1, + "grad_norm": 0.6623628808793622, + "learning_rate": 1.7535118338835358e-05, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20977142453193665, + "step": 1845, + "valid_targets_mean": 4678.6, + "valid_targets_min": 706 + }, + { + "epoch": 4.111111111111111, + "grad_norm": 0.49817831354909214, + "learning_rate": 1.7425186988394586e-05, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15036168694496155, + "step": 1850, + "valid_targets_mean": 4100.6, + "valid_targets_min": 460 + }, + { + "epoch": 4.122222222222222, + "grad_norm": 0.7692282976104367, + "learning_rate": 1.7315334683646898e-05, + "loss": 0.1957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2357831746339798, + "step": 1855, + "valid_targets_mean": 3612.0, + "valid_targets_min": 469 + }, + { + "epoch": 4.133333333333334, + "grad_norm": 0.7604226917728939, + "learning_rate": 1.7205564797012523e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18661850690841675, + "step": 1860, + "valid_targets_mean": 2620.3, + "valid_targets_min": 350 + }, + { + "epoch": 4.144444444444445, + "grad_norm": 2.0101196160603068, + "learning_rate": 1.709588069838154e-05, + "loss": 0.1809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1724318563938141, + "step": 1865, + "valid_targets_mean": 5017.0, + "valid_targets_min": 1569 + }, + { + "epoch": 4.155555555555556, + "grad_norm": 0.5176081748220945, + "learning_rate": 1.698628575501034e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19876284897327423, + "step": 1870, + "valid_targets_mean": 5018.9, + "valid_targets_min": 263 + }, + { + "epoch": 4.166666666666667, + "grad_norm": 0.9808635975742276, + "learning_rate": 1.6876783331418298e-05, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1666911095380783, + "step": 1875, + "valid_targets_mean": 4702.4, + "valid_targets_min": 407 + }, + { + "epoch": 4.177777777777778, + "grad_norm": 0.5808524542461861, + "learning_rate": 1.6767376789284463e-05, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1758621335029602, + "step": 1880, + "valid_targets_mean": 4182.8, + "valid_targets_min": 784 + }, + { + "epoch": 4.188888888888889, + "grad_norm": 0.6204999195476525, + "learning_rate": 1.6658069487344375e-05, + "loss": 0.1899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2005249559879303, + "step": 1885, + "valid_targets_mean": 3792.5, + "valid_targets_min": 494 + }, + { + "epoch": 4.2, + "grad_norm": 0.5137903997287736, + "learning_rate": 1.6548864781286922e-05, + "loss": 0.1972, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2340593785047531, + "step": 1890, + "valid_targets_mean": 5812.1, + "valid_targets_min": 524 + }, + { + "epoch": 4.211111111111111, + "grad_norm": 0.6084912042277302, + "learning_rate": 1.643976602365136e-05, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16975033283233643, + "step": 1895, + "valid_targets_mean": 4331.1, + "valid_targets_min": 331 + }, + { + "epoch": 4.222222222222222, + "grad_norm": 0.6075386711012212, + "learning_rate": 1.6330776563724354e-05, + "loss": 0.185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22459827363491058, + "step": 1900, + "valid_targets_mean": 4126.8, + "valid_targets_min": 795 + }, + { + "epoch": 4.233333333333333, + "grad_norm": 0.5755129056791891, + "learning_rate": 1.62218997474372e-05, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21275658905506134, + "step": 1905, + "valid_targets_mean": 4578.6, + "valid_targets_min": 1108 + }, + { + "epoch": 4.2444444444444445, + "grad_norm": 0.48496618553325505, + "learning_rate": 1.6113138917263048e-05, + "loss": 0.1772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19767220318317413, + "step": 1910, + "valid_targets_mean": 5804.9, + "valid_targets_min": 2846 + }, + { + "epoch": 4.2555555555555555, + "grad_norm": 0.4549668072241525, + "learning_rate": 1.6004497412114354e-05, + "loss": 0.1867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17819060385227203, + "step": 1915, + "valid_targets_mean": 5614.6, + "valid_targets_min": 1831 + }, + { + "epoch": 4.266666666666667, + "grad_norm": 0.6183406372685394, + "learning_rate": 1.5895978567240314e-05, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15784139931201935, + "step": 1920, + "valid_targets_mean": 4810.9, + "valid_targets_min": 773 + }, + { + "epoch": 4.277777777777778, + "grad_norm": 0.8322895904947307, + "learning_rate": 1.578758571412455e-05, + "loss": 0.1762, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18652507662773132, + "step": 1925, + "valid_targets_mean": 4329.2, + "valid_targets_min": 386 + }, + { + "epoch": 4.288888888888889, + "grad_norm": 0.5770135845347438, + "learning_rate": 1.5679322180382725e-05, + "loss": 0.1811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16608160734176636, + "step": 1930, + "valid_targets_mean": 4621.5, + "valid_targets_min": 311 + }, + { + "epoch": 4.3, + "grad_norm": 0.5340562181628422, + "learning_rate": 1.5571191289660517e-05, + "loss": 0.1788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19259527325630188, + "step": 1935, + "valid_targets_mean": 4542.4, + "valid_targets_min": 260 + }, + { + "epoch": 4.311111111111111, + "grad_norm": 0.5261906980551986, + "learning_rate": 1.5463196361531463e-05, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1652071177959442, + "step": 1940, + "valid_targets_mean": 4197.6, + "valid_targets_min": 351 + }, + { + "epoch": 4.322222222222222, + "grad_norm": 0.592935992602908, + "learning_rate": 1.5355340711395154e-05, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.173154816031456, + "step": 1945, + "valid_targets_mean": 3825.3, + "valid_targets_min": 362 + }, + { + "epoch": 4.333333333333333, + "grad_norm": 0.6272334939060681, + "learning_rate": 1.5247627650375356e-05, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2008039653301239, + "step": 1950, + "valid_targets_mean": 3113.1, + "valid_targets_min": 322 + }, + { + "epoch": 4.344444444444444, + "grad_norm": 0.555228561708398, + "learning_rate": 1.5140060485218448e-05, + "loss": 0.2096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21200916171073914, + "step": 1955, + "valid_targets_mean": 4375.6, + "valid_targets_min": 310 + }, + { + "epoch": 4.355555555555555, + "grad_norm": 0.6835598461585742, + "learning_rate": 1.5032642518191842e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14425814151763916, + "step": 1960, + "valid_targets_mean": 3954.5, + "valid_targets_min": 330 + }, + { + "epoch": 4.366666666666666, + "grad_norm": 0.6035952467820439, + "learning_rate": 1.4925377046982642e-05, + "loss": 0.2015, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21049894392490387, + "step": 1965, + "valid_targets_mean": 4949.8, + "valid_targets_min": 355 + }, + { + "epoch": 4.377777777777778, + "grad_norm": 0.5857625986334895, + "learning_rate": 1.4818267364596382e-05, + "loss": 0.1876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18046881258487701, + "step": 1970, + "valid_targets_mean": 4697.1, + "valid_targets_min": 299 + }, + { + "epoch": 4.388888888888889, + "grad_norm": 0.5718012391776226, + "learning_rate": 1.4711316759255963e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1622503101825714, + "step": 1975, + "valid_targets_mean": 3431.1, + "valid_targets_min": 354 + }, + { + "epoch": 4.4, + "grad_norm": 0.43582007281524776, + "learning_rate": 1.4604528514300657e-05, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15850958228111267, + "step": 1980, + "valid_targets_mean": 5733.1, + "valid_targets_min": 910 + }, + { + "epoch": 4.411111111111111, + "grad_norm": 0.49374637349645484, + "learning_rate": 1.449790590808537e-05, + "loss": 0.1876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18013733625411987, + "step": 1985, + "valid_targets_mean": 5131.2, + "valid_targets_min": 337 + }, + { + "epoch": 4.4222222222222225, + "grad_norm": 0.572851756273217, + "learning_rate": 1.4391452213879949e-05, + "loss": 0.193, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20474226772785187, + "step": 1990, + "valid_targets_mean": 4462.7, + "valid_targets_min": 325 + }, + { + "epoch": 4.433333333333334, + "grad_norm": 0.5793355474922025, + "learning_rate": 1.428517069976872e-05, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16491413116455078, + "step": 1995, + "valid_targets_mean": 3528.5, + "valid_targets_min": 552 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.5903074938236061, + "learning_rate": 1.4179064628550139e-05, + "loss": 0.1935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1816416233778, + "step": 2000, + "valid_targets_mean": 3910.4, + "valid_targets_min": 763 + }, + { + "epoch": 4.455555555555556, + "grad_norm": 0.6305819902409494, + "learning_rate": 1.4073137257636664e-05, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19318446516990662, + "step": 2005, + "valid_targets_mean": 3550.6, + "valid_targets_min": 302 + }, + { + "epoch": 4.466666666666667, + "grad_norm": 0.4779759860608762, + "learning_rate": 1.3967391838954692e-05, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18327465653419495, + "step": 2010, + "valid_targets_mean": 5106.4, + "valid_targets_min": 500 + }, + { + "epoch": 4.477777777777778, + "grad_norm": 0.5305939803137136, + "learning_rate": 1.3861831618844797e-05, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1693880707025528, + "step": 2015, + "valid_targets_mean": 4351.6, + "valid_targets_min": 229 + }, + { + "epoch": 4.488888888888889, + "grad_norm": 0.556324424618143, + "learning_rate": 1.3756459837962006e-05, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20899684727191925, + "step": 2020, + "valid_targets_mean": 4319.3, + "valid_targets_min": 549 + }, + { + "epoch": 4.5, + "grad_norm": 0.511876429216537, + "learning_rate": 1.3651279731176364e-05, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17796722054481506, + "step": 2025, + "valid_targets_mean": 4701.2, + "valid_targets_min": 288 + }, + { + "epoch": 4.511111111111111, + "grad_norm": 0.483714945780867, + "learning_rate": 1.354629452747357e-05, + "loss": 0.1912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20415997505187988, + "step": 2030, + "valid_targets_mean": 5413.3, + "valid_targets_min": 828 + }, + { + "epoch": 4.522222222222222, + "grad_norm": 0.550003762747203, + "learning_rate": 1.3441507449855914e-05, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19250717759132385, + "step": 2035, + "valid_targets_mean": 4369.6, + "valid_targets_min": 479 + }, + { + "epoch": 4.533333333333333, + "grad_norm": 0.6783462987349089, + "learning_rate": 1.3336921715243269e-05, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19246190786361694, + "step": 2040, + "valid_targets_mean": 3528.1, + "valid_targets_min": 374 + }, + { + "epoch": 4.544444444444444, + "grad_norm": 0.5408235091774686, + "learning_rate": 1.323254053437438e-05, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17241522669792175, + "step": 2045, + "valid_targets_mean": 4255.3, + "valid_targets_min": 369 + }, + { + "epoch": 4.555555555555555, + "grad_norm": 0.6039322559430673, + "learning_rate": 1.3128367111708263e-05, + "loss": 0.1996, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2341468334197998, + "step": 2050, + "valid_targets_mean": 3956.6, + "valid_targets_min": 394 + }, + { + "epoch": 4.566666666666666, + "grad_norm": 0.5210903518867492, + "learning_rate": 1.3024404645325852e-05, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15793342888355255, + "step": 2055, + "valid_targets_mean": 4055.9, + "valid_targets_min": 445 + }, + { + "epoch": 4.5777777777777775, + "grad_norm": 0.5833872344585873, + "learning_rate": 1.2920656326831802e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1832209825515747, + "step": 2060, + "valid_targets_mean": 4347.2, + "valid_targets_min": 821 + }, + { + "epoch": 4.588888888888889, + "grad_norm": 0.6031674137203222, + "learning_rate": 1.2817125341256533e-05, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1834879219532013, + "step": 2065, + "valid_targets_mean": 4529.2, + "valid_targets_min": 599 + }, + { + "epoch": 4.6, + "grad_norm": 0.5606753056516752, + "learning_rate": 1.271381486695841e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19605132937431335, + "step": 2070, + "valid_targets_mean": 4212.9, + "valid_targets_min": 787 + }, + { + "epoch": 4.611111111111111, + "grad_norm": 0.5776621511116948, + "learning_rate": 1.2610728075526226e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18587753176689148, + "step": 2075, + "valid_targets_mean": 4053.1, + "valid_targets_min": 691 + }, + { + "epoch": 4.622222222222222, + "grad_norm": 0.5798129503418713, + "learning_rate": 1.250786813168176e-05, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20402874052524567, + "step": 2080, + "valid_targets_mean": 3672.6, + "valid_targets_min": 344 + }, + { + "epoch": 4.633333333333333, + "grad_norm": 0.6919963067401517, + "learning_rate": 1.2405238193182711e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17745724320411682, + "step": 2085, + "valid_targets_mean": 4872.3, + "valid_targets_min": 893 + }, + { + "epoch": 4.644444444444445, + "grad_norm": 0.5410119427822812, + "learning_rate": 1.2302841410725664e-05, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15331071615219116, + "step": 2090, + "valid_targets_mean": 3988.9, + "valid_targets_min": 428 + }, + { + "epoch": 4.655555555555556, + "grad_norm": 0.7216545479667651, + "learning_rate": 1.2200680927849447e-05, + "loss": 0.1981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1797543466091156, + "step": 2095, + "valid_targets_mean": 2614.6, + "valid_targets_min": 251 + }, + { + "epoch": 4.666666666666667, + "grad_norm": 0.5002180832671941, + "learning_rate": 1.2098759880838562e-05, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18461236357688904, + "step": 2100, + "valid_targets_mean": 4813.4, + "valid_targets_min": 435 + }, + { + "epoch": 4.677777777777778, + "grad_norm": 0.6182929611883429, + "learning_rate": 1.1997081398626951e-05, + "loss": 0.1859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2105226218700409, + "step": 2105, + "valid_targets_mean": 4094.2, + "valid_targets_min": 323 + }, + { + "epoch": 4.688888888888889, + "grad_norm": 0.4881507674799763, + "learning_rate": 1.18956486027019e-05, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19194287061691284, + "step": 2110, + "valid_targets_mean": 5929.9, + "valid_targets_min": 393 + }, + { + "epoch": 4.7, + "grad_norm": 0.5040091679866664, + "learning_rate": 1.179446460700824e-05, + "loss": 0.1725, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16895370185375214, + "step": 2115, + "valid_targets_mean": 4929.5, + "valid_targets_min": 707 + }, + { + "epoch": 4.711111111111111, + "grad_norm": 0.6224311426866647, + "learning_rate": 1.1693532517852723e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19463957846164703, + "step": 2120, + "valid_targets_mean": 3470.4, + "valid_targets_min": 274 + }, + { + "epoch": 4.722222222222222, + "grad_norm": 0.5006685355462409, + "learning_rate": 1.1592855433808694e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18951840698719025, + "step": 2125, + "valid_targets_mean": 4650.5, + "valid_targets_min": 472 + }, + { + "epoch": 4.733333333333333, + "grad_norm": 0.5583432290318547, + "learning_rate": 1.1492436445620925e-05, + "loss": 0.2116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2058507204055786, + "step": 2130, + "valid_targets_mean": 4445.6, + "valid_targets_min": 507 + }, + { + "epoch": 4.7444444444444445, + "grad_norm": 0.6115562244324856, + "learning_rate": 1.1392278636110779e-05, + "loss": 0.1792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1816525161266327, + "step": 2135, + "valid_targets_mean": 3926.6, + "valid_targets_min": 317 + }, + { + "epoch": 4.7555555555555555, + "grad_norm": 0.6799289643246074, + "learning_rate": 1.1292385080081517e-05, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18920482695102692, + "step": 2140, + "valid_targets_mean": 3852.1, + "valid_targets_min": 664 + }, + { + "epoch": 4.766666666666667, + "grad_norm": 0.5372580596002592, + "learning_rate": 1.1192758844223936e-05, + "loss": 0.1866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19938793778419495, + "step": 2145, + "valid_targets_mean": 4286.1, + "valid_targets_min": 1315 + }, + { + "epoch": 4.777777777777778, + "grad_norm": 0.6680811833814686, + "learning_rate": 1.1093402987022213e-05, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1695014089345932, + "step": 2150, + "valid_targets_mean": 3701.4, + "valid_targets_min": 490 + }, + { + "epoch": 4.788888888888889, + "grad_norm": 0.532047483958881, + "learning_rate": 1.0994320558660027e-05, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17513462901115417, + "step": 2155, + "valid_targets_mean": 4425.6, + "valid_targets_min": 433 + }, + { + "epoch": 4.8, + "grad_norm": 0.48955367777151076, + "learning_rate": 1.0895514600926885e-05, + "loss": 0.1867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17399895191192627, + "step": 2160, + "valid_targets_mean": 5218.5, + "valid_targets_min": 214 + }, + { + "epoch": 4.811111111111111, + "grad_norm": 0.5389514735096081, + "learning_rate": 1.0796988147124767e-05, + "loss": 0.1975, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17753294110298157, + "step": 2165, + "valid_targets_mean": 4527.4, + "valid_targets_min": 503 + }, + { + "epoch": 4.822222222222222, + "grad_norm": 0.9507144410317093, + "learning_rate": 1.0698744221974992e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16774040460586548, + "step": 2170, + "valid_targets_mean": 4067.5, + "valid_targets_min": 326 + }, + { + "epoch": 4.833333333333333, + "grad_norm": 0.7577965149578579, + "learning_rate": 1.0600785841525387e-05, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.171237975358963, + "step": 2175, + "valid_targets_mean": 2212.1, + "valid_targets_min": 244 + }, + { + "epoch": 4.844444444444444, + "grad_norm": 0.6346378233034229, + "learning_rate": 1.050311601305765e-05, + "loss": 0.1986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19538968801498413, + "step": 2180, + "valid_targets_mean": 3583.8, + "valid_targets_min": 516 + }, + { + "epoch": 4.855555555555555, + "grad_norm": 0.4998326129850529, + "learning_rate": 1.0405737734995083e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18201160430908203, + "step": 2185, + "valid_targets_mean": 5208.7, + "valid_targets_min": 532 + }, + { + "epoch": 4.866666666666667, + "grad_norm": 0.4912406558278592, + "learning_rate": 1.0308653996810464e-05, + "loss": 0.188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1872013956308365, + "step": 2190, + "valid_targets_mean": 5092.2, + "valid_targets_min": 425 + }, + { + "epoch": 4.877777777777778, + "grad_norm": 0.49376056988072214, + "learning_rate": 1.0211867778934367e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1690564602613449, + "step": 2195, + "valid_targets_mean": 4442.9, + "valid_targets_min": 429 + }, + { + "epoch": 4.888888888888889, + "grad_norm": 0.508817245830961, + "learning_rate": 1.0115382052663585e-05, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18353170156478882, + "step": 2200, + "valid_targets_mean": 4733.5, + "valid_targets_min": 771 + }, + { + "epoch": 4.9, + "grad_norm": 0.6203985436463371, + "learning_rate": 1.0019199780069964e-05, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.186293363571167, + "step": 2205, + "valid_targets_mean": 5280.8, + "valid_targets_min": 1076 + }, + { + "epoch": 4.911111111111111, + "grad_norm": 0.5435822402870782, + "learning_rate": 9.923323913909432e-06, + "loss": 0.178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16027119755744934, + "step": 2210, + "valid_targets_mean": 4576.7, + "valid_targets_min": 715 + }, + { + "epoch": 4.9222222222222225, + "grad_norm": 0.4813208514339158, + "learning_rate": 9.827757397531373e-06, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18010596930980682, + "step": 2215, + "valid_targets_mean": 4874.1, + "valid_targets_min": 514 + }, + { + "epoch": 4.933333333333334, + "grad_norm": 0.47571932658220617, + "learning_rate": 9.732503164788251e-06, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2023318111896515, + "step": 2220, + "valid_targets_mean": 5265.2, + "valid_targets_min": 713 + }, + { + "epoch": 4.944444444444445, + "grad_norm": 0.5094329037736811, + "learning_rate": 9.637564139945576e-06, + "loss": 0.1875, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17661988735198975, + "step": 2225, + "valid_targets_mean": 4558.8, + "valid_targets_min": 687 + }, + { + "epoch": 4.955555555555556, + "grad_norm": 0.4793807998595178, + "learning_rate": 9.542943237592087e-06, + "loss": 0.1849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16008728742599487, + "step": 2230, + "valid_targets_mean": 4763.2, + "valid_targets_min": 322 + }, + { + "epoch": 4.966666666666667, + "grad_norm": 0.5198576392426981, + "learning_rate": 9.448643362550289e-06, + "loss": 0.1765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1592457890510559, + "step": 2235, + "valid_targets_mean": 4264.2, + "valid_targets_min": 533 + }, + { + "epoch": 4.977777777777778, + "grad_norm": 0.6619815493156004, + "learning_rate": 9.354667409787293e-06, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2216649353504181, + "step": 2240, + "valid_targets_mean": 3992.6, + "valid_targets_min": 507 + }, + { + "epoch": 4.988888888888889, + "grad_norm": 0.6140070608373793, + "learning_rate": 9.261018264325934e-06, + "loss": 0.1891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19572074711322784, + "step": 2245, + "valid_targets_mean": 4591.1, + "valid_targets_min": 537 + }, + { + "epoch": 5.0, + "grad_norm": 0.501892983243998, + "learning_rate": 9.16769880115619e-06, + "loss": 0.1883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1657697856426239, + "step": 2250, + "valid_targets_mean": 4708.9, + "valid_targets_min": 535 + }, + { + "epoch": 5.011111111111111, + "grad_norm": 0.5678698075193299, + "learning_rate": 9.074711885146928e-06, + "loss": 0.1815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18971869349479675, + "step": 2255, + "valid_targets_mean": 4164.5, + "valid_targets_min": 473 + }, + { + "epoch": 5.022222222222222, + "grad_norm": 0.4976115195176415, + "learning_rate": 8.982060370957953e-06, + "loss": 0.1665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1750781536102295, + "step": 2260, + "valid_targets_mean": 5020.1, + "valid_targets_min": 290 + }, + { + "epoch": 5.033333333333333, + "grad_norm": 0.684873203009687, + "learning_rate": 8.889747102952388e-06, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18005429208278656, + "step": 2265, + "valid_targets_mean": 3440.0, + "valid_targets_min": 307 + }, + { + "epoch": 5.044444444444444, + "grad_norm": 0.5033230409129855, + "learning_rate": 8.79777491510932e-06, + "loss": 0.1776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18455460667610168, + "step": 2270, + "valid_targets_mean": 5328.9, + "valid_targets_min": 247 + }, + { + "epoch": 5.055555555555555, + "grad_norm": 0.5173477816105783, + "learning_rate": 8.706146630936833e-06, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16611988842487335, + "step": 2275, + "valid_targets_mean": 4557.4, + "valid_targets_min": 1456 + }, + { + "epoch": 5.066666666666666, + "grad_norm": 0.6042166823752133, + "learning_rate": 8.6148650633853e-06, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16022253036499023, + "step": 2280, + "valid_targets_mean": 4060.4, + "valid_targets_min": 406 + }, + { + "epoch": 5.0777777777777775, + "grad_norm": 0.6312726838673673, + "learning_rate": 8.523933014761038e-06, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1730879247188568, + "step": 2285, + "valid_targets_mean": 3444.7, + "valid_targets_min": 591 + }, + { + "epoch": 5.088888888888889, + "grad_norm": 0.5063988724236291, + "learning_rate": 8.43335327664027e-06, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17903856933116913, + "step": 2290, + "valid_targets_mean": 4995.8, + "valid_targets_min": 362 + }, + { + "epoch": 5.1, + "grad_norm": 0.596851968168963, + "learning_rate": 8.343128629783457e-06, + "loss": 0.1781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1816779375076294, + "step": 2295, + "valid_targets_mean": 5093.5, + "valid_targets_min": 1031 + }, + { + "epoch": 5.111111111111111, + "grad_norm": 0.5710225280632183, + "learning_rate": 8.253261844049883e-06, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.168840691447258, + "step": 2300, + "valid_targets_mean": 4315.1, + "valid_targets_min": 404 + }, + { + "epoch": 5.122222222222222, + "grad_norm": 0.4748229596313232, + "learning_rate": 8.163755678312651e-06, + "loss": 0.1897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2132357954978943, + "step": 2305, + "valid_targets_mean": 5789.9, + "valid_targets_min": 283 + }, + { + "epoch": 5.133333333333334, + "grad_norm": 0.5287110732175844, + "learning_rate": 8.074612880373972e-06, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1418144404888153, + "step": 2310, + "valid_targets_mean": 4440.9, + "valid_targets_min": 820 + }, + { + "epoch": 5.144444444444445, + "grad_norm": 0.5936650959791397, + "learning_rate": 7.985836186880836e-06, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1570664495229721, + "step": 2315, + "valid_targets_mean": 4190.9, + "valid_targets_min": 335 + }, + { + "epoch": 5.155555555555556, + "grad_norm": 0.5901961361581314, + "learning_rate": 7.897428323240961e-06, + "loss": 0.1688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14300040900707245, + "step": 2320, + "valid_targets_mean": 4530.4, + "valid_targets_min": 244 + }, + { + "epoch": 5.166666666666667, + "grad_norm": 0.6401412212898742, + "learning_rate": 7.809392003539142e-06, + "loss": 0.1821, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18366971611976624, + "step": 2325, + "valid_targets_mean": 4497.3, + "valid_targets_min": 263 + }, + { + "epoch": 5.177777777777778, + "grad_norm": 0.5870360837488321, + "learning_rate": 7.72172993045393e-06, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19780203700065613, + "step": 2330, + "valid_targets_mean": 4467.4, + "valid_targets_min": 229 + }, + { + "epoch": 5.188888888888889, + "grad_norm": 0.6222022824851804, + "learning_rate": 7.634444795174671e-06, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1735324114561081, + "step": 2335, + "valid_targets_mean": 3150.3, + "valid_targets_min": 545 + }, + { + "epoch": 5.2, + "grad_norm": 0.612607191802424, + "learning_rate": 7.547539277318861e-06, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17350366711616516, + "step": 2340, + "valid_targets_mean": 4619.4, + "valid_targets_min": 2673 + }, + { + "epoch": 5.211111111111111, + "grad_norm": 0.7049036474479938, + "learning_rate": 7.461016044849918e-06, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1929350346326828, + "step": 2345, + "valid_targets_mean": 3115.6, + "valid_targets_min": 669 + }, + { + "epoch": 5.222222222222222, + "grad_norm": 0.5410958789958754, + "learning_rate": 7.374877753995224e-06, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1535416543483734, + "step": 2350, + "valid_targets_mean": 4524.3, + "valid_targets_min": 270 + }, + { + "epoch": 5.233333333333333, + "grad_norm": 0.6124896649786123, + "learning_rate": 7.289127049164648e-06, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19674161076545715, + "step": 2355, + "valid_targets_mean": 4170.1, + "valid_targets_min": 330 + }, + { + "epoch": 5.2444444444444445, + "grad_norm": 0.4994651140821769, + "learning_rate": 7.203766562869303e-06, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17009888589382172, + "step": 2360, + "valid_targets_mean": 5436.4, + "valid_targets_min": 231 + }, + { + "epoch": 5.2555555555555555, + "grad_norm": 0.6480412371115537, + "learning_rate": 7.118798915640779e-06, + "loss": 0.1703, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17472770810127258, + "step": 2365, + "valid_targets_mean": 3678.0, + "valid_targets_min": 550 + }, + { + "epoch": 5.266666666666667, + "grad_norm": 1.0624757859362757, + "learning_rate": 7.03422671595065e-06, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1649678647518158, + "step": 2370, + "valid_targets_mean": 4187.2, + "valid_targets_min": 576 + }, + { + "epoch": 5.277777777777778, + "grad_norm": 0.5370386672344407, + "learning_rate": 6.950052560130414e-06, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16847020387649536, + "step": 2375, + "valid_targets_mean": 4683.0, + "valid_targets_min": 347 + }, + { + "epoch": 5.288888888888889, + "grad_norm": 0.6286328398732474, + "learning_rate": 6.866279032291792e-06, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15064719319343567, + "step": 2380, + "valid_targets_mean": 3968.8, + "valid_targets_min": 860 + }, + { + "epoch": 5.3, + "grad_norm": 0.5874233077841997, + "learning_rate": 6.782908704247404e-06, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1599939465522766, + "step": 2385, + "valid_targets_mean": 4385.2, + "valid_targets_min": 489 + }, + { + "epoch": 5.311111111111111, + "grad_norm": 1.3591182836953655, + "learning_rate": 6.699944135431788e-06, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15932810306549072, + "step": 2390, + "valid_targets_mean": 3961.2, + "valid_targets_min": 312 + }, + { + "epoch": 5.322222222222222, + "grad_norm": 0.5070411136628474, + "learning_rate": 6.617387872822842e-06, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17544835805892944, + "step": 2395, + "valid_targets_mean": 5117.5, + "valid_targets_min": 760 + }, + { + "epoch": 5.333333333333333, + "grad_norm": 0.53150416040399, + "learning_rate": 6.535242450863632e-06, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18131661415100098, + "step": 2400, + "valid_targets_mean": 4886.9, + "valid_targets_min": 950 + }, + { + "epoch": 5.344444444444444, + "grad_norm": 0.5740042351970523, + "learning_rate": 6.453510391384606e-06, + "loss": 0.1784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17442232370376587, + "step": 2405, + "valid_targets_mean": 3791.2, + "valid_targets_min": 423 + }, + { + "epoch": 5.355555555555555, + "grad_norm": 0.46895250507011754, + "learning_rate": 6.372194203526121e-06, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1439400017261505, + "step": 2410, + "valid_targets_mean": 5206.1, + "valid_targets_min": 546 + }, + { + "epoch": 5.366666666666666, + "grad_norm": 0.6356884401904439, + "learning_rate": 6.2912963836614916e-06, + "loss": 0.178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20238983631134033, + "step": 2415, + "valid_targets_mean": 3916.1, + "valid_targets_min": 259 + }, + { + "epoch": 5.377777777777778, + "grad_norm": 0.6215829652547431, + "learning_rate": 6.210819415320253e-06, + "loss": 0.1758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14836809039115906, + "step": 2420, + "valid_targets_mean": 4363.0, + "valid_targets_min": 1439 + }, + { + "epoch": 5.388888888888889, + "grad_norm": 0.5563038744061828, + "learning_rate": 6.130765769112024e-06, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17539286613464355, + "step": 2425, + "valid_targets_mean": 4587.9, + "valid_targets_min": 678 + }, + { + "epoch": 5.4, + "grad_norm": 0.5854838140260286, + "learning_rate": 6.051137902650575e-06, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1495419442653656, + "step": 2430, + "valid_targets_mean": 5041.7, + "valid_targets_min": 556 + }, + { + "epoch": 5.411111111111111, + "grad_norm": 0.5363335763908142, + "learning_rate": 5.9719382604784405e-06, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16383875906467438, + "step": 2435, + "valid_targets_mean": 4707.6, + "valid_targets_min": 403 + }, + { + "epoch": 5.4222222222222225, + "grad_norm": 0.6920747040184556, + "learning_rate": 5.893169273991825e-06, + "loss": 0.1698, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20277723670005798, + "step": 2440, + "valid_targets_mean": 3596.3, + "valid_targets_min": 322 + }, + { + "epoch": 5.433333333333334, + "grad_norm": 0.5506429671164328, + "learning_rate": 5.8148333613659945e-06, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16613167524337769, + "step": 2445, + "valid_targets_mean": 4807.0, + "valid_targets_min": 322 + }, + { + "epoch": 5.444444444444445, + "grad_norm": 0.5643735346992593, + "learning_rate": 5.736932927481016e-06, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1531161069869995, + "step": 2450, + "valid_targets_mean": 4414.6, + "valid_targets_min": 820 + }, + { + "epoch": 5.455555555555556, + "grad_norm": 0.4966057762741589, + "learning_rate": 5.659470363847956e-06, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15961554646492004, + "step": 2455, + "valid_targets_mean": 5181.2, + "valid_targets_min": 717 + }, + { + "epoch": 5.466666666666667, + "grad_norm": 0.597256774781567, + "learning_rate": 5.5824480485354315e-06, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19598601758480072, + "step": 2460, + "valid_targets_mean": 5393.7, + "valid_targets_min": 377 + }, + { + "epoch": 5.477777777777778, + "grad_norm": 0.47416903570786223, + "learning_rate": 5.505868346096623e-06, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17863276600837708, + "step": 2465, + "valid_targets_mean": 5649.1, + "valid_targets_min": 982 + }, + { + "epoch": 5.488888888888889, + "grad_norm": 0.656500574854733, + "learning_rate": 5.429733607496674e-06, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15013918280601501, + "step": 2470, + "valid_targets_mean": 3773.6, + "valid_targets_min": 442 + }, + { + "epoch": 5.5, + "grad_norm": 0.5765586477143745, + "learning_rate": 5.354046170040537e-06, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18464767932891846, + "step": 2475, + "valid_targets_mean": 4310.9, + "valid_targets_min": 359 + }, + { + "epoch": 5.511111111111111, + "grad_norm": 0.513219195279251, + "learning_rate": 5.278808357301186e-06, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16760313510894775, + "step": 2480, + "valid_targets_mean": 4913.3, + "valid_targets_min": 259 + }, + { + "epoch": 5.522222222222222, + "grad_norm": 0.6307469353575386, + "learning_rate": 5.204022479048325e-06, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18006855249404907, + "step": 2485, + "valid_targets_mean": 3771.8, + "valid_targets_min": 419 + }, + { + "epoch": 5.533333333333333, + "grad_norm": 0.6063474311283815, + "learning_rate": 5.129690831177425e-06, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1837840974330902, + "step": 2490, + "valid_targets_mean": 3647.8, + "valid_targets_min": 524 + }, + { + "epoch": 5.544444444444444, + "grad_norm": 0.5318846921681258, + "learning_rate": 5.055815695639303e-06, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1946224868297577, + "step": 2495, + "valid_targets_mean": 4591.3, + "valid_targets_min": 433 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 0.593321647760732, + "learning_rate": 4.982399340370017e-06, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18851664662361145, + "step": 2500, + "valid_targets_mean": 4363.1, + "valid_targets_min": 771 + }, + { + "epoch": 5.566666666666666, + "grad_norm": 0.5435021274855566, + "learning_rate": 4.909444019221274e-06, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1688232570886612, + "step": 2505, + "valid_targets_mean": 4495.8, + "valid_targets_min": 328 + }, + { + "epoch": 5.5777777777777775, + "grad_norm": 0.4945201322111143, + "learning_rate": 4.836951971891215e-06, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14214378595352173, + "step": 2510, + "valid_targets_mean": 5189.9, + "valid_targets_min": 1223 + }, + { + "epoch": 5.588888888888889, + "grad_norm": 0.5455726240398745, + "learning_rate": 4.764925423855669e-06, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1754768192768097, + "step": 2515, + "valid_targets_mean": 4304.2, + "valid_targets_min": 479 + }, + { + "epoch": 5.6, + "grad_norm": 0.6294730140787744, + "learning_rate": 4.693366586299824e-06, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15617169439792633, + "step": 2520, + "valid_targets_mean": 3736.4, + "valid_targets_min": 218 + }, + { + "epoch": 5.611111111111111, + "grad_norm": 0.5250062129007003, + "learning_rate": 4.622277656050369e-06, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17956486344337463, + "step": 2525, + "valid_targets_mean": 5230.8, + "valid_targets_min": 2431 + }, + { + "epoch": 5.622222222222222, + "grad_norm": 0.7086051651918763, + "learning_rate": 4.551660815508012e-06, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1606767177581787, + "step": 2530, + "valid_targets_mean": 4573.8, + "valid_targets_min": 285 + }, + { + "epoch": 5.633333333333333, + "grad_norm": 0.48064688634395325, + "learning_rate": 4.481518232580515e-06, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1643509864807129, + "step": 2535, + "valid_targets_mean": 5939.2, + "valid_targets_min": 2884 + }, + { + "epoch": 5.644444444444445, + "grad_norm": 0.5324700467948121, + "learning_rate": 4.411852060616115e-06, + "loss": 0.182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1770201474428177, + "step": 2540, + "valid_targets_mean": 5397.2, + "valid_targets_min": 283 + }, + { + "epoch": 5.655555555555556, + "grad_norm": 0.5436206325195655, + "learning_rate": 4.342664438337447e-06, + "loss": 0.1792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14190086722373962, + "step": 2545, + "valid_targets_mean": 4431.3, + "valid_targets_min": 850 + }, + { + "epoch": 5.666666666666667, + "grad_norm": 0.4773318240610541, + "learning_rate": 4.273957489775862e-06, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15811610221862793, + "step": 2550, + "valid_targets_mean": 4552.0, + "valid_targets_min": 873 + }, + { + "epoch": 5.677777777777778, + "grad_norm": 0.5208734691655355, + "learning_rate": 4.205733324206216e-06, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16764241456985474, + "step": 2555, + "valid_targets_mean": 4251.2, + "valid_targets_min": 777 + }, + { + "epoch": 5.688888888888889, + "grad_norm": 0.4928403449962443, + "learning_rate": 4.137994036082138e-06, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.175938218832016, + "step": 2560, + "valid_targets_mean": 5055.7, + "valid_targets_min": 365 + }, + { + "epoch": 5.7, + "grad_norm": 0.5244262215031825, + "learning_rate": 4.070741704971726e-06, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2002006471157074, + "step": 2565, + "valid_targets_mean": 4992.8, + "valid_targets_min": 377 + }, + { + "epoch": 5.711111111111111, + "grad_norm": 0.661490029880272, + "learning_rate": 4.003978395493682e-06, + "loss": 0.1849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1974271535873413, + "step": 2570, + "valid_targets_mean": 3547.4, + "valid_targets_min": 490 + }, + { + "epoch": 5.722222222222222, + "grad_norm": 1.7114236198419897, + "learning_rate": 3.937706157253971e-06, + "loss": 0.1776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20456838607788086, + "step": 2575, + "valid_targets_mean": 3842.5, + "valid_targets_min": 206 + }, + { + "epoch": 5.733333333333333, + "grad_norm": 0.6837291674161498, + "learning_rate": 3.871927024782838e-06, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14637309312820435, + "step": 2580, + "valid_targets_mean": 4026.7, + "valid_targets_min": 469 + }, + { + "epoch": 5.7444444444444445, + "grad_norm": 0.7514815970637514, + "learning_rate": 3.80664301747242e-06, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18412163853645325, + "step": 2585, + "valid_targets_mean": 2986.9, + "valid_targets_min": 295 + }, + { + "epoch": 5.7555555555555555, + "grad_norm": 0.5388048398379085, + "learning_rate": 3.741856139514706e-06, + "loss": 0.1747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16334733366966248, + "step": 2590, + "valid_targets_mean": 4902.1, + "valid_targets_min": 997 + }, + { + "epoch": 5.766666666666667, + "grad_norm": 0.5233222496715474, + "learning_rate": 3.677568379840011e-06, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16272562742233276, + "step": 2595, + "valid_targets_mean": 4434.1, + "valid_targets_min": 462 + }, + { + "epoch": 5.777777777777778, + "grad_norm": 0.5705709741002777, + "learning_rate": 3.613781712055935e-06, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17690132558345795, + "step": 2600, + "valid_targets_mean": 4264.3, + "valid_targets_min": 325 + }, + { + "epoch": 5.788888888888889, + "grad_norm": 0.5479756129178118, + "learning_rate": 3.5504980943867538e-06, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16624002158641815, + "step": 2605, + "valid_targets_mean": 5080.0, + "valid_targets_min": 530 + }, + { + "epoch": 5.8, + "grad_norm": 0.6635653151891525, + "learning_rate": 3.487719469613331e-06, + "loss": 0.1811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2028333991765976, + "step": 2610, + "valid_targets_mean": 3572.5, + "valid_targets_min": 367 + }, + { + "epoch": 5.811111111111111, + "grad_norm": 0.44665022968089857, + "learning_rate": 3.4254477650134367e-06, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1494394838809967, + "step": 2615, + "valid_targets_mean": 5941.5, + "valid_targets_min": 755 + }, + { + "epoch": 5.822222222222222, + "grad_norm": 0.661754201811149, + "learning_rate": 3.3636848923026257e-06, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18834251165390015, + "step": 2620, + "valid_targets_mean": 2994.5, + "valid_targets_min": 393 + }, + { + "epoch": 5.833333333333333, + "grad_norm": 0.6849647049787121, + "learning_rate": 3.30243274757549e-06, + "loss": 0.1702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16348251700401306, + "step": 2625, + "valid_targets_mean": 2972.6, + "valid_targets_min": 997 + }, + { + "epoch": 5.844444444444444, + "grad_norm": 0.6231070446255984, + "learning_rate": 3.2416932112475207e-06, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15320156514644623, + "step": 2630, + "valid_targets_mean": 4418.4, + "valid_targets_min": 685 + }, + { + "epoch": 5.855555555555555, + "grad_norm": 0.5713999743603262, + "learning_rate": 3.1814681479973154e-06, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16935516893863678, + "step": 2635, + "valid_targets_mean": 4568.5, + "valid_targets_min": 761 + }, + { + "epoch": 5.866666666666667, + "grad_norm": 1.2779543648973395, + "learning_rate": 3.121759406709386e-06, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18024687469005585, + "step": 2640, + "valid_targets_mean": 4624.9, + "valid_targets_min": 310 + }, + { + "epoch": 5.877777777777778, + "grad_norm": 0.527748772279261, + "learning_rate": 3.062568820417353e-06, + "loss": 0.1785, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19535987079143524, + "step": 2645, + "valid_targets_mean": 5047.5, + "valid_targets_min": 733 + }, + { + "epoch": 5.888888888888889, + "grad_norm": 0.5667121076341758, + "learning_rate": 3.003898206247704e-06, + "loss": 0.1781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1724199652671814, + "step": 2650, + "valid_targets_mean": 4225.3, + "valid_targets_min": 957 + }, + { + "epoch": 5.9, + "grad_norm": 0.5448499086146226, + "learning_rate": 2.9457493653639856e-06, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16028910875320435, + "step": 2655, + "valid_targets_mean": 4420.4, + "valid_targets_min": 2211 + }, + { + "epoch": 5.911111111111111, + "grad_norm": 0.5833160229189241, + "learning_rate": 2.8881240829115453e-06, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.173763245344162, + "step": 2660, + "valid_targets_mean": 3861.1, + "valid_targets_min": 549 + }, + { + "epoch": 5.9222222222222225, + "grad_norm": 0.5633050658584287, + "learning_rate": 2.8310241279626784e-06, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18552720546722412, + "step": 2665, + "valid_targets_mean": 4328.1, + "valid_targets_min": 406 + }, + { + "epoch": 5.933333333333334, + "grad_norm": 0.6139575137201317, + "learning_rate": 2.774451253462356e-06, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17860981822013855, + "step": 2670, + "valid_targets_mean": 3716.5, + "valid_targets_min": 668 + }, + { + "epoch": 5.944444444444445, + "grad_norm": 0.5214170522074714, + "learning_rate": 2.718407196174391e-06, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18364687263965607, + "step": 2675, + "valid_targets_mean": 5053.6, + "valid_targets_min": 238 + }, + { + "epoch": 5.955555555555556, + "grad_norm": 0.6469395857790785, + "learning_rate": 2.6628936766281375e-06, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19230781495571136, + "step": 2680, + "valid_targets_mean": 3862.8, + "valid_targets_min": 742 + }, + { + "epoch": 5.966666666666667, + "grad_norm": 0.5114626457093362, + "learning_rate": 2.607912399065646e-06, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1596902310848236, + "step": 2685, + "valid_targets_mean": 5641.2, + "valid_targets_min": 306 + }, + { + "epoch": 5.977777777777778, + "grad_norm": 0.5327080602996671, + "learning_rate": 2.5534650513893787e-06, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14958997070789337, + "step": 2690, + "valid_targets_mean": 5039.9, + "valid_targets_min": 792 + }, + { + "epoch": 5.988888888888889, + "grad_norm": 0.6076035884975858, + "learning_rate": 2.4995533051103448e-06, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15890420973300934, + "step": 2695, + "valid_targets_mean": 3991.2, + "valid_targets_min": 300 + }, + { + "epoch": 6.0, + "grad_norm": 0.5398509197397203, + "learning_rate": 2.446178815296838e-06, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.159483402967453, + "step": 2700, + "valid_targets_mean": 4043.2, + "valid_targets_min": 300 + }, + { + "epoch": 6.011111111111111, + "grad_norm": 0.5913571273369431, + "learning_rate": 2.393343220523581e-06, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17935341596603394, + "step": 2705, + "valid_targets_mean": 4581.9, + "valid_targets_min": 1063 + }, + { + "epoch": 6.022222222222222, + "grad_norm": 0.66752029438305, + "learning_rate": 2.3410481428214602e-06, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1461348831653595, + "step": 2710, + "valid_targets_mean": 3675.8, + "valid_targets_min": 489 + }, + { + "epoch": 6.033333333333333, + "grad_norm": 0.5056815928149037, + "learning_rate": 2.2892951876276983e-06, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17451688647270203, + "step": 2715, + "valid_targets_mean": 4982.4, + "valid_targets_min": 193 + }, + { + "epoch": 6.044444444444444, + "grad_norm": 0.5787722267858811, + "learning_rate": 2.2380859437365855e-06, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1561371088027954, + "step": 2720, + "valid_targets_mean": 4155.3, + "valid_targets_min": 467 + }, + { + "epoch": 6.055555555555555, + "grad_norm": 0.5915320981288767, + "learning_rate": 2.187421983250695e-06, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16373848915100098, + "step": 2725, + "valid_targets_mean": 3869.1, + "valid_targets_min": 291 + }, + { + "epoch": 6.066666666666666, + "grad_norm": 0.6317568161952004, + "learning_rate": 2.1373048615326385e-06, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1501501202583313, + "step": 2730, + "valid_targets_mean": 3157.3, + "valid_targets_min": 657 + }, + { + "epoch": 6.0777777777777775, + "grad_norm": 0.5928916539006913, + "learning_rate": 2.0877361171572953e-06, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16607831418514252, + "step": 2735, + "valid_targets_mean": 5381.1, + "valid_targets_min": 366 + }, + { + "epoch": 6.088888888888889, + "grad_norm": 0.5449211144644974, + "learning_rate": 2.0387172718645853e-06, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14824213087558746, + "step": 2740, + "valid_targets_mean": 4073.2, + "valid_targets_min": 901 + }, + { + "epoch": 6.1, + "grad_norm": 0.5683455633442686, + "learning_rate": 1.990249830512756e-06, + "loss": 0.153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16184581816196442, + "step": 2745, + "valid_targets_mean": 4750.2, + "valid_targets_min": 325 + }, + { + "epoch": 6.111111111111111, + "grad_norm": 0.42667335878897045, + "learning_rate": 1.942335281032188e-06, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12961864471435547, + "step": 2750, + "valid_targets_mean": 6349.4, + "valid_targets_min": 2625 + }, + { + "epoch": 6.122222222222222, + "grad_norm": 0.5436823867806183, + "learning_rate": 1.8949750943797051e-06, + "loss": 0.1562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14522790908813477, + "step": 2755, + "valid_targets_mean": 4269.0, + "valid_targets_min": 324 + }, + { + "epoch": 6.133333333333334, + "grad_norm": 0.6448799255936731, + "learning_rate": 1.8481707244934232e-06, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15584322810173035, + "step": 2760, + "valid_targets_mean": 4016.6, + "valid_targets_min": 445 + }, + { + "epoch": 6.144444444444445, + "grad_norm": 0.5421598615393723, + "learning_rate": 1.8019236082481063e-06, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1767968237400055, + "step": 2765, + "valid_targets_mean": 4966.3, + "valid_targets_min": 734 + }, + { + "epoch": 6.155555555555556, + "grad_norm": 0.5685111593837506, + "learning_rate": 1.7562351654110776e-06, + "loss": 0.1602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1697826236486435, + "step": 2770, + "valid_targets_mean": 4455.9, + "valid_targets_min": 263 + }, + { + "epoch": 6.166666666666667, + "grad_norm": 0.5013193289509864, + "learning_rate": 1.711106798598603e-06, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14647771418094635, + "step": 2775, + "valid_targets_mean": 4696.4, + "valid_targets_min": 1329 + }, + { + "epoch": 6.177777777777778, + "grad_norm": 0.5765680957861777, + "learning_rate": 1.6665398932328615e-06, + "loss": 0.1843, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21099118888378143, + "step": 2780, + "valid_targets_mean": 4673.8, + "valid_targets_min": 328 + }, + { + "epoch": 6.188888888888889, + "grad_norm": 0.6071624204964592, + "learning_rate": 1.6225358174993866e-06, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15273860096931458, + "step": 2785, + "valid_targets_mean": 4286.6, + "valid_targets_min": 537 + }, + { + "epoch": 6.2, + "grad_norm": 0.6182352698209433, + "learning_rate": 1.5790959223050761e-06, + "loss": 0.1695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19237971305847168, + "step": 2790, + "valid_targets_mean": 3695.6, + "valid_targets_min": 344 + }, + { + "epoch": 6.211111111111111, + "grad_norm": 0.588213406617714, + "learning_rate": 1.5362215412367198e-06, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15135380625724792, + "step": 2795, + "valid_targets_mean": 3756.7, + "valid_targets_min": 1214 + }, + { + "epoch": 6.222222222222222, + "grad_norm": 0.6346814814248117, + "learning_rate": 1.493913990520066e-06, + "loss": 0.1853, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20901688933372498, + "step": 2800, + "valid_targets_mean": 4734.8, + "valid_targets_min": 360 + }, + { + "epoch": 6.233333333333333, + "grad_norm": 0.6257271615557687, + "learning_rate": 1.4521745689793942e-06, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15938270092010498, + "step": 2805, + "valid_targets_mean": 5421.2, + "valid_targets_min": 635 + }, + { + "epoch": 6.2444444444444445, + "grad_norm": 0.559553595120576, + "learning_rate": 1.4110045579976638e-06, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16130048036575317, + "step": 2810, + "valid_targets_mean": 4333.5, + "valid_targets_min": 586 + }, + { + "epoch": 6.2555555555555555, + "grad_norm": 0.5866773758858824, + "learning_rate": 1.3704052214771513e-06, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20442438125610352, + "step": 2815, + "valid_targets_mean": 4559.4, + "valid_targets_min": 491 + }, + { + "epoch": 6.266666666666667, + "grad_norm": 0.5344071459257823, + "learning_rate": 1.3303778058006844e-06, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1532367765903473, + "step": 2820, + "valid_targets_mean": 4964.8, + "valid_targets_min": 1225 + }, + { + "epoch": 6.277777777777778, + "grad_norm": 0.4690750876544389, + "learning_rate": 1.2909235397933429e-06, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18918469548225403, + "step": 2825, + "valid_targets_mean": 6092.4, + "valid_targets_min": 809 + }, + { + "epoch": 6.288888888888889, + "grad_norm": 0.5452380646504179, + "learning_rate": 1.2520436346847498e-06, + "loss": 0.1513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1750721037387848, + "step": 2830, + "valid_targets_mean": 4615.1, + "valid_targets_min": 231 + }, + { + "epoch": 6.3, + "grad_norm": 0.7863745243804952, + "learning_rate": 1.213739284071891e-06, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1903843879699707, + "step": 2835, + "valid_targets_mean": 3657.9, + "valid_targets_min": 783 + }, + { + "epoch": 6.311111111111111, + "grad_norm": 0.5931382218715537, + "learning_rate": 1.176011663882466e-06, + "loss": 0.1702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17225001752376556, + "step": 2840, + "valid_targets_mean": 3728.6, + "valid_targets_min": 394 + }, + { + "epoch": 6.322222222222222, + "grad_norm": 0.5189346871656337, + "learning_rate": 1.1388619323387884e-06, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17915725708007812, + "step": 2845, + "valid_targets_mean": 5191.2, + "valid_targets_min": 647 + }, + { + "epoch": 6.333333333333333, + "grad_norm": 0.5685337335007038, + "learning_rate": 1.1022912299222387e-06, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16851413249969482, + "step": 2850, + "valid_targets_mean": 4485.8, + "valid_targets_min": 855 + }, + { + "epoch": 6.344444444444444, + "grad_norm": 0.7195861370160256, + "learning_rate": 1.0663006793382214e-06, + "loss": 0.1607, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1574927270412445, + "step": 2855, + "valid_targets_mean": 3559.8, + "valid_targets_min": 576 + }, + { + "epoch": 6.355555555555555, + "grad_norm": 0.5762158096256034, + "learning_rate": 1.0308913854817425e-06, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1543298363685608, + "step": 2860, + "valid_targets_mean": 4227.9, + "valid_targets_min": 450 + }, + { + "epoch": 6.366666666666666, + "grad_norm": 0.581243646224178, + "learning_rate": 9.960644354034544e-07, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16603925824165344, + "step": 2865, + "valid_targets_mean": 3848.9, + "valid_targets_min": 539 + }, + { + "epoch": 6.377777777777778, + "grad_norm": 0.5860189314743979, + "learning_rate": 9.618208982763045e-07, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17424118518829346, + "step": 2870, + "valid_targets_mean": 4603.6, + "valid_targets_min": 820 + }, + { + "epoch": 6.388888888888889, + "grad_norm": 0.5260553051504206, + "learning_rate": 9.281618253626967e-07, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1587425321340561, + "step": 2875, + "valid_targets_mean": 4868.8, + "valid_targets_min": 623 + }, + { + "epoch": 6.4, + "grad_norm": 0.5709882539258184, + "learning_rate": 8.950882499822322e-07, + "loss": 0.157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18391573429107666, + "step": 2880, + "valid_targets_mean": 4826.9, + "valid_targets_min": 955 + }, + { + "epoch": 6.411111111111111, + "grad_norm": 0.5644788262306717, + "learning_rate": 8.626011874799723e-07, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18057206273078918, + "step": 2885, + "valid_targets_mean": 4512.6, + "valid_targets_min": 219 + }, + { + "epoch": 6.4222222222222225, + "grad_norm": 0.576346831291659, + "learning_rate": 8.307016351952857e-07, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.166375070810318, + "step": 2890, + "valid_targets_mean": 4210.4, + "valid_targets_min": 1171 + }, + { + "epoch": 6.433333333333334, + "grad_norm": 0.5757462663579878, + "learning_rate": 7.993905724312156e-07, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1599673181772232, + "step": 2895, + "valid_targets_mean": 4139.4, + "valid_targets_min": 566 + }, + { + "epoch": 6.444444444444445, + "grad_norm": 0.5623689294928453, + "learning_rate": 7.686689604244191e-07, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19040706753730774, + "step": 2900, + "valid_targets_mean": 4728.7, + "valid_targets_min": 992 + }, + { + "epoch": 6.455555555555556, + "grad_norm": 0.598246014236927, + "learning_rate": 7.385377423156592e-07, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1700592339038849, + "step": 2905, + "valid_targets_mean": 3927.4, + "valid_targets_min": 507 + }, + { + "epoch": 6.466666666666667, + "grad_norm": 0.6203480477245517, + "learning_rate": 7.0899784312086e-07, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1759098470211029, + "step": 2910, + "valid_targets_mean": 3698.8, + "valid_targets_min": 923 + }, + { + "epoch": 6.477777777777778, + "grad_norm": 0.7017472734434613, + "learning_rate": 6.800501697026817e-07, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1844363808631897, + "step": 2915, + "valid_targets_mean": 3652.4, + "valid_targets_min": 745 + }, + { + "epoch": 6.488888888888889, + "grad_norm": 0.8080719060022912, + "learning_rate": 6.516956107427241e-07, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1818864643573761, + "step": 2920, + "valid_targets_mean": 3598.4, + "valid_targets_min": 322 + }, + { + "epoch": 6.5, + "grad_norm": 0.6080050661997464, + "learning_rate": 6.239350367141872e-07, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1631316989660263, + "step": 2925, + "valid_targets_mean": 3850.2, + "valid_targets_min": 577 + }, + { + "epoch": 6.511111111111111, + "grad_norm": 0.5218209158596074, + "learning_rate": 5.967692998552088e-07, + "loss": 0.1734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1616230010986328, + "step": 2930, + "valid_targets_mean": 4195.7, + "valid_targets_min": 214 + }, + { + "epoch": 6.522222222222222, + "grad_norm": 0.583948409979544, + "learning_rate": 5.701992341426499e-07, + "loss": 0.1708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16667166352272034, + "step": 2935, + "valid_targets_mean": 4227.6, + "valid_targets_min": 443 + }, + { + "epoch": 6.533333333333333, + "grad_norm": 0.6357686417725928, + "learning_rate": 5.442256552665326e-07, + "loss": 0.1612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1789218783378601, + "step": 2940, + "valid_targets_mean": 4223.6, + "valid_targets_min": 549 + }, + { + "epoch": 6.544444444444444, + "grad_norm": 0.5545850268570705, + "learning_rate": 5.188493606049672e-07, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16391915082931519, + "step": 2945, + "valid_targets_mean": 4468.6, + "valid_targets_min": 306 + }, + { + "epoch": 6.555555555555555, + "grad_norm": 0.5370700566695868, + "learning_rate": 4.940711291996891e-07, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1655001938343048, + "step": 2950, + "valid_targets_mean": 4653.1, + "valid_targets_min": 524 + }, + { + "epoch": 6.566666666666666, + "grad_norm": 0.5816674164537832, + "learning_rate": 4.698917217321408e-07, + "loss": 0.1593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16322006285190582, + "step": 2955, + "valid_targets_mean": 4279.8, + "valid_targets_min": 791 + }, + { + "epoch": 6.5777777777777775, + "grad_norm": 0.591268448436149, + "learning_rate": 4.4631188050011654e-07, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15823054313659668, + "step": 2960, + "valid_targets_mean": 4439.2, + "valid_targets_min": 299 + }, + { + "epoch": 6.588888888888889, + "grad_norm": 0.5923470499206334, + "learning_rate": 4.2333232939498094e-07, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15806478261947632, + "step": 2965, + "valid_targets_mean": 4299.8, + "valid_targets_min": 557 + }, + { + "epoch": 6.6, + "grad_norm": 0.604171037099629, + "learning_rate": 4.009537738794289e-07, + "loss": 0.1624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18534332513809204, + "step": 2970, + "valid_targets_mean": 3794.3, + "valid_targets_min": 1163 + }, + { + "epoch": 6.611111111111111, + "grad_norm": 0.5959118836475591, + "learning_rate": 3.791769009658497e-07, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17430004477500916, + "step": 2975, + "valid_targets_mean": 4548.9, + "valid_targets_min": 689 + }, + { + "epoch": 6.622222222222222, + "grad_norm": 0.7602527640872502, + "learning_rate": 3.5800237919522363e-07, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1884760558605194, + "step": 2980, + "valid_targets_mean": 4584.7, + "valid_targets_min": 1115 + }, + { + "epoch": 6.633333333333333, + "grad_norm": 0.5214769514587413, + "learning_rate": 3.3743085861659643e-07, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16529062390327454, + "step": 2985, + "valid_targets_mean": 4972.1, + "valid_targets_min": 662 + }, + { + "epoch": 6.644444444444445, + "grad_norm": 0.5344722245636476, + "learning_rate": 3.1746297076713504e-07, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14969675242900848, + "step": 2990, + "valid_targets_mean": 4986.3, + "valid_targets_min": 1685 + }, + { + "epoch": 6.655555555555556, + "grad_norm": 0.5351628577936627, + "learning_rate": 2.9809932865271893e-07, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16871917247772217, + "step": 2995, + "valid_targets_mean": 4784.9, + "valid_targets_min": 691 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.5030921817708136, + "learning_rate": 2.793405267291505e-07, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15890008211135864, + "step": 3000, + "valid_targets_mean": 5473.4, + "valid_targets_min": 1108 + }, + { + "epoch": 6.677777777777778, + "grad_norm": 0.6571885910780609, + "learning_rate": 2.6118714088386954e-07, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18136438727378845, + "step": 3005, + "valid_targets_mean": 3401.2, + "valid_targets_min": 257 + }, + { + "epoch": 6.688888888888889, + "grad_norm": 0.6046227631736789, + "learning_rate": 2.436397284183123e-07, + "loss": 0.166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1641228199005127, + "step": 3010, + "valid_targets_mean": 3765.2, + "valid_targets_min": 695 + }, + { + "epoch": 6.7, + "grad_norm": 0.5844047544930174, + "learning_rate": 2.2669882803076916e-07, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15143552422523499, + "step": 3015, + "valid_targets_mean": 4498.2, + "valid_targets_min": 672 + }, + { + "epoch": 6.711111111111111, + "grad_norm": 0.6149713586123504, + "learning_rate": 2.1036495979986692e-07, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15604856610298157, + "step": 3020, + "valid_targets_mean": 3861.8, + "valid_targets_min": 225 + }, + { + "epoch": 6.722222222222222, + "grad_norm": 0.5723560396309636, + "learning_rate": 1.9463862516859277e-07, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12534162402153015, + "step": 3025, + "valid_targets_mean": 3602.2, + "valid_targets_min": 401 + }, + { + "epoch": 6.733333333333333, + "grad_norm": 0.4850032616094909, + "learning_rate": 1.7952030692891086e-07, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1640266627073288, + "step": 3030, + "valid_targets_mean": 4796.8, + "valid_targets_min": 802 + }, + { + "epoch": 6.7444444444444445, + "grad_norm": 0.5571942927801525, + "learning_rate": 1.6501046920692986e-07, + "loss": 0.1747, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18630409240722656, + "step": 3035, + "valid_targets_mean": 4979.9, + "valid_targets_min": 374 + }, + { + "epoch": 6.7555555555555555, + "grad_norm": 0.6261160646072778, + "learning_rate": 1.511095574486543e-07, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13854405283927917, + "step": 3040, + "valid_targets_mean": 3824.3, + "valid_targets_min": 449 + }, + { + "epoch": 6.766666666666667, + "grad_norm": 0.6318415495546549, + "learning_rate": 1.378179984063177e-07, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.186091810464859, + "step": 3045, + "valid_targets_mean": 4292.3, + "valid_targets_min": 389 + }, + { + "epoch": 6.777777777777778, + "grad_norm": 0.6057679446128716, + "learning_rate": 1.2513620012528427e-07, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18621055781841278, + "step": 3050, + "valid_targets_mean": 4053.4, + "valid_targets_min": 289 + }, + { + "epoch": 6.788888888888889, + "grad_norm": 0.8744644549026661, + "learning_rate": 1.1306455193150323e-07, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20955529808998108, + "step": 3055, + "valid_targets_mean": 3671.4, + "valid_targets_min": 482 + }, + { + "epoch": 6.8, + "grad_norm": 0.6599223412568986, + "learning_rate": 1.0160342441957626e-07, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19978420436382294, + "step": 3060, + "valid_targets_mean": 3478.9, + "valid_targets_min": 300 + }, + { + "epoch": 6.811111111111111, + "grad_norm": 0.5322339408865758, + "learning_rate": 9.07531694413688e-08, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14194843173027039, + "step": 3065, + "valid_targets_mean": 5484.3, + "valid_targets_min": 2467 + }, + { + "epoch": 6.822222222222222, + "grad_norm": 0.5448297735203173, + "learning_rate": 8.051412009521864e-08, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15910744667053223, + "step": 3070, + "valid_targets_mean": 4950.7, + "valid_targets_min": 750 + }, + { + "epoch": 6.833333333333333, + "grad_norm": 0.5207228811956566, + "learning_rate": 7.08865907156997e-08, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15996085107326508, + "step": 3075, + "valid_targets_mean": 5696.6, + "valid_targets_min": 1801 + }, + { + "epoch": 6.844444444444444, + "grad_norm": 0.588737424620841, + "learning_rate": 6.187087686397641e-08, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16847629845142365, + "step": 3080, + "valid_targets_mean": 3962.7, + "valid_targets_min": 243 + }, + { + "epoch": 6.855555555555555, + "grad_norm": 0.8953529078704728, + "learning_rate": 5.3467255318726544e-08, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1842920035123825, + "step": 3085, + "valid_targets_mean": 3766.0, + "valid_targets_min": 436 + }, + { + "epoch": 6.866666666666667, + "grad_norm": 0.6312232512655556, + "learning_rate": 4.567598406765461e-08, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14916302263736725, + "step": 3090, + "valid_targets_mean": 4468.3, + "valid_targets_min": 292 + }, + { + "epoch": 6.877777777777778, + "grad_norm": 0.5616637532565867, + "learning_rate": 3.84973022995605e-08, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13993391394615173, + "step": 3095, + "valid_targets_mean": 4598.1, + "valid_targets_min": 479 + }, + { + "epoch": 6.888888888888889, + "grad_norm": 0.5833668585034318, + "learning_rate": 3.193143039700086e-08, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1864895224571228, + "step": 3100, + "valid_targets_mean": 4208.8, + "valid_targets_min": 731 + }, + { + "epoch": 6.9, + "grad_norm": 0.571580775093832, + "learning_rate": 2.597856992952341e-08, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16089646518230438, + "step": 3105, + "valid_targets_mean": 5259.4, + "valid_targets_min": 1133 + }, + { + "epoch": 6.911111111111111, + "grad_norm": 0.5133595187461827, + "learning_rate": 2.063890364748078e-08, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1508500874042511, + "step": 3110, + "valid_targets_mean": 5170.9, + "valid_targets_min": 1010 + }, + { + "epoch": 6.9222222222222225, + "grad_norm": 0.5585279295675786, + "learning_rate": 1.5912595476414993e-08, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17111481726169586, + "step": 3115, + "valid_targets_mean": 4617.8, + "valid_targets_min": 589 + }, + { + "epoch": 6.933333333333334, + "grad_norm": 0.5333435138003515, + "learning_rate": 1.1799790512030395e-08, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14488770067691803, + "step": 3120, + "valid_targets_mean": 4679.4, + "valid_targets_min": 835 + }, + { + "epoch": 6.944444444444445, + "grad_norm": 0.5560785893229921, + "learning_rate": 8.300615015734981e-09, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14593376219272614, + "step": 3125, + "valid_targets_mean": 4246.2, + "valid_targets_min": 616 + }, + { + "epoch": 6.955555555555556, + "grad_norm": 0.7126140858082262, + "learning_rate": 5.415176410765721e-09, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18622633814811707, + "step": 3130, + "valid_targets_mean": 3116.1, + "valid_targets_min": 263 + }, + { + "epoch": 6.966666666666667, + "grad_norm": 0.5293643056538982, + "learning_rate": 3.1435632788956448e-09, + "loss": 0.1711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14810457825660706, + "step": 3135, + "valid_targets_mean": 4385.9, + "valid_targets_min": 369 + }, + { + "epoch": 6.977777777777778, + "grad_norm": 0.6565261395938169, + "learning_rate": 1.4858453577071275e-09, + "loss": 0.1593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16837093234062195, + "step": 3140, + "valid_targets_mean": 3301.8, + "valid_targets_min": 361 + }, + { + "epoch": 6.988888888888889, + "grad_norm": 0.53850752681232, + "learning_rate": 4.4207353845360234e-10, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17763689160346985, + "step": 3145, + "valid_targets_mean": 5262.4, + "valid_targets_min": 2341 + }, + { + "epoch": 7.0, + "grad_norm": 0.6203043399932429, + "learning_rate": 1.2279864494146865e-11, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19432786107063293, + "step": 3150, + "valid_targets_mean": 4280.6, + "valid_targets_min": 315 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19432786107063293, + "step": 3150, + "total_flos": 1055465597173760.0, + "train_loss": 0.21853318925887819, + "train_runtime": 17876.6321, + "train_samples_per_second": 2.818, + "train_steps_per_second": 0.176, + "valid_targets_mean": 4280.6, + "valid_targets_min": 315 + } + ], + "logging_steps": 5, + "max_steps": 3150, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1055465597173760.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}