diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,10178 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4606, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.007598784194528876, + "grad_norm": 16.54100795724015, + "learning_rate": 3.4707158351409984e-07, + "loss": 0.6415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6890859603881836, + "step": 5, + "valid_targets_mean": 4029.6, + "valid_targets_min": 742 + }, + { + "epoch": 0.015197568389057751, + "grad_norm": 17.760320126405695, + "learning_rate": 7.809110629067245e-07, + "loss": 0.6735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6768203973770142, + "step": 10, + "valid_targets_mean": 4724.0, + "valid_targets_min": 333 + }, + { + "epoch": 0.022796352583586626, + "grad_norm": 14.700502278309516, + "learning_rate": 1.2147505422993492e-06, + "loss": 0.6621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6386404633522034, + "step": 15, + "valid_targets_mean": 4853.7, + "valid_targets_min": 1059 + }, + { + "epoch": 0.030395136778115502, + "grad_norm": 12.016024137339164, + "learning_rate": 1.6485900216919743e-06, + "loss": 0.6268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6092511415481567, + "step": 20, + "valid_targets_mean": 5505.4, + "valid_targets_min": 2608 + }, + { + "epoch": 0.037993920972644375, + "grad_norm": 7.202624222295684, + "learning_rate": 2.0824295010845986e-06, + "loss": 0.5318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4805890917778015, + "step": 25, + "valid_targets_mean": 4894.8, + "valid_targets_min": 746 + }, + { + "epoch": 0.04559270516717325, + "grad_norm": 5.248039681477442, + "learning_rate": 2.516268980477224e-06, + "loss": 0.5034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5152153372764587, + "step": 30, + "valid_targets_mean": 4687.7, + "valid_targets_min": 1705 + }, + { + "epoch": 0.05319148936170213, + "grad_norm": 2.714176181066118, + "learning_rate": 2.950108459869848e-06, + "loss": 0.4815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4753047823905945, + "step": 35, + "valid_targets_mean": 4859.7, + "valid_targets_min": 456 + }, + { + "epoch": 0.060790273556231005, + "grad_norm": 1.7777692231211755, + "learning_rate": 3.383947939262473e-06, + "loss": 0.4372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4611857831478119, + "step": 40, + "valid_targets_mean": 4217.9, + "valid_targets_min": 741 + }, + { + "epoch": 0.06838905775075987, + "grad_norm": 1.3787959870798525, + "learning_rate": 3.817787418655098e-06, + "loss": 0.4164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4521799385547638, + "step": 45, + "valid_targets_mean": 3577.2, + "valid_targets_min": 525 + }, + { + "epoch": 0.07598784194528875, + "grad_norm": 1.0709482777091444, + "learning_rate": 4.251626898047723e-06, + "loss": 0.4129, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.45762407779693604, + "step": 50, + "valid_targets_mean": 4046.8, + "valid_targets_min": 847 + }, + { + "epoch": 0.08358662613981763, + "grad_norm": 0.8316164594444523, + "learning_rate": 4.685466377440348e-06, + "loss": 0.3775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38839221000671387, + "step": 55, + "valid_targets_mean": 5905.9, + "valid_targets_min": 1232 + }, + { + "epoch": 0.0911854103343465, + "grad_norm": 0.7446660666424425, + "learning_rate": 5.1193058568329725e-06, + "loss": 0.3891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.34218358993530273, + "step": 60, + "valid_targets_mean": 4599.2, + "valid_targets_min": 554 + }, + { + "epoch": 0.09878419452887538, + "grad_norm": 0.8425007884341953, + "learning_rate": 5.5531453362255974e-06, + "loss": 0.3781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36715853214263916, + "step": 65, + "valid_targets_mean": 4719.6, + "valid_targets_min": 590 + }, + { + "epoch": 0.10638297872340426, + "grad_norm": 0.6619149306280321, + "learning_rate": 5.986984815618222e-06, + "loss": 0.3861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3819279968738556, + "step": 70, + "valid_targets_mean": 4757.8, + "valid_targets_min": 881 + }, + { + "epoch": 0.11398176291793313, + "grad_norm": 0.5731111234637317, + "learning_rate": 6.420824295010846e-06, + "loss": 0.3629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3706345558166504, + "step": 75, + "valid_targets_mean": 5672.5, + "valid_targets_min": 1128 + }, + { + "epoch": 0.12158054711246201, + "grad_norm": 0.6080686326455707, + "learning_rate": 6.854663774403471e-06, + "loss": 0.3312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.38144651055336, + "step": 80, + "valid_targets_mean": 4179.6, + "valid_targets_min": 814 + }, + { + "epoch": 0.12917933130699089, + "grad_norm": 0.6499495432455763, + "learning_rate": 7.288503253796096e-06, + "loss": 0.3446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3940460979938507, + "step": 85, + "valid_targets_mean": 4595.6, + "valid_targets_min": 583 + }, + { + "epoch": 0.13677811550151975, + "grad_norm": 0.4843533852497594, + "learning_rate": 7.722342733188721e-06, + "loss": 0.332, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3182428181171417, + "step": 90, + "valid_targets_mean": 5585.4, + "valid_targets_min": 740 + }, + { + "epoch": 0.14437689969604864, + "grad_norm": 0.6242189491842878, + "learning_rate": 8.156182212581345e-06, + "loss": 0.3316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3119521737098694, + "step": 95, + "valid_targets_mean": 5690.2, + "valid_targets_min": 792 + }, + { + "epoch": 0.1519756838905775, + "grad_norm": 0.5346923069880523, + "learning_rate": 8.59002169197397e-06, + "loss": 0.3313, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31788039207458496, + "step": 100, + "valid_targets_mean": 4587.9, + "valid_targets_min": 767 + }, + { + "epoch": 0.1595744680851064, + "grad_norm": 0.5073268077298685, + "learning_rate": 9.023861171366595e-06, + "loss": 0.3194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3051730990409851, + "step": 105, + "valid_targets_mean": 5143.8, + "valid_targets_min": 823 + }, + { + "epoch": 0.16717325227963525, + "grad_norm": 0.5467162692473684, + "learning_rate": 9.457700650759219e-06, + "loss": 0.3018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32401126623153687, + "step": 110, + "valid_targets_mean": 5420.0, + "valid_targets_min": 674 + }, + { + "epoch": 0.17477203647416414, + "grad_norm": 0.5541588789236693, + "learning_rate": 9.891540130151845e-06, + "loss": 0.3334, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3607725501060486, + "step": 115, + "valid_targets_mean": 5116.0, + "valid_targets_min": 569 + }, + { + "epoch": 0.182370820668693, + "grad_norm": 0.5403092031436869, + "learning_rate": 1.032537960954447e-05, + "loss": 0.2937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.290938138961792, + "step": 120, + "valid_targets_mean": 4455.6, + "valid_targets_min": 1819 + }, + { + "epoch": 0.1899696048632219, + "grad_norm": 0.5924655899271373, + "learning_rate": 1.0759219088937095e-05, + "loss": 0.3343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31885042786598206, + "step": 125, + "valid_targets_mean": 3642.6, + "valid_targets_min": 401 + }, + { + "epoch": 0.19756838905775076, + "grad_norm": 0.49980402806514684, + "learning_rate": 1.119305856832972e-05, + "loss": 0.2857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2728749215602875, + "step": 130, + "valid_targets_mean": 4462.3, + "valid_targets_min": 585 + }, + { + "epoch": 0.20516717325227962, + "grad_norm": 0.5224338478901207, + "learning_rate": 1.1626898047722344e-05, + "loss": 0.2962, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29036590456962585, + "step": 135, + "valid_targets_mean": 4953.9, + "valid_targets_min": 901 + }, + { + "epoch": 0.2127659574468085, + "grad_norm": 0.5647977318046398, + "learning_rate": 1.2060737527114967e-05, + "loss": 0.2992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30531400442123413, + "step": 140, + "valid_targets_mean": 3919.9, + "valid_targets_min": 797 + }, + { + "epoch": 0.22036474164133737, + "grad_norm": 0.5768676708119788, + "learning_rate": 1.2494577006507593e-05, + "loss": 0.3144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3926551342010498, + "step": 145, + "valid_targets_mean": 4445.3, + "valid_targets_min": 595 + }, + { + "epoch": 0.22796352583586627, + "grad_norm": 0.46004731662025494, + "learning_rate": 1.2928416485900217e-05, + "loss": 0.2964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23685210943222046, + "step": 150, + "valid_targets_mean": 4963.2, + "valid_targets_min": 1646 + }, + { + "epoch": 0.23556231003039513, + "grad_norm": 0.6456923331623713, + "learning_rate": 1.3362255965292842e-05, + "loss": 0.2947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.32530516386032104, + "step": 155, + "valid_targets_mean": 3147.6, + "valid_targets_min": 713 + }, + { + "epoch": 0.24316109422492402, + "grad_norm": 0.5132446277120045, + "learning_rate": 1.3796095444685466e-05, + "loss": 0.2866, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3017147481441498, + "step": 160, + "valid_targets_mean": 5447.2, + "valid_targets_min": 662 + }, + { + "epoch": 0.2507598784194529, + "grad_norm": 0.5526604062654956, + "learning_rate": 1.4229934924078092e-05, + "loss": 0.2781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2687798738479614, + "step": 165, + "valid_targets_mean": 4341.5, + "valid_targets_min": 756 + }, + { + "epoch": 0.25835866261398177, + "grad_norm": 0.5293460369161004, + "learning_rate": 1.4663774403470716e-05, + "loss": 0.2804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28431305289268494, + "step": 170, + "valid_targets_mean": 6130.4, + "valid_targets_min": 1724 + }, + { + "epoch": 0.26595744680851063, + "grad_norm": 0.5138145073678304, + "learning_rate": 1.5097613882863342e-05, + "loss": 0.2892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29994410276412964, + "step": 175, + "valid_targets_mean": 4394.1, + "valid_targets_min": 589 + }, + { + "epoch": 0.2735562310030395, + "grad_norm": 0.5043230432581788, + "learning_rate": 1.5531453362255964e-05, + "loss": 0.2841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2635194659233093, + "step": 180, + "valid_targets_mean": 4637.4, + "valid_targets_min": 588 + }, + { + "epoch": 0.2811550151975684, + "grad_norm": 0.5633614566403176, + "learning_rate": 1.5965292841648592e-05, + "loss": 0.2694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.349490761756897, + "step": 185, + "valid_targets_mean": 5181.4, + "valid_targets_min": 1743 + }, + { + "epoch": 0.2887537993920973, + "grad_norm": 0.5633432190899903, + "learning_rate": 1.6399132321041216e-05, + "loss": 0.2859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28400588035583496, + "step": 190, + "valid_targets_mean": 4035.6, + "valid_targets_min": 1318 + }, + { + "epoch": 0.29635258358662614, + "grad_norm": 0.48584306761961266, + "learning_rate": 1.6832971800433843e-05, + "loss": 0.2778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2302139550447464, + "step": 195, + "valid_targets_mean": 5217.0, + "valid_targets_min": 1825 + }, + { + "epoch": 0.303951367781155, + "grad_norm": 0.5423337122654377, + "learning_rate": 1.7266811279826464e-05, + "loss": 0.265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2699054479598999, + "step": 200, + "valid_targets_mean": 4060.4, + "valid_targets_min": 824 + }, + { + "epoch": 0.31155015197568386, + "grad_norm": 0.583883380975703, + "learning_rate": 1.770065075921909e-05, + "loss": 0.263, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28162840008735657, + "step": 205, + "valid_targets_mean": 4595.1, + "valid_targets_min": 446 + }, + { + "epoch": 0.3191489361702128, + "grad_norm": 0.5187106940915953, + "learning_rate": 1.8134490238611715e-05, + "loss": 0.2586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25653451681137085, + "step": 210, + "valid_targets_mean": 4721.1, + "valid_targets_min": 885 + }, + { + "epoch": 0.32674772036474165, + "grad_norm": 0.5079241607768279, + "learning_rate": 1.856832971800434e-05, + "loss": 0.2524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.257007360458374, + "step": 215, + "valid_targets_mean": 5210.9, + "valid_targets_min": 784 + }, + { + "epoch": 0.3343465045592705, + "grad_norm": 0.6159525490464162, + "learning_rate": 1.9002169197396964e-05, + "loss": 0.2707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3035469651222229, + "step": 220, + "valid_targets_mean": 4253.7, + "valid_targets_min": 670 + }, + { + "epoch": 0.34194528875379937, + "grad_norm": 0.618048775817979, + "learning_rate": 1.9436008676789588e-05, + "loss": 0.2914, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3232946991920471, + "step": 225, + "valid_targets_mean": 3915.6, + "valid_targets_min": 686 + }, + { + "epoch": 0.3495440729483283, + "grad_norm": 0.4988454732686451, + "learning_rate": 1.9869848156182215e-05, + "loss": 0.2604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2464175820350647, + "step": 230, + "valid_targets_mean": 4478.2, + "valid_targets_min": 1934 + }, + { + "epoch": 0.35714285714285715, + "grad_norm": 0.5788630961414701, + "learning_rate": 2.030368763557484e-05, + "loss": 0.2697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25093919038772583, + "step": 235, + "valid_targets_mean": 4403.9, + "valid_targets_min": 2002 + }, + { + "epoch": 0.364741641337386, + "grad_norm": 0.5727233609316779, + "learning_rate": 2.0737527114967463e-05, + "loss": 0.2787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2930532693862915, + "step": 240, + "valid_targets_mean": 3878.2, + "valid_targets_min": 683 + }, + { + "epoch": 0.3723404255319149, + "grad_norm": 0.507643333171839, + "learning_rate": 2.117136659436009e-05, + "loss": 0.2612, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26503849029541016, + "step": 245, + "valid_targets_mean": 5116.3, + "valid_targets_min": 1771 + }, + { + "epoch": 0.3799392097264438, + "grad_norm": 0.5575549222120635, + "learning_rate": 2.160520607375271e-05, + "loss": 0.239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24154013395309448, + "step": 250, + "valid_targets_mean": 4581.1, + "valid_targets_min": 446 + }, + { + "epoch": 0.38753799392097266, + "grad_norm": 0.5133849867561285, + "learning_rate": 2.203904555314534e-05, + "loss": 0.2536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2547852694988251, + "step": 255, + "valid_targets_mean": 4651.8, + "valid_targets_min": 434 + }, + { + "epoch": 0.3951367781155015, + "grad_norm": 0.46783460540928934, + "learning_rate": 2.2472885032537963e-05, + "loss": 0.2714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23877258598804474, + "step": 260, + "valid_targets_mean": 5490.8, + "valid_targets_min": 1221 + }, + { + "epoch": 0.4027355623100304, + "grad_norm": 0.5446072363730794, + "learning_rate": 2.290672451193059e-05, + "loss": 0.2643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23716062307357788, + "step": 265, + "valid_targets_mean": 4415.1, + "valid_targets_min": 567 + }, + { + "epoch": 0.41033434650455924, + "grad_norm": 0.5076237944415188, + "learning_rate": 2.334056399132321e-05, + "loss": 0.2406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2342528998851776, + "step": 270, + "valid_targets_mean": 4982.2, + "valid_targets_min": 867 + }, + { + "epoch": 0.41793313069908816, + "grad_norm": 0.5204833234773354, + "learning_rate": 2.3774403470715835e-05, + "loss": 0.2722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24817609786987305, + "step": 275, + "valid_targets_mean": 4886.1, + "valid_targets_min": 1124 + }, + { + "epoch": 0.425531914893617, + "grad_norm": 1.1929428353263904, + "learning_rate": 2.4208242950108462e-05, + "loss": 0.2534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2741398513317108, + "step": 280, + "valid_targets_mean": 5717.5, + "valid_targets_min": 863 + }, + { + "epoch": 0.4331306990881459, + "grad_norm": 0.585914448752194, + "learning_rate": 2.464208242950109e-05, + "loss": 0.2498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27346092462539673, + "step": 285, + "valid_targets_mean": 4714.8, + "valid_targets_min": 738 + }, + { + "epoch": 0.44072948328267475, + "grad_norm": 0.6700936401277653, + "learning_rate": 2.507592190889371e-05, + "loss": 0.2714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3218214511871338, + "step": 290, + "valid_targets_mean": 3352.8, + "valid_targets_min": 665 + }, + { + "epoch": 0.44832826747720367, + "grad_norm": 0.5819949812246825, + "learning_rate": 2.5509761388286335e-05, + "loss": 0.2454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24475443363189697, + "step": 295, + "valid_targets_mean": 5024.5, + "valid_targets_min": 1008 + }, + { + "epoch": 0.45592705167173253, + "grad_norm": 0.5564851893611434, + "learning_rate": 2.5943600867678962e-05, + "loss": 0.2715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25335440039634705, + "step": 300, + "valid_targets_mean": 4271.6, + "valid_targets_min": 495 + }, + { + "epoch": 0.4635258358662614, + "grad_norm": 0.6776311836920197, + "learning_rate": 2.637744034707159e-05, + "loss": 0.2637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26621827483177185, + "step": 305, + "valid_targets_mean": 4792.2, + "valid_targets_min": 585 + }, + { + "epoch": 0.47112462006079026, + "grad_norm": 0.5014128476072633, + "learning_rate": 2.681127982646421e-05, + "loss": 0.2433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23652535676956177, + "step": 310, + "valid_targets_mean": 5203.9, + "valid_targets_min": 631 + }, + { + "epoch": 0.4787234042553192, + "grad_norm": 0.5471285546055437, + "learning_rate": 2.7245119305856834e-05, + "loss": 0.2392, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2725940942764282, + "step": 315, + "valid_targets_mean": 4777.1, + "valid_targets_min": 1762 + }, + { + "epoch": 0.48632218844984804, + "grad_norm": 0.5215837029470914, + "learning_rate": 2.7678958785249462e-05, + "loss": 0.2357, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2674940228462219, + "step": 320, + "valid_targets_mean": 4912.2, + "valid_targets_min": 1286 + }, + { + "epoch": 0.4939209726443769, + "grad_norm": 0.5545464098953832, + "learning_rate": 2.8112798264642082e-05, + "loss": 0.2555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27066224813461304, + "step": 325, + "valid_targets_mean": 4713.8, + "valid_targets_min": 581 + }, + { + "epoch": 0.5015197568389058, + "grad_norm": 0.5291981229160326, + "learning_rate": 2.854663774403471e-05, + "loss": 0.2646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27261027693748474, + "step": 330, + "valid_targets_mean": 5018.4, + "valid_targets_min": 825 + }, + { + "epoch": 0.5091185410334347, + "grad_norm": 0.5927074347019183, + "learning_rate": 2.8980477223427334e-05, + "loss": 0.249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23593679070472717, + "step": 335, + "valid_targets_mean": 4369.9, + "valid_targets_min": 852 + }, + { + "epoch": 0.5167173252279635, + "grad_norm": 0.5613566212263488, + "learning_rate": 2.941431670281996e-05, + "loss": 0.246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25743183493614197, + "step": 340, + "valid_targets_mean": 4878.5, + "valid_targets_min": 736 + }, + { + "epoch": 0.5243161094224924, + "grad_norm": 0.43333351737553155, + "learning_rate": 2.9848156182212582e-05, + "loss": 0.2418, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22708739340305328, + "step": 345, + "valid_targets_mean": 5272.6, + "valid_targets_min": 2600 + }, + { + "epoch": 0.5319148936170213, + "grad_norm": 0.5349091750560264, + "learning_rate": 3.028199566160521e-05, + "loss": 0.2484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2423931062221527, + "step": 350, + "valid_targets_mean": 4710.4, + "valid_targets_min": 1708 + }, + { + "epoch": 0.5395136778115501, + "grad_norm": 0.552342573882699, + "learning_rate": 3.0715835140997834e-05, + "loss": 0.2515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2209261655807495, + "step": 355, + "valid_targets_mean": 4220.2, + "valid_targets_min": 498 + }, + { + "epoch": 0.547112462006079, + "grad_norm": 0.47358599117445827, + "learning_rate": 3.114967462039046e-05, + "loss": 0.2431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22403742372989655, + "step": 360, + "valid_targets_mean": 5009.1, + "valid_targets_min": 826 + }, + { + "epoch": 0.5547112462006079, + "grad_norm": 0.5508791979851065, + "learning_rate": 3.158351409978308e-05, + "loss": 0.271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2744586765766144, + "step": 365, + "valid_targets_mean": 4221.0, + "valid_targets_min": 658 + }, + { + "epoch": 0.5623100303951368, + "grad_norm": 0.4880656851972933, + "learning_rate": 3.2017353579175706e-05, + "loss": 0.257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2102406919002533, + "step": 370, + "valid_targets_mean": 5021.4, + "valid_targets_min": 422 + }, + { + "epoch": 0.5699088145896657, + "grad_norm": 0.5066085991817625, + "learning_rate": 3.2451193058568337e-05, + "loss": 0.2493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.220636785030365, + "step": 375, + "valid_targets_mean": 4511.9, + "valid_targets_min": 904 + }, + { + "epoch": 0.5775075987841946, + "grad_norm": 0.7189915330518424, + "learning_rate": 3.288503253796096e-05, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2566250264644623, + "step": 380, + "valid_targets_mean": 4385.5, + "valid_targets_min": 551 + }, + { + "epoch": 0.5851063829787234, + "grad_norm": 0.548460587196264, + "learning_rate": 3.331887201735358e-05, + "loss": 0.259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23478102684020996, + "step": 385, + "valid_targets_mean": 4521.1, + "valid_targets_min": 940 + }, + { + "epoch": 0.5927051671732523, + "grad_norm": 0.7285447421019886, + "learning_rate": 3.375271149674621e-05, + "loss": 0.2528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2555179297924042, + "step": 390, + "valid_targets_mean": 3723.9, + "valid_targets_min": 409 + }, + { + "epoch": 0.6003039513677811, + "grad_norm": 0.5544467667445715, + "learning_rate": 3.418655097613883e-05, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2515088617801666, + "step": 395, + "valid_targets_mean": 4729.4, + "valid_targets_min": 791 + }, + { + "epoch": 0.60790273556231, + "grad_norm": 0.53995409546225, + "learning_rate": 3.462039045553146e-05, + "loss": 0.2477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23457960784435272, + "step": 400, + "valid_targets_mean": 5069.6, + "valid_targets_min": 489 + }, + { + "epoch": 0.6155015197568389, + "grad_norm": 0.9196646418934739, + "learning_rate": 3.505422993492408e-05, + "loss": 0.2449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23785921931266785, + "step": 405, + "valid_targets_mean": 4393.4, + "valid_targets_min": 829 + }, + { + "epoch": 0.6231003039513677, + "grad_norm": 0.6099790951053485, + "learning_rate": 3.5488069414316705e-05, + "loss": 0.2363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23619459569454193, + "step": 410, + "valid_targets_mean": 3865.0, + "valid_targets_min": 1002 + }, + { + "epoch": 0.6306990881458967, + "grad_norm": 0.5925560150305617, + "learning_rate": 3.592190889370933e-05, + "loss": 0.2534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24648156762123108, + "step": 415, + "valid_targets_mean": 5077.6, + "valid_targets_min": 1028 + }, + { + "epoch": 0.6382978723404256, + "grad_norm": 0.5334746742630121, + "learning_rate": 3.635574837310195e-05, + "loss": 0.2337, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21448864042758942, + "step": 420, + "valid_targets_mean": 4616.1, + "valid_targets_min": 574 + }, + { + "epoch": 0.6458966565349544, + "grad_norm": 0.5167724016109878, + "learning_rate": 3.678958785249458e-05, + "loss": 0.2433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.230168879032135, + "step": 425, + "valid_targets_mean": 4870.9, + "valid_targets_min": 921 + }, + { + "epoch": 0.6534954407294833, + "grad_norm": 0.5454984899416742, + "learning_rate": 3.722342733188721e-05, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.262331485748291, + "step": 430, + "valid_targets_mean": 5432.7, + "valid_targets_min": 933 + }, + { + "epoch": 0.6610942249240122, + "grad_norm": 0.7273049295570767, + "learning_rate": 3.765726681127983e-05, + "loss": 0.2398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25644373893737793, + "step": 435, + "valid_targets_mean": 3536.0, + "valid_targets_min": 710 + }, + { + "epoch": 0.668693009118541, + "grad_norm": 0.5453606831740409, + "learning_rate": 3.8091106290672456e-05, + "loss": 0.2312, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24661529064178467, + "step": 440, + "valid_targets_mean": 4021.8, + "valid_targets_min": 599 + }, + { + "epoch": 0.6762917933130699, + "grad_norm": 0.5397724365767885, + "learning_rate": 3.852494577006508e-05, + "loss": 0.2536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2421499490737915, + "step": 445, + "valid_targets_mean": 4597.5, + "valid_targets_min": 370 + }, + { + "epoch": 0.6838905775075987, + "grad_norm": 0.5764238756332084, + "learning_rate": 3.8958785249457704e-05, + "loss": 0.2261, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2172556221485138, + "step": 450, + "valid_targets_mean": 5077.4, + "valid_targets_min": 1299 + }, + { + "epoch": 0.6914893617021277, + "grad_norm": 0.5511870862131859, + "learning_rate": 3.939262472885033e-05, + "loss": 0.2546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23796507716178894, + "step": 455, + "valid_targets_mean": 3540.4, + "valid_targets_min": 781 + }, + { + "epoch": 0.6990881458966566, + "grad_norm": 0.5240137590905279, + "learning_rate": 3.982646420824295e-05, + "loss": 0.2749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3238080143928528, + "step": 460, + "valid_targets_mean": 6050.4, + "valid_targets_min": 921 + }, + { + "epoch": 0.7066869300911854, + "grad_norm": 0.504788443919895, + "learning_rate": 3.999994829970777e-05, + "loss": 0.2519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2535270154476166, + "step": 465, + "valid_targets_mean": 4623.6, + "valid_targets_min": 642 + }, + { + "epoch": 0.7142857142857143, + "grad_norm": 0.6552382106268326, + "learning_rate": 3.999963235444541e-05, + "loss": 0.249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23992504179477692, + "step": 470, + "valid_targets_mean": 5108.4, + "valid_targets_min": 702 + }, + { + "epoch": 0.7218844984802432, + "grad_norm": 0.5338400861286152, + "learning_rate": 3.999902919083712e-05, + "loss": 0.2426, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2298986315727234, + "step": 475, + "valid_targets_mean": 4551.4, + "valid_targets_min": 827 + }, + { + "epoch": 0.729483282674772, + "grad_norm": 0.4717163253476549, + "learning_rate": 3.999813881754504e-05, + "loss": 0.2367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22416530549526215, + "step": 480, + "valid_targets_mean": 5166.6, + "valid_targets_min": 2461 + }, + { + "epoch": 0.7370820668693009, + "grad_norm": 0.46569192135259724, + "learning_rate": 3.999696124735598e-05, + "loss": 0.2388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20350381731987, + "step": 485, + "valid_targets_mean": 5345.7, + "valid_targets_min": 1890 + }, + { + "epoch": 0.7446808510638298, + "grad_norm": 0.42173419222407094, + "learning_rate": 3.999549649718124e-05, + "loss": 0.2325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21917913854122162, + "step": 490, + "valid_targets_mean": 6391.4, + "valid_targets_min": 3738 + }, + { + "epoch": 0.7522796352583586, + "grad_norm": 0.5432626540059464, + "learning_rate": 3.999374458805636e-05, + "loss": 0.2436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2308879792690277, + "step": 495, + "valid_targets_mean": 4123.1, + "valid_targets_min": 597 + }, + { + "epoch": 0.7598784194528876, + "grad_norm": 0.5137145588974806, + "learning_rate": 3.999170554514082e-05, + "loss": 0.2331, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23714055120944977, + "step": 500, + "valid_targets_mean": 4805.9, + "valid_targets_min": 605 + }, + { + "epoch": 0.7674772036474165, + "grad_norm": 0.49268000212812557, + "learning_rate": 3.998937939771771e-05, + "loss": 0.2222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24276787042617798, + "step": 505, + "valid_targets_mean": 4071.9, + "valid_targets_min": 790 + }, + { + "epoch": 0.7750759878419453, + "grad_norm": 0.6202296018779553, + "learning_rate": 3.998676617919322e-05, + "loss": 0.2437, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27594321966171265, + "step": 510, + "valid_targets_mean": 3380.7, + "valid_targets_min": 440 + }, + { + "epoch": 0.7826747720364742, + "grad_norm": 0.4863803507783552, + "learning_rate": 3.9983865927096276e-05, + "loss": 0.2384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24749267101287842, + "step": 515, + "valid_targets_mean": 5073.1, + "valid_targets_min": 777 + }, + { + "epoch": 0.790273556231003, + "grad_norm": 0.5836827968647129, + "learning_rate": 3.998067868307792e-05, + "loss": 0.2272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23861373960971832, + "step": 520, + "valid_targets_mean": 5020.6, + "valid_targets_min": 570 + }, + { + "epoch": 0.7978723404255319, + "grad_norm": 0.5231558404449659, + "learning_rate": 3.9977204492910744e-05, + "loss": 0.233, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22653838992118835, + "step": 525, + "valid_targets_mean": 4575.4, + "valid_targets_min": 565 + }, + { + "epoch": 0.8054711246200608, + "grad_norm": 0.5430934870471972, + "learning_rate": 3.997344340648822e-05, + "loss": 0.2358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22603115439414978, + "step": 530, + "valid_targets_mean": 4679.3, + "valid_targets_min": 937 + }, + { + "epoch": 0.8130699088145896, + "grad_norm": 0.5286884499015065, + "learning_rate": 3.996939547782399e-05, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22073689103126526, + "step": 535, + "valid_targets_mean": 4476.1, + "valid_targets_min": 718 + }, + { + "epoch": 0.8206686930091185, + "grad_norm": 0.5785998410503295, + "learning_rate": 3.996506076505109e-05, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2660371661186218, + "step": 540, + "valid_targets_mean": 4106.3, + "valid_targets_min": 487 + }, + { + "epoch": 0.8282674772036475, + "grad_norm": 0.6951317726718936, + "learning_rate": 3.996043933042112e-05, + "loss": 0.2367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24442562460899353, + "step": 545, + "valid_targets_mean": 4844.5, + "valid_targets_min": 834 + }, + { + "epoch": 0.8358662613981763, + "grad_norm": 0.43506966442749545, + "learning_rate": 3.995553124030334e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20776590704917908, + "step": 550, + "valid_targets_mean": 4980.6, + "valid_targets_min": 465 + }, + { + "epoch": 0.8434650455927052, + "grad_norm": 0.4706049122050459, + "learning_rate": 3.9950336565183725e-05, + "loss": 0.2308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27285271883010864, + "step": 555, + "valid_targets_mean": 5395.9, + "valid_targets_min": 990 + }, + { + "epoch": 0.851063829787234, + "grad_norm": 0.49091088519408277, + "learning_rate": 3.994485537966394e-05, + "loss": 0.221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2088671624660492, + "step": 560, + "valid_targets_mean": 4588.9, + "valid_targets_min": 930 + }, + { + "epoch": 0.8586626139817629, + "grad_norm": 0.5986083125056836, + "learning_rate": 3.993908776246029e-05, + "loss": 0.2397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24598953127861023, + "step": 565, + "valid_targets_mean": 3864.4, + "valid_targets_min": 694 + }, + { + "epoch": 0.8662613981762918, + "grad_norm": 0.5161879038146385, + "learning_rate": 3.993303379640256e-05, + "loss": 0.2433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24257272481918335, + "step": 570, + "valid_targets_mean": 3828.1, + "valid_targets_min": 714 + }, + { + "epoch": 0.8738601823708206, + "grad_norm": 0.4991892963677317, + "learning_rate": 3.992669356843287e-05, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2508493661880493, + "step": 575, + "valid_targets_mean": 4322.0, + "valid_targets_min": 1183 + }, + { + "epoch": 0.8814589665653495, + "grad_norm": 0.46377204664593574, + "learning_rate": 3.992006716960437e-05, + "loss": 0.2293, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2126588225364685, + "step": 580, + "valid_targets_mean": 4726.4, + "valid_targets_min": 771 + }, + { + "epoch": 0.8890577507598785, + "grad_norm": 0.4484638597015378, + "learning_rate": 3.9913154695079983e-05, + "loss": 0.2479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2277049422264099, + "step": 585, + "valid_targets_mean": 5441.6, + "valid_targets_min": 672 + }, + { + "epoch": 0.8966565349544073, + "grad_norm": 0.473975742493428, + "learning_rate": 3.9905956244131e-05, + "loss": 0.2406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2310577630996704, + "step": 590, + "valid_targets_mean": 4966.7, + "valid_targets_min": 171 + }, + { + "epoch": 0.9042553191489362, + "grad_norm": 0.5269660383414424, + "learning_rate": 3.989847192013569e-05, + "loss": 0.2439, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20536382496356964, + "step": 595, + "valid_targets_mean": 4305.1, + "valid_targets_min": 818 + }, + { + "epoch": 0.9118541033434651, + "grad_norm": 0.5290469023785411, + "learning_rate": 3.9890701830577784e-05, + "loss": 0.2519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19890496134757996, + "step": 600, + "valid_targets_mean": 3980.8, + "valid_targets_min": 911 + }, + { + "epoch": 0.9194528875379939, + "grad_norm": 0.44803784041894573, + "learning_rate": 3.9882646087044944e-05, + "loss": 0.2219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2399585247039795, + "step": 605, + "valid_targets_mean": 5333.2, + "valid_targets_min": 280 + }, + { + "epoch": 0.9270516717325228, + "grad_norm": 0.4328101297576573, + "learning_rate": 3.987430480522717e-05, + "loss": 0.2227, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21868915855884552, + "step": 610, + "valid_targets_mean": 5678.4, + "valid_targets_min": 1871 + }, + { + "epoch": 0.9346504559270516, + "grad_norm": 0.49304307903824257, + "learning_rate": 3.986567810491511e-05, + "loss": 0.2557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2420819252729416, + "step": 615, + "valid_targets_mean": 4536.2, + "valid_targets_min": 809 + }, + { + "epoch": 0.9422492401215805, + "grad_norm": 0.5700813425381566, + "learning_rate": 3.9856766109998376e-05, + "loss": 0.2407, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21311193704605103, + "step": 620, + "valid_targets_mean": 4451.4, + "valid_targets_min": 593 + }, + { + "epoch": 0.9498480243161094, + "grad_norm": 0.6294683305416847, + "learning_rate": 3.9847568948463754e-05, + "loss": 0.2508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20918530225753784, + "step": 625, + "valid_targets_mean": 4797.4, + "valid_targets_min": 847 + }, + { + "epoch": 0.9574468085106383, + "grad_norm": 0.49380029688075006, + "learning_rate": 3.983808675239333e-05, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2152310311794281, + "step": 630, + "valid_targets_mean": 4176.9, + "valid_targets_min": 692 + }, + { + "epoch": 0.9650455927051672, + "grad_norm": 0.5200102205973911, + "learning_rate": 3.9828319657962655e-05, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2529393136501312, + "step": 635, + "valid_targets_mean": 4521.0, + "valid_targets_min": 1069 + }, + { + "epoch": 0.9726443768996961, + "grad_norm": 0.5009014898507517, + "learning_rate": 3.981826780543873e-05, + "loss": 0.2398, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2124088704586029, + "step": 640, + "valid_targets_mean": 4639.8, + "valid_targets_min": 268 + }, + { + "epoch": 0.9802431610942249, + "grad_norm": 0.47854286438275906, + "learning_rate": 3.980793133917805e-05, + "loss": 0.2284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22133532166481018, + "step": 645, + "valid_targets_mean": 4939.1, + "valid_targets_min": 1059 + }, + { + "epoch": 0.9878419452887538, + "grad_norm": 0.44229863697562477, + "learning_rate": 3.979731040762446e-05, + "loss": 0.2596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20959582924842834, + "step": 650, + "valid_targets_mean": 5179.6, + "valid_targets_min": 805 + }, + { + "epoch": 0.9954407294832827, + "grad_norm": 0.4641726376092792, + "learning_rate": 3.97864051633071e-05, + "loss": 0.2249, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22157493233680725, + "step": 655, + "valid_targets_mean": 4819.9, + "valid_targets_min": 646 + }, + { + "epoch": 1.0030395136778116, + "grad_norm": 0.7931644284514437, + "learning_rate": 3.977521576283815e-05, + "loss": 0.2343, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19440427422523499, + "step": 660, + "valid_targets_mean": 4668.7, + "valid_targets_min": 915 + }, + { + "epoch": 1.0106382978723405, + "grad_norm": 0.48788029210277306, + "learning_rate": 3.9763742366910626e-05, + "loss": 0.206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1956978291273117, + "step": 665, + "valid_targets_mean": 4805.8, + "valid_targets_min": 694 + }, + { + "epoch": 1.0182370820668694, + "grad_norm": 0.5298990414713055, + "learning_rate": 3.975198514029604e-05, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2972638010978699, + "step": 670, + "valid_targets_mean": 4853.2, + "valid_targets_min": 512 + }, + { + "epoch": 1.0258358662613982, + "grad_norm": 0.5473369439399378, + "learning_rate": 3.9739944251842054e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21648220717906952, + "step": 675, + "valid_targets_mean": 5254.8, + "valid_targets_min": 1077 + }, + { + "epoch": 1.033434650455927, + "grad_norm": 0.8965405989782883, + "learning_rate": 3.9727619874470066e-05, + "loss": 0.2229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24239912629127502, + "step": 680, + "valid_targets_mean": 4575.9, + "valid_targets_min": 847 + }, + { + "epoch": 1.041033434650456, + "grad_norm": 0.45053643384905995, + "learning_rate": 3.971501218517267e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20542305707931519, + "step": 685, + "valid_targets_mean": 4701.5, + "valid_targets_min": 359 + }, + { + "epoch": 1.0486322188449848, + "grad_norm": 0.5355621304269382, + "learning_rate": 3.9702121365011194e-05, + "loss": 0.2427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24637873470783234, + "step": 690, + "valid_targets_mean": 4662.4, + "valid_targets_min": 797 + }, + { + "epoch": 1.0562310030395137, + "grad_norm": 0.46031160112965963, + "learning_rate": 3.968894759911304e-05, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22839926183223724, + "step": 695, + "valid_targets_mean": 5164.2, + "valid_targets_min": 904 + }, + { + "epoch": 1.0638297872340425, + "grad_norm": 0.4910433091107382, + "learning_rate": 3.9675491076669043e-05, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21850205957889557, + "step": 700, + "valid_targets_mean": 4950.7, + "valid_targets_min": 478 + }, + { + "epoch": 1.0714285714285714, + "grad_norm": 0.5876241654950701, + "learning_rate": 3.966175199093077e-05, + "loss": 0.2152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23394906520843506, + "step": 705, + "valid_targets_mean": 3169.8, + "valid_targets_min": 809 + }, + { + "epoch": 1.0790273556231003, + "grad_norm": 0.46432326611030017, + "learning_rate": 3.9647730539207715e-05, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2200213074684143, + "step": 710, + "valid_targets_mean": 5574.2, + "valid_targets_min": 2786 + }, + { + "epoch": 1.0866261398176291, + "grad_norm": 0.4275229879620877, + "learning_rate": 3.963342692286449e-05, + "loss": 0.2161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20706182718276978, + "step": 715, + "valid_targets_mean": 5002.6, + "valid_targets_min": 1028 + }, + { + "epoch": 1.094224924012158, + "grad_norm": 0.5268786606408785, + "learning_rate": 3.9618841347317925e-05, + "loss": 0.2099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20836099982261658, + "step": 720, + "valid_targets_mean": 3824.6, + "valid_targets_min": 834 + }, + { + "epoch": 1.1018237082066868, + "grad_norm": 0.44352266391750467, + "learning_rate": 3.9603974022034117e-05, + "loss": 0.2158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1991317719221115, + "step": 725, + "valid_targets_mean": 5762.8, + "valid_targets_min": 2565 + }, + { + "epoch": 1.1094224924012157, + "grad_norm": 0.5059377730602702, + "learning_rate": 3.9588825160525406e-05, + "loss": 0.2101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2332250475883484, + "step": 730, + "valid_targets_mean": 4394.6, + "valid_targets_min": 778 + }, + { + "epoch": 1.1170212765957448, + "grad_norm": 0.5277265421398942, + "learning_rate": 3.9573394980347354e-05, + "loss": 0.2138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23748330771923065, + "step": 735, + "valid_targets_mean": 4523.4, + "valid_targets_min": 588 + }, + { + "epoch": 1.1246200607902737, + "grad_norm": 0.46757311577973365, + "learning_rate": 3.9557683703095564e-05, + "loss": 0.2074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20653977990150452, + "step": 740, + "valid_targets_mean": 5119.2, + "valid_targets_min": 446 + }, + { + "epoch": 1.1322188449848025, + "grad_norm": 0.4423041648030623, + "learning_rate": 3.954169155440255e-05, + "loss": 0.2234, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2072131335735321, + "step": 745, + "valid_targets_mean": 4472.0, + "valid_targets_min": 271 + }, + { + "epoch": 1.1398176291793314, + "grad_norm": 0.4428410311750491, + "learning_rate": 3.952541876393444e-05, + "loss": 0.2206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23159939050674438, + "step": 750, + "valid_targets_mean": 4634.8, + "valid_targets_min": 810 + }, + { + "epoch": 1.1474164133738602, + "grad_norm": 0.5243183428010766, + "learning_rate": 3.9508865565387745e-05, + "loss": 0.2137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22126150131225586, + "step": 755, + "valid_targets_mean": 4285.5, + "valid_targets_min": 702 + }, + { + "epoch": 1.155015197568389, + "grad_norm": 0.45761304225615707, + "learning_rate": 3.949203219648594e-05, + "loss": 0.217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20640668272972107, + "step": 760, + "valid_targets_mean": 4922.4, + "valid_targets_min": 229 + }, + { + "epoch": 1.162613981762918, + "grad_norm": 0.4830790302776904, + "learning_rate": 3.94749188989761e-05, + "loss": 0.2241, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2300214022397995, + "step": 765, + "valid_targets_mean": 4988.9, + "valid_targets_min": 2275 + }, + { + "epoch": 1.1702127659574468, + "grad_norm": 0.6344964312381591, + "learning_rate": 3.945752591862538e-05, + "loss": 0.2075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22842663526535034, + "step": 770, + "valid_targets_mean": 3627.2, + "valid_targets_min": 662 + }, + { + "epoch": 1.1778115501519757, + "grad_norm": 0.48098689360030655, + "learning_rate": 3.943985350521753e-05, + "loss": 0.2306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2292681783437729, + "step": 775, + "valid_targets_mean": 4028.1, + "valid_targets_min": 732 + }, + { + "epoch": 1.1854103343465046, + "grad_norm": 0.5336086809835088, + "learning_rate": 3.942190191254928e-05, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22446206212043762, + "step": 780, + "valid_targets_mean": 4275.8, + "valid_targets_min": 741 + }, + { + "epoch": 1.1930091185410334, + "grad_norm": 0.613832091134627, + "learning_rate": 3.9403671398426705e-05, + "loss": 0.2138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2014659196138382, + "step": 785, + "valid_targets_mean": 5527.8, + "valid_targets_min": 2519 + }, + { + "epoch": 1.2006079027355623, + "grad_norm": 0.7772463802272693, + "learning_rate": 3.938516222466153e-05, + "loss": 0.2149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20852729678153992, + "step": 790, + "valid_targets_mean": 4968.9, + "valid_targets_min": 605 + }, + { + "epoch": 1.2082066869300911, + "grad_norm": 0.5158401148303159, + "learning_rate": 3.936637465706735e-05, + "loss": 0.213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25038856267929077, + "step": 795, + "valid_targets_mean": 4784.4, + "valid_targets_min": 655 + }, + { + "epoch": 1.21580547112462, + "grad_norm": 1.031320737042501, + "learning_rate": 3.934730896545583e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21661558747291565, + "step": 800, + "valid_targets_mean": 3556.4, + "valid_targets_min": 653 + }, + { + "epoch": 1.2234042553191489, + "grad_norm": 0.46779923698778514, + "learning_rate": 3.932796542363283e-05, + "loss": 0.2113, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19566161930561066, + "step": 805, + "valid_targets_mean": 4376.0, + "valid_targets_min": 810 + }, + { + "epoch": 1.2310030395136777, + "grad_norm": 0.4326481517949445, + "learning_rate": 3.930834430939444e-05, + "loss": 0.2145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19606706500053406, + "step": 810, + "valid_targets_mean": 5616.6, + "valid_targets_min": 2732 + }, + { + "epoch": 1.2386018237082066, + "grad_norm": 0.47029688029877836, + "learning_rate": 3.9288445904523063e-05, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1963462084531784, + "step": 815, + "valid_targets_mean": 4256.6, + "valid_targets_min": 1593 + }, + { + "epoch": 1.2462006079027357, + "grad_norm": 0.4914352971694942, + "learning_rate": 3.926827049478329e-05, + "loss": 0.2273, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2369862049818039, + "step": 820, + "valid_targets_mean": 5430.8, + "valid_targets_min": 465 + }, + { + "epoch": 1.2537993920972643, + "grad_norm": 0.4983986981283837, + "learning_rate": 3.924781836991783e-05, + "loss": 0.2175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2174890637397766, + "step": 825, + "valid_targets_mean": 4507.7, + "valid_targets_min": 801 + }, + { + "epoch": 1.2613981762917934, + "grad_norm": 0.4385888971144186, + "learning_rate": 3.922708982364337e-05, + "loss": 0.2118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21676486730575562, + "step": 830, + "valid_targets_mean": 5402.2, + "valid_targets_min": 933 + }, + { + "epoch": 1.2689969604863223, + "grad_norm": 0.45249778212242275, + "learning_rate": 3.920608515364631e-05, + "loss": 0.2245, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20077060163021088, + "step": 835, + "valid_targets_mean": 4443.8, + "valid_targets_min": 491 + }, + { + "epoch": 1.2765957446808511, + "grad_norm": 0.47777362802657686, + "learning_rate": 3.9184804661578535e-05, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23784896731376648, + "step": 840, + "valid_targets_mean": 5061.4, + "valid_targets_min": 846 + }, + { + "epoch": 1.28419452887538, + "grad_norm": 0.4135085697969651, + "learning_rate": 3.9163248653053033e-05, + "loss": 0.2066, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20635738968849182, + "step": 845, + "valid_targets_mean": 5489.4, + "valid_targets_min": 1148 + }, + { + "epoch": 1.2917933130699089, + "grad_norm": 0.4117297091921728, + "learning_rate": 3.9141417437639566e-05, + "loss": 0.2018, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18252739310264587, + "step": 850, + "valid_targets_mean": 4780.3, + "valid_targets_min": 799 + }, + { + "epoch": 1.2993920972644377, + "grad_norm": 0.48551756342329966, + "learning_rate": 3.911931132886016e-05, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24173299968242645, + "step": 855, + "valid_targets_mean": 5113.2, + "valid_targets_min": 756 + }, + { + "epoch": 1.3069908814589666, + "grad_norm": 0.4727140541170104, + "learning_rate": 3.9096930644184674e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1915874183177948, + "step": 860, + "valid_targets_mean": 4394.3, + "valid_targets_min": 685 + }, + { + "epoch": 1.3145896656534954, + "grad_norm": 0.48025400940036744, + "learning_rate": 3.907427570502616e-05, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2418954074382782, + "step": 865, + "valid_targets_mean": 3894.8, + "valid_targets_min": 502 + }, + { + "epoch": 1.3221884498480243, + "grad_norm": 0.4849042187708983, + "learning_rate": 3.90513468367363e-05, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2329632043838501, + "step": 870, + "valid_targets_mean": 4571.7, + "valid_targets_min": 1992 + }, + { + "epoch": 1.3297872340425532, + "grad_norm": 0.5027508815921192, + "learning_rate": 3.9028144368600746e-05, + "loss": 0.2072, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19249165058135986, + "step": 875, + "valid_targets_mean": 4092.1, + "valid_targets_min": 490 + }, + { + "epoch": 1.337386018237082, + "grad_norm": 0.43340094826535847, + "learning_rate": 3.900466863383434e-05, + "loss": 0.2074, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16721683740615845, + "step": 880, + "valid_targets_mean": 4360.1, + "valid_targets_min": 678 + }, + { + "epoch": 1.344984802431611, + "grad_norm": 0.42492899268845147, + "learning_rate": 3.898091996957638e-05, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18311715126037598, + "step": 885, + "valid_targets_mean": 5269.4, + "valid_targets_min": 2234 + }, + { + "epoch": 1.3525835866261398, + "grad_norm": 0.45185682893053764, + "learning_rate": 3.895689871688571e-05, + "loss": 0.2277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2394876629114151, + "step": 890, + "valid_targets_mean": 5222.2, + "valid_targets_min": 779 + }, + { + "epoch": 1.3601823708206686, + "grad_norm": 0.4595708686629068, + "learning_rate": 3.893260522073591e-05, + "loss": 0.2037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19897980988025665, + "step": 895, + "valid_targets_mean": 4327.2, + "valid_targets_min": 1913 + }, + { + "epoch": 1.3677811550151975, + "grad_norm": 0.5538183693714539, + "learning_rate": 3.8908039830010296e-05, + "loss": 0.2085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21077513694763184, + "step": 900, + "valid_targets_mean": 4477.2, + "valid_targets_min": 1914 + }, + { + "epoch": 1.3753799392097266, + "grad_norm": 0.4826233268835563, + "learning_rate": 3.888320289749687e-05, + "loss": 0.2167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20590105652809143, + "step": 905, + "valid_targets_mean": 4855.0, + "valid_targets_min": 987 + }, + { + "epoch": 1.3829787234042552, + "grad_norm": 0.45816874548401804, + "learning_rate": 3.885809477988334e-05, + "loss": 0.2051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18594498932361603, + "step": 910, + "valid_targets_mean": 4613.8, + "valid_targets_min": 486 + }, + { + "epoch": 1.3905775075987843, + "grad_norm": 0.46458650367475696, + "learning_rate": 3.883271583775194e-05, + "loss": 0.2166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20757240056991577, + "step": 915, + "valid_targets_mean": 5135.2, + "valid_targets_min": 567 + }, + { + "epoch": 1.3981762917933132, + "grad_norm": 0.49573212835944025, + "learning_rate": 3.880706643557425e-05, + "loss": 0.2267, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20922252535820007, + "step": 920, + "valid_targets_mean": 4993.9, + "valid_targets_min": 1608 + }, + { + "epoch": 1.405775075987842, + "grad_norm": 0.5065076653284654, + "learning_rate": 3.8781146941705975e-05, + "loss": 0.219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22266633808612823, + "step": 925, + "valid_targets_mean": 3984.9, + "valid_targets_min": 663 + }, + { + "epoch": 1.4133738601823709, + "grad_norm": 0.46389395621934165, + "learning_rate": 3.8754957728381676e-05, + "loss": 0.2122, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1952955424785614, + "step": 930, + "valid_targets_mean": 4305.5, + "valid_targets_min": 659 + }, + { + "epoch": 1.4209726443768997, + "grad_norm": 0.4244364300260148, + "learning_rate": 3.87284991717094e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19909913837909698, + "step": 935, + "valid_targets_mean": 5106.8, + "valid_targets_min": 692 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 0.48753574099908936, + "learning_rate": 3.870177165166526e-05, + "loss": 0.2278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22329512238502502, + "step": 940, + "valid_targets_mean": 4227.9, + "valid_targets_min": 960 + }, + { + "epoch": 1.4361702127659575, + "grad_norm": 0.46921808961532296, + "learning_rate": 3.8674775552088034e-05, + "loss": 0.21, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1966555416584015, + "step": 945, + "valid_targets_mean": 4668.8, + "valid_targets_min": 1045 + }, + { + "epoch": 1.4437689969604863, + "grad_norm": 0.5011462054285303, + "learning_rate": 3.864751126067359e-05, + "loss": 0.2288, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3067967891693115, + "step": 950, + "valid_targets_mean": 5225.8, + "valid_targets_min": 982 + }, + { + "epoch": 1.4513677811550152, + "grad_norm": 0.45640279658712546, + "learning_rate": 3.861997916896937e-05, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20878750085830688, + "step": 955, + "valid_targets_mean": 4429.0, + "valid_targets_min": 401 + }, + { + "epoch": 1.458966565349544, + "grad_norm": 0.4318101933694944, + "learning_rate": 3.859217967236872e-05, + "loss": 0.2142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21009142696857452, + "step": 960, + "valid_targets_mean": 5187.9, + "valid_targets_min": 1245 + }, + { + "epoch": 1.466565349544073, + "grad_norm": 0.4639537052196582, + "learning_rate": 3.856411317010525e-05, + "loss": 0.2253, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20182490348815918, + "step": 965, + "valid_targets_mean": 4228.9, + "valid_targets_min": 1395 + }, + { + "epoch": 1.4741641337386018, + "grad_norm": 0.5830750830049942, + "learning_rate": 3.853578006524711e-05, + "loss": 0.2198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25057747960090637, + "step": 970, + "valid_targets_mean": 3692.9, + "valid_targets_min": 853 + }, + { + "epoch": 1.4817629179331306, + "grad_norm": 0.6135846306078634, + "learning_rate": 3.8507180764691134e-05, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26475679874420166, + "step": 975, + "valid_targets_mean": 5166.6, + "valid_targets_min": 2214 + }, + { + "epoch": 1.4893617021276595, + "grad_norm": 0.42930269291397805, + "learning_rate": 3.847831567915706e-05, + "loss": 0.23, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22688277065753937, + "step": 980, + "valid_targets_mean": 5970.2, + "valid_targets_min": 1687 + }, + { + "epoch": 1.4969604863221884, + "grad_norm": 0.44889401709296317, + "learning_rate": 3.844918522318164e-05, + "loss": 0.2144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2054261565208435, + "step": 985, + "valid_targets_mean": 4887.9, + "valid_targets_min": 1082 + }, + { + "epoch": 1.5045592705167175, + "grad_norm": 0.4849219730059719, + "learning_rate": 3.84197898151126e-05, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2193393111228943, + "step": 990, + "valid_targets_mean": 5518.3, + "valid_targets_min": 348 + }, + { + "epoch": 1.512158054711246, + "grad_norm": 0.47364060795273005, + "learning_rate": 3.839012987710275e-05, + "loss": 0.208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2376159131526947, + "step": 995, + "valid_targets_mean": 4844.3, + "valid_targets_min": 873 + }, + { + "epoch": 1.5197568389057752, + "grad_norm": 0.477005009585305, + "learning_rate": 3.836020583510382e-05, + "loss": 0.2223, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23394693434238434, + "step": 1000, + "valid_targets_mean": 4632.9, + "valid_targets_min": 763 + }, + { + "epoch": 1.5273556231003038, + "grad_norm": 0.43901547735782315, + "learning_rate": 3.833001811886041e-05, + "loss": 0.2131, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2290436327457428, + "step": 1005, + "valid_targets_mean": 4733.1, + "valid_targets_min": 729 + }, + { + "epoch": 1.534954407294833, + "grad_norm": 0.4128584010075957, + "learning_rate": 3.8299567161903787e-05, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18785551190376282, + "step": 1010, + "valid_targets_mean": 5352.2, + "valid_targets_min": 2335 + }, + { + "epoch": 1.5425531914893615, + "grad_norm": 0.5109402768329384, + "learning_rate": 3.826885340154566e-05, + "loss": 0.2272, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25076034665107727, + "step": 1015, + "valid_targets_mean": 4775.5, + "valid_targets_min": 891 + }, + { + "epoch": 1.5501519756838906, + "grad_norm": 0.4615963241009783, + "learning_rate": 3.8237877278871916e-05, + "loss": 0.2257, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22021520137786865, + "step": 1020, + "valid_targets_mean": 4804.6, + "valid_targets_min": 1759 + }, + { + "epoch": 1.5577507598784195, + "grad_norm": 0.41168421067320043, + "learning_rate": 3.820663923873626e-05, + "loss": 0.2219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22017160058021545, + "step": 1025, + "valid_targets_mean": 5034.6, + "valid_targets_min": 689 + }, + { + "epoch": 1.5653495440729484, + "grad_norm": 0.5570301288304671, + "learning_rate": 3.817513972975385e-05, + "loss": 0.2154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21190384030342102, + "step": 1030, + "valid_targets_mean": 3886.7, + "valid_targets_min": 816 + }, + { + "epoch": 1.5729483282674772, + "grad_norm": 0.48710822220429656, + "learning_rate": 3.814337920429485e-05, + "loss": 0.2128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21960590779781342, + "step": 1035, + "valid_targets_mean": 4042.9, + "valid_targets_min": 546 + }, + { + "epoch": 1.580547112462006, + "grad_norm": 0.4956003916710543, + "learning_rate": 3.811135811847792e-05, + "loss": 0.2138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2413313090801239, + "step": 1040, + "valid_targets_mean": 4299.2, + "valid_targets_min": 605 + }, + { + "epoch": 1.588145896656535, + "grad_norm": 0.5142660653107718, + "learning_rate": 3.807907693216368e-05, + "loss": 0.2109, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23288393020629883, + "step": 1045, + "valid_targets_mean": 4768.5, + "valid_targets_min": 811 + }, + { + "epoch": 1.5957446808510638, + "grad_norm": 0.5129928776520155, + "learning_rate": 3.804653610894811e-05, + "loss": 0.2157, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20630177855491638, + "step": 1050, + "valid_targets_mean": 3965.7, + "valid_targets_min": 1768 + }, + { + "epoch": 1.6033434650455927, + "grad_norm": 0.4428766734377924, + "learning_rate": 3.801373611615585e-05, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2139415144920349, + "step": 1055, + "valid_targets_mean": 4501.2, + "valid_targets_min": 528 + }, + { + "epoch": 1.6109422492401215, + "grad_norm": 0.3976871596351703, + "learning_rate": 3.798067742483355e-05, + "loss": 0.2055, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1983199268579483, + "step": 1060, + "valid_targets_mean": 4932.3, + "valid_targets_min": 593 + }, + { + "epoch": 1.6185410334346506, + "grad_norm": 0.43538181451443764, + "learning_rate": 3.794736050974308e-05, + "loss": 0.2291, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21794393658638, + "step": 1065, + "valid_targets_mean": 5077.6, + "valid_targets_min": 828 + }, + { + "epoch": 1.6261398176291793, + "grad_norm": 0.41261656921486206, + "learning_rate": 3.7913785849354693e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18821759521961212, + "step": 1070, + "valid_targets_mean": 4659.0, + "valid_targets_min": 776 + }, + { + "epoch": 1.6337386018237083, + "grad_norm": 0.4607656496157841, + "learning_rate": 3.787995392584017e-05, + "loss": 0.2155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23070788383483887, + "step": 1075, + "valid_targets_mean": 5037.2, + "valid_targets_min": 767 + }, + { + "epoch": 1.641337386018237, + "grad_norm": 0.521744416704874, + "learning_rate": 3.784586522506589e-05, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23326215147972107, + "step": 1080, + "valid_targets_mean": 4300.6, + "valid_targets_min": 752 + }, + { + "epoch": 1.648936170212766, + "grad_norm": 0.5411151907665869, + "learning_rate": 3.781152023658588e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19801482558250427, + "step": 1085, + "valid_targets_mean": 4037.4, + "valid_targets_min": 565 + }, + { + "epoch": 1.6565349544072947, + "grad_norm": 0.4881180236795167, + "learning_rate": 3.7776919453634735e-05, + "loss": 0.2217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20285995304584503, + "step": 1090, + "valid_targets_mean": 4716.8, + "valid_targets_min": 1812 + }, + { + "epoch": 1.6641337386018238, + "grad_norm": 0.3892565204932596, + "learning_rate": 3.774206337312058e-05, + "loss": 0.2146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19384154677391052, + "step": 1095, + "valid_targets_mean": 5548.1, + "valid_targets_min": 1723 + }, + { + "epoch": 1.6717325227963524, + "grad_norm": 0.6079201585046432, + "learning_rate": 3.7706952495617895e-05, + "loss": 0.2228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2272331863641739, + "step": 1100, + "valid_targets_mean": 4440.2, + "valid_targets_min": 754 + }, + { + "epoch": 1.6793313069908815, + "grad_norm": 0.4392880697961998, + "learning_rate": 3.767158732536037e-05, + "loss": 0.2057, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20622758567333221, + "step": 1105, + "valid_targets_mean": 5115.8, + "valid_targets_min": 2178 + }, + { + "epoch": 1.6869300911854104, + "grad_norm": 0.4760913157214835, + "learning_rate": 3.7635968370233625e-05, + "loss": 0.1947, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22895006835460663, + "step": 1110, + "valid_targets_mean": 4197.4, + "valid_targets_min": 664 + }, + { + "epoch": 1.6945288753799392, + "grad_norm": 0.42921725245111436, + "learning_rate": 3.760009614176792e-05, + "loss": 0.2092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20630860328674316, + "step": 1115, + "valid_targets_mean": 5539.1, + "valid_targets_min": 1629 + }, + { + "epoch": 1.702127659574468, + "grad_norm": 0.47421657315391796, + "learning_rate": 3.7563971155130834e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19295352697372437, + "step": 1120, + "valid_targets_mean": 4742.3, + "valid_targets_min": 900 + }, + { + "epoch": 1.709726443768997, + "grad_norm": 0.44277966038489874, + "learning_rate": 3.752759392911986e-05, + "loss": 0.2121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21693378686904907, + "step": 1125, + "valid_targets_mean": 5488.9, + "valid_targets_min": 2673 + }, + { + "epoch": 1.7173252279635258, + "grad_norm": 0.35764926472016434, + "learning_rate": 3.7490964986154936e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1632017195224762, + "step": 1130, + "valid_targets_mean": 5798.2, + "valid_targets_min": 2382 + }, + { + "epoch": 1.7249240121580547, + "grad_norm": 0.46310293456133733, + "learning_rate": 3.745408485227094e-05, + "loss": 0.2166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2125653326511383, + "step": 1135, + "valid_targets_mean": 4493.4, + "valid_targets_min": 703 + }, + { + "epoch": 1.7325227963525835, + "grad_norm": 0.48402636671915206, + "learning_rate": 3.7416954057110165e-05, + "loss": 0.2155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2212226539850235, + "step": 1140, + "valid_targets_mean": 4891.2, + "valid_targets_min": 422 + }, + { + "epoch": 1.7401215805471124, + "grad_norm": 0.48544415851378125, + "learning_rate": 3.7379573133914686e-05, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24577516317367554, + "step": 1145, + "valid_targets_mean": 4704.4, + "valid_targets_min": 683 + }, + { + "epoch": 1.7477203647416415, + "grad_norm": 0.4686217061157862, + "learning_rate": 3.7341942619518736e-05, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20010778307914734, + "step": 1150, + "valid_targets_mean": 4060.2, + "valid_targets_min": 740 + }, + { + "epoch": 1.7553191489361701, + "grad_norm": 0.456474362675113, + "learning_rate": 3.730406305434093e-05, + "loss": 0.2183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21529576182365417, + "step": 1155, + "valid_targets_mean": 4461.8, + "valid_targets_min": 407 + }, + { + "epoch": 1.7629179331306992, + "grad_norm": 0.39386216864446827, + "learning_rate": 3.726593498237659e-05, + "loss": 0.2173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20742088556289673, + "step": 1160, + "valid_targets_mean": 5696.4, + "valid_targets_min": 832 + }, + { + "epoch": 1.7705167173252279, + "grad_norm": 0.4954398039746765, + "learning_rate": 3.7227558951189866e-05, + "loss": 0.221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2127082198858261, + "step": 1165, + "valid_targets_mean": 4003.1, + "valid_targets_min": 1097 + }, + { + "epoch": 1.778115501519757, + "grad_norm": 0.47012021916770536, + "learning_rate": 3.7188935511905895e-05, + "loss": 0.2208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2261389195919037, + "step": 1170, + "valid_targets_mean": 5014.4, + "valid_targets_min": 1036 + }, + { + "epoch": 1.7857142857142856, + "grad_norm": 0.4746767287862107, + "learning_rate": 3.715006521920289e-05, + "loss": 0.2202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19433888792991638, + "step": 1175, + "valid_targets_mean": 4523.8, + "valid_targets_min": 625 + }, + { + "epoch": 1.7933130699088147, + "grad_norm": 0.4791758218613005, + "learning_rate": 3.711094863130417e-05, + "loss": 0.2161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22052708268165588, + "step": 1180, + "valid_targets_mean": 5126.8, + "valid_targets_min": 1164 + }, + { + "epoch": 1.8009118541033433, + "grad_norm": 0.43857817303439106, + "learning_rate": 3.707158630997015e-05, + "loss": 0.2186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21629557013511658, + "step": 1185, + "valid_targets_mean": 5371.6, + "valid_targets_min": 728 + }, + { + "epoch": 1.8085106382978724, + "grad_norm": 0.5348041335538533, + "learning_rate": 3.703197882049026e-05, + "loss": 0.21, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2324264943599701, + "step": 1190, + "valid_targets_mean": 3775.2, + "valid_targets_min": 637 + }, + { + "epoch": 1.8161094224924013, + "grad_norm": 0.3889586019552747, + "learning_rate": 3.699212673167484e-05, + "loss": 0.2116, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2274361550807953, + "step": 1195, + "valid_targets_mean": 6306.4, + "valid_targets_min": 622 + }, + { + "epoch": 1.8237082066869301, + "grad_norm": 0.46857624747519244, + "learning_rate": 3.695203061584695e-05, + "loss": 0.2068, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23175036907196045, + "step": 1200, + "valid_targets_mean": 4628.2, + "valid_targets_min": 429 + }, + { + "epoch": 1.831306990881459, + "grad_norm": 0.4885718145421046, + "learning_rate": 3.69116910488342e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22779184579849243, + "step": 1205, + "valid_targets_mean": 4191.6, + "valid_targets_min": 156 + }, + { + "epoch": 1.8389057750759878, + "grad_norm": 0.4639904198544056, + "learning_rate": 3.687110860996041e-05, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2027101218700409, + "step": 1210, + "valid_targets_mean": 4831.6, + "valid_targets_min": 699 + }, + { + "epoch": 1.8465045592705167, + "grad_norm": 0.46833719588845396, + "learning_rate": 3.6830283882037335e-05, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20325730741024017, + "step": 1215, + "valid_targets_mean": 5097.8, + "valid_targets_min": 465 + }, + { + "epoch": 1.8541033434650456, + "grad_norm": 0.41463391792421084, + "learning_rate": 3.678921745135631e-05, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20085079967975616, + "step": 1220, + "valid_targets_mean": 5023.6, + "valid_targets_min": 633 + }, + { + "epoch": 1.8617021276595744, + "grad_norm": 0.44749514129370893, + "learning_rate": 3.674790990767979e-05, + "loss": 0.2289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20688676834106445, + "step": 1225, + "valid_targets_mean": 4845.9, + "valid_targets_min": 667 + }, + { + "epoch": 1.8693009118541033, + "grad_norm": 0.4502221753599573, + "learning_rate": 3.670636184423288e-05, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19071269035339355, + "step": 1230, + "valid_targets_mean": 4245.0, + "valid_targets_min": 500 + }, + { + "epoch": 1.8768996960486324, + "grad_norm": 0.5334681412726286, + "learning_rate": 3.666457385769487e-05, + "loss": 0.2042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2248763144016266, + "step": 1235, + "valid_targets_mean": 4481.5, + "valid_targets_min": 770 + }, + { + "epoch": 1.884498480243161, + "grad_norm": 0.4644916347166466, + "learning_rate": 3.66225465481906e-05, + "loss": 0.2091, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2305462807416916, + "step": 1240, + "valid_targets_mean": 4303.9, + "valid_targets_min": 630 + }, + { + "epoch": 1.89209726443769, + "grad_norm": 0.4267832479199372, + "learning_rate": 3.658028051928189e-05, + "loss": 0.2092, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18598158657550812, + "step": 1245, + "valid_targets_mean": 4759.4, + "valid_targets_min": 965 + }, + { + "epoch": 1.8996960486322187, + "grad_norm": 0.46975601834979414, + "learning_rate": 3.6537776377958836e-05, + "loss": 0.2302, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2438843995332718, + "step": 1250, + "valid_targets_mean": 4358.6, + "valid_targets_min": 545 + }, + { + "epoch": 1.9072948328267478, + "grad_norm": 0.41467959071923455, + "learning_rate": 3.649503473463112e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18656185269355774, + "step": 1255, + "valid_targets_mean": 4789.7, + "valid_targets_min": 622 + }, + { + "epoch": 1.9148936170212765, + "grad_norm": 0.48720410950048604, + "learning_rate": 3.645205620311923e-05, + "loss": 0.1892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2179066389799118, + "step": 1260, + "valid_targets_mean": 4316.3, + "valid_targets_min": 718 + }, + { + "epoch": 1.9224924012158056, + "grad_norm": 0.47737074347118913, + "learning_rate": 3.6408841400645644e-05, + "loss": 0.2311, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20159170031547546, + "step": 1265, + "valid_targets_mean": 4676.4, + "valid_targets_min": 920 + }, + { + "epoch": 1.9300911854103342, + "grad_norm": 0.38561599683027503, + "learning_rate": 3.636539094782598e-05, + "loss": 0.2151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18330830335617065, + "step": 1270, + "valid_targets_mean": 4813.8, + "valid_targets_min": 811 + }, + { + "epoch": 1.9376899696048633, + "grad_norm": 0.4850605122437156, + "learning_rate": 3.632170546866007e-05, + "loss": 0.2168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22532254457473755, + "step": 1275, + "valid_targets_mean": 3973.6, + "valid_targets_min": 335 + }, + { + "epoch": 1.9452887537993921, + "grad_norm": 0.43992666790664475, + "learning_rate": 3.6277785590523e-05, + "loss": 0.2201, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24039697647094727, + "step": 1280, + "valid_targets_mean": 5707.9, + "valid_targets_min": 511 + }, + { + "epoch": 1.952887537993921, + "grad_norm": 3.9664174496666083, + "learning_rate": 3.623363194415609e-05, + "loss": 0.214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21847763657569885, + "step": 1285, + "valid_targets_mean": 4531.5, + "valid_targets_min": 991 + }, + { + "epoch": 1.9604863221884499, + "grad_norm": 0.4595577415716105, + "learning_rate": 3.618924516365788e-05, + "loss": 0.2042, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20071235299110413, + "step": 1290, + "valid_targets_mean": 4902.6, + "valid_targets_min": 777 + }, + { + "epoch": 1.9680851063829787, + "grad_norm": 0.44567806699122325, + "learning_rate": 3.614462588647495e-05, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24957430362701416, + "step": 1295, + "valid_targets_mean": 4669.9, + "valid_targets_min": 1479 + }, + { + "epoch": 1.9756838905775076, + "grad_norm": 0.42210130028239157, + "learning_rate": 3.609977475339284e-05, + "loss": 0.2064, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2057563215494156, + "step": 1300, + "valid_targets_mean": 5229.6, + "valid_targets_min": 969 + }, + { + "epoch": 1.9832826747720365, + "grad_norm": 0.48785866462041777, + "learning_rate": 3.6054692408526806e-05, + "loss": 0.1967, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2031518667936325, + "step": 1305, + "valid_targets_mean": 3585.4, + "valid_targets_min": 843 + }, + { + "epoch": 1.9908814589665653, + "grad_norm": 0.4191280920943274, + "learning_rate": 3.6009379499312563e-05, + "loss": 0.2142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21392418444156647, + "step": 1310, + "valid_targets_mean": 5448.1, + "valid_targets_min": 2162 + }, + { + "epoch": 1.9984802431610942, + "grad_norm": 0.571865229628622, + "learning_rate": 3.5963836676497034e-05, + "loss": 0.2043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21553698182106018, + "step": 1315, + "valid_targets_mean": 3434.1, + "valid_targets_min": 460 + }, + { + "epoch": 2.0060790273556233, + "grad_norm": 0.4580819817800281, + "learning_rate": 3.5918064594128946e-05, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18602877855300903, + "step": 1320, + "valid_targets_mean": 4337.8, + "valid_targets_min": 527 + }, + { + "epoch": 2.013677811550152, + "grad_norm": 0.4308848554547568, + "learning_rate": 3.5872063909549465e-05, + "loss": 0.2003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1809544563293457, + "step": 1325, + "valid_targets_mean": 4842.1, + "valid_targets_min": 305 + }, + { + "epoch": 2.021276595744681, + "grad_norm": 0.4304570423447681, + "learning_rate": 3.5825835283382754e-05, + "loss": 0.1893, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17694029211997986, + "step": 1330, + "valid_targets_mean": 5139.7, + "valid_targets_min": 1709 + }, + { + "epoch": 2.0288753799392096, + "grad_norm": 0.4201481077802898, + "learning_rate": 3.5779379379526516e-05, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17606091499328613, + "step": 1335, + "valid_targets_mean": 5894.2, + "valid_targets_min": 348 + }, + { + "epoch": 2.0364741641337387, + "grad_norm": 0.43796821938518954, + "learning_rate": 3.57326968651424e-05, + "loss": 0.1873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19579800963401794, + "step": 1340, + "valid_targets_mean": 4780.1, + "valid_targets_min": 1077 + }, + { + "epoch": 2.0440729483282674, + "grad_norm": 0.431578307043594, + "learning_rate": 3.5685788410646455e-05, + "loss": 0.1859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17831829190254211, + "step": 1345, + "valid_targets_mean": 4907.6, + "valid_targets_min": 590 + }, + { + "epoch": 2.0516717325227964, + "grad_norm": 0.4188648321222995, + "learning_rate": 3.5638654689699493e-05, + "loss": 0.1895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15893448889255524, + "step": 1350, + "valid_targets_mean": 5083.1, + "valid_targets_min": 1164 + }, + { + "epoch": 2.059270516717325, + "grad_norm": 0.4452501919855169, + "learning_rate": 3.559129637919744e-05, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19416233897209167, + "step": 1355, + "valid_targets_mean": 4871.6, + "valid_targets_min": 1124 + }, + { + "epoch": 2.066869300911854, + "grad_norm": 0.5486933381235863, + "learning_rate": 3.5543714159261576e-05, + "loss": 0.1991, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21765755116939545, + "step": 1360, + "valid_targets_mean": 4549.6, + "valid_targets_min": 756 + }, + { + "epoch": 2.074468085106383, + "grad_norm": 0.46291383560602983, + "learning_rate": 3.5495908713228774e-05, + "loss": 0.1955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22245173156261444, + "step": 1365, + "valid_targets_mean": 4870.3, + "valid_targets_min": 829 + }, + { + "epoch": 2.082066869300912, + "grad_norm": 0.43346588262932606, + "learning_rate": 3.544788072764173e-05, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16855968534946442, + "step": 1370, + "valid_targets_mean": 5086.5, + "valid_targets_min": 1568 + }, + { + "epoch": 2.0896656534954405, + "grad_norm": 0.46753186284834775, + "learning_rate": 3.5399630892239036e-05, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17375516891479492, + "step": 1375, + "valid_targets_mean": 4855.5, + "valid_targets_min": 1441 + }, + { + "epoch": 2.0972644376899696, + "grad_norm": 0.5417497219166529, + "learning_rate": 3.535115989994533e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22097179293632507, + "step": 1380, + "valid_targets_mean": 4113.6, + "valid_targets_min": 608 + }, + { + "epoch": 2.1048632218844983, + "grad_norm": 0.5031450181476247, + "learning_rate": 3.530246844686133e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20294921100139618, + "step": 1385, + "valid_targets_mean": 4546.1, + "valid_targets_min": 610 + }, + { + "epoch": 2.1124620060790273, + "grad_norm": 0.4352408641314383, + "learning_rate": 3.5253557232253805e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18328158557415009, + "step": 1390, + "valid_targets_mean": 4523.4, + "valid_targets_min": 317 + }, + { + "epoch": 2.1200607902735564, + "grad_norm": 0.4965079663934899, + "learning_rate": 3.520442695854558e-05, + "loss": 0.1903, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19759127497673035, + "step": 1395, + "valid_targets_mean": 4122.6, + "valid_targets_min": 651 + }, + { + "epoch": 2.127659574468085, + "grad_norm": 0.4372048480171316, + "learning_rate": 3.515507833130543e-05, + "loss": 0.2099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.260989785194397, + "step": 1400, + "valid_targets_mean": 5767.1, + "valid_targets_min": 647 + }, + { + "epoch": 2.135258358662614, + "grad_norm": 0.4432824108788825, + "learning_rate": 3.510551205923793e-05, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1798432469367981, + "step": 1405, + "valid_targets_mean": 4649.9, + "valid_targets_min": 887 + }, + { + "epoch": 2.142857142857143, + "grad_norm": 0.47056895174749847, + "learning_rate": 3.50557288541733e-05, + "loss": 0.2022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28239643573760986, + "step": 1410, + "valid_targets_mean": 5144.9, + "valid_targets_min": 668 + }, + { + "epoch": 2.150455927051672, + "grad_norm": 0.48033852287738354, + "learning_rate": 3.5005729431057176e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18898963928222656, + "step": 1415, + "valid_targets_mean": 4188.7, + "valid_targets_min": 642 + }, + { + "epoch": 2.1580547112462005, + "grad_norm": 0.44907430023415723, + "learning_rate": 3.4955514507940335e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21947622299194336, + "step": 1420, + "valid_targets_mean": 4689.1, + "valid_targets_min": 811 + }, + { + "epoch": 2.1656534954407296, + "grad_norm": 0.48987352731998796, + "learning_rate": 3.490508480596839e-05, + "loss": 0.2001, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23712974786758423, + "step": 1425, + "valid_targets_mean": 4526.9, + "valid_targets_min": 1906 + }, + { + "epoch": 2.1732522796352582, + "grad_norm": 0.4635255839622089, + "learning_rate": 3.485444104937144e-05, + "loss": 0.188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19113010168075562, + "step": 1430, + "valid_targets_mean": 5611.0, + "valid_targets_min": 506 + }, + { + "epoch": 2.1808510638297873, + "grad_norm": 0.5259676202088338, + "learning_rate": 3.4803583965453635e-05, + "loss": 0.1946, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1842001974582672, + "step": 1435, + "valid_targets_mean": 5102.6, + "valid_targets_min": 2495 + }, + { + "epoch": 2.188449848024316, + "grad_norm": 0.44606267698952673, + "learning_rate": 3.475251428458281e-05, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16230902075767517, + "step": 1440, + "valid_targets_mean": 4659.9, + "valid_targets_min": 559 + }, + { + "epoch": 2.196048632218845, + "grad_norm": 0.4688428145128331, + "learning_rate": 3.4701232740179876e-05, + "loss": 0.19, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20408767461776733, + "step": 1445, + "valid_targets_mean": 4121.8, + "valid_targets_min": 589 + }, + { + "epoch": 2.2036474164133737, + "grad_norm": 0.5991278401653526, + "learning_rate": 3.464974006870841e-05, + "loss": 0.1828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1730174869298935, + "step": 1450, + "valid_targets_mean": 4380.2, + "valid_targets_min": 872 + }, + { + "epoch": 2.211246200607903, + "grad_norm": 0.5317059372292597, + "learning_rate": 3.4598037009664e-05, + "loss": 0.188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21209636330604553, + "step": 1455, + "valid_targets_mean": 4217.3, + "valid_targets_min": 576 + }, + { + "epoch": 2.2188449848024314, + "grad_norm": 0.430168348759962, + "learning_rate": 3.454612430556365e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1825931817293167, + "step": 1460, + "valid_targets_mean": 5261.9, + "valid_targets_min": 605 + }, + { + "epoch": 2.2264437689969605, + "grad_norm": 0.3749747542503873, + "learning_rate": 3.44940027019351e-05, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17250074446201324, + "step": 1465, + "valid_targets_mean": 5484.4, + "valid_targets_min": 702 + }, + { + "epoch": 2.2340425531914896, + "grad_norm": 0.4590123136778495, + "learning_rate": 3.444167294730617e-05, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21813246607780457, + "step": 1470, + "valid_targets_mean": 5121.4, + "valid_targets_min": 306 + }, + { + "epoch": 2.2416413373860182, + "grad_norm": 0.49931169623759, + "learning_rate": 3.4389135793193935e-05, + "loss": 0.196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.218278706073761, + "step": 1475, + "valid_targets_mean": 3389.2, + "valid_targets_min": 545 + }, + { + "epoch": 2.2492401215805473, + "grad_norm": 0.44543315364919134, + "learning_rate": 3.4336391994094e-05, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18760555982589722, + "step": 1480, + "valid_targets_mean": 4307.0, + "valid_targets_min": 666 + }, + { + "epoch": 2.256838905775076, + "grad_norm": 0.4886663621134491, + "learning_rate": 3.4283442307469625e-05, + "loss": 0.1915, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2062349021434784, + "step": 1485, + "valid_targets_mean": 4356.3, + "valid_targets_min": 766 + }, + { + "epoch": 2.264437689969605, + "grad_norm": 0.4376731027831436, + "learning_rate": 3.423028749374086e-05, + "loss": 0.1772, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1697554588317871, + "step": 1490, + "valid_targets_mean": 4621.5, + "valid_targets_min": 626 + }, + { + "epoch": 2.2720364741641337, + "grad_norm": 0.44145953072536115, + "learning_rate": 3.417692831627361e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17671933770179749, + "step": 1495, + "valid_targets_mean": 4243.6, + "valid_targets_min": 490 + }, + { + "epoch": 2.2796352583586628, + "grad_norm": 0.46231753746429277, + "learning_rate": 3.412336554136871e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.217167928814888, + "step": 1500, + "valid_targets_mean": 4889.9, + "valid_targets_min": 940 + }, + { + "epoch": 2.2872340425531914, + "grad_norm": 0.41849411158618643, + "learning_rate": 3.406959993825088e-05, + "loss": 0.1879, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1857946813106537, + "step": 1505, + "valid_targets_mean": 4538.6, + "valid_targets_min": 1136 + }, + { + "epoch": 2.2948328267477205, + "grad_norm": 0.42440454933579597, + "learning_rate": 3.4015632279057675e-05, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19058769941329956, + "step": 1510, + "valid_targets_mean": 5174.3, + "valid_targets_min": 2350 + }, + { + "epoch": 2.302431610942249, + "grad_norm": 0.4531043322688376, + "learning_rate": 3.396146333882846e-05, + "loss": 0.2017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.207040473818779, + "step": 1515, + "valid_targets_mean": 5085.9, + "valid_targets_min": 1122 + }, + { + "epoch": 2.310030395136778, + "grad_norm": 0.43663796857915893, + "learning_rate": 3.3907093895493186e-05, + "loss": 0.2011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19464045763015747, + "step": 1520, + "valid_targets_mean": 4950.9, + "valid_targets_min": 709 + }, + { + "epoch": 2.317629179331307, + "grad_norm": 0.42579767171142163, + "learning_rate": 3.385252472986129e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17376163601875305, + "step": 1525, + "valid_targets_mean": 5796.6, + "valid_targets_min": 829 + }, + { + "epoch": 2.325227963525836, + "grad_norm": 0.4254323477143335, + "learning_rate": 3.379775662561045e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20658773183822632, + "step": 1530, + "valid_targets_mean": 4603.7, + "valid_targets_min": 937 + }, + { + "epoch": 2.3328267477203646, + "grad_norm": 0.40841513731217177, + "learning_rate": 3.374279036927535e-05, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18336258828639984, + "step": 1535, + "valid_targets_mean": 4597.2, + "valid_targets_min": 376 + }, + { + "epoch": 2.3404255319148937, + "grad_norm": 0.40660499202393396, + "learning_rate": 3.368762675023635e-05, + "loss": 0.1931, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17899194359779358, + "step": 1540, + "valid_targets_mean": 5368.1, + "valid_targets_min": 748 + }, + { + "epoch": 2.3480243161094223, + "grad_norm": 0.4869497094603196, + "learning_rate": 3.363226656070819e-05, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20963457226753235, + "step": 1545, + "valid_targets_mean": 4233.9, + "valid_targets_min": 799 + }, + { + "epoch": 2.3556231003039514, + "grad_norm": 0.42891641992075347, + "learning_rate": 3.3576710595728586e-05, + "loss": 0.2024, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18602633476257324, + "step": 1550, + "valid_targets_mean": 4363.8, + "valid_targets_min": 156 + }, + { + "epoch": 2.36322188449848, + "grad_norm": 0.4784395146562949, + "learning_rate": 3.352095965314682e-05, + "loss": 0.1974, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20554772019386292, + "step": 1555, + "valid_targets_mean": 4412.3, + "valid_targets_min": 732 + }, + { + "epoch": 2.370820668693009, + "grad_norm": 0.3940116430034305, + "learning_rate": 3.3465014533612295e-05, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16700813174247742, + "step": 1560, + "valid_targets_mean": 4859.4, + "valid_targets_min": 2115 + }, + { + "epoch": 2.378419452887538, + "grad_norm": 0.4053824724130465, + "learning_rate": 3.340887604056301e-05, + "loss": 0.1959, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19125846028327942, + "step": 1565, + "valid_targets_mean": 5586.5, + "valid_targets_min": 855 + }, + { + "epoch": 2.386018237082067, + "grad_norm": 0.467001631524023, + "learning_rate": 3.335254498021404e-05, + "loss": 0.1987, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21299326419830322, + "step": 1570, + "valid_targets_mean": 4965.8, + "valid_targets_min": 333 + }, + { + "epoch": 2.393617021276596, + "grad_norm": 0.46807262963090795, + "learning_rate": 3.329602216154594e-05, + "loss": 0.2011, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21306315064430237, + "step": 1575, + "valid_targets_mean": 4142.6, + "valid_targets_min": 618 + }, + { + "epoch": 2.4012158054711246, + "grad_norm": 0.4351369955863477, + "learning_rate": 3.323930839629318e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.192465141415596, + "step": 1580, + "valid_targets_mean": 4713.5, + "valid_targets_min": 805 + }, + { + "epoch": 2.4088145896656536, + "grad_norm": 0.4664276306785285, + "learning_rate": 3.318240449893242e-05, + "loss": 0.1994, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20806226134300232, + "step": 1585, + "valid_targets_mean": 4086.8, + "valid_targets_min": 616 + }, + { + "epoch": 2.4164133738601823, + "grad_norm": 0.4691257656452498, + "learning_rate": 3.3125311286670836e-05, + "loss": 0.1982, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23262707889080048, + "step": 1590, + "valid_targets_mean": 4931.9, + "valid_targets_min": 633 + }, + { + "epoch": 2.4240121580547114, + "grad_norm": 0.42906847559664973, + "learning_rate": 3.3068029579434404e-05, + "loss": 0.196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19915705919265747, + "step": 1595, + "valid_targets_mean": 4431.0, + "valid_targets_min": 397 + }, + { + "epoch": 2.43161094224924, + "grad_norm": 0.4018736513467274, + "learning_rate": 3.3010560199856105e-05, + "loss": 0.1935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16372954845428467, + "step": 1600, + "valid_targets_mean": 5064.1, + "valid_targets_min": 1229 + }, + { + "epoch": 2.439209726443769, + "grad_norm": 0.4520461458479404, + "learning_rate": 3.2952903973264115e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19510574638843536, + "step": 1605, + "valid_targets_mean": 4399.9, + "valid_targets_min": 646 + }, + { + "epoch": 2.4468085106382977, + "grad_norm": 0.3918033831100767, + "learning_rate": 3.289506172766997e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1935116946697235, + "step": 1610, + "valid_targets_mean": 5341.8, + "valid_targets_min": 801 + }, + { + "epoch": 2.454407294832827, + "grad_norm": 0.43294616241650447, + "learning_rate": 3.283703429375663e-05, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16868659853935242, + "step": 1615, + "valid_targets_mean": 4395.4, + "valid_targets_min": 518 + }, + { + "epoch": 2.4620060790273555, + "grad_norm": 0.5245421221092823, + "learning_rate": 3.2778822504866594e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2138800024986267, + "step": 1620, + "valid_targets_mean": 3523.9, + "valid_targets_min": 736 + }, + { + "epoch": 2.4696048632218845, + "grad_norm": 0.44947552448463496, + "learning_rate": 3.272042719698992e-05, + "loss": 0.1986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2089487910270691, + "step": 1625, + "valid_targets_mean": 4868.6, + "valid_targets_min": 708 + }, + { + "epoch": 2.477203647416413, + "grad_norm": 0.5745948604608038, + "learning_rate": 3.2661849208752205e-05, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15748457610607147, + "step": 1630, + "valid_targets_mean": 5253.4, + "valid_targets_min": 1100 + }, + { + "epoch": 2.4848024316109423, + "grad_norm": 0.466310291931274, + "learning_rate": 3.2603089381402574e-05, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18000131845474243, + "step": 1635, + "valid_targets_mean": 4540.9, + "valid_targets_min": 594 + }, + { + "epoch": 2.4924012158054714, + "grad_norm": 0.4887847049484213, + "learning_rate": 3.254414855880155e-05, + "loss": 0.1979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19799655675888062, + "step": 1640, + "valid_targets_mean": 3895.6, + "valid_targets_min": 632 + }, + { + "epoch": 2.5, + "grad_norm": 0.44432072579472404, + "learning_rate": 3.2485027587408965e-05, + "loss": 0.1925, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18566831946372986, + "step": 1645, + "valid_targets_mean": 4716.8, + "valid_targets_min": 785 + }, + { + "epoch": 2.5075987841945286, + "grad_norm": 0.7254579276839739, + "learning_rate": 3.2425727316271814e-05, + "loss": 0.2156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2835049033164978, + "step": 1650, + "valid_targets_mean": 4848.5, + "valid_targets_min": 1102 + }, + { + "epoch": 2.5151975683890577, + "grad_norm": 0.45281348742054633, + "learning_rate": 3.2366248597012037e-05, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21764954924583435, + "step": 1655, + "valid_targets_mean": 4480.8, + "valid_targets_min": 808 + }, + { + "epoch": 2.522796352583587, + "grad_norm": 0.45329008128640347, + "learning_rate": 3.230659228381432e-05, + "loss": 0.1872, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19859766960144043, + "step": 1660, + "valid_targets_mean": 4694.1, + "valid_targets_min": 710 + }, + { + "epoch": 2.5303951367781155, + "grad_norm": 0.3991066726887112, + "learning_rate": 3.2246759233413765e-05, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18775928020477295, + "step": 1665, + "valid_targets_mean": 5537.6, + "valid_targets_min": 2270 + }, + { + "epoch": 2.5379939209726445, + "grad_norm": 0.45660075830572106, + "learning_rate": 3.218675030508367e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21159949898719788, + "step": 1670, + "valid_targets_mean": 4234.2, + "valid_targets_min": 465 + }, + { + "epoch": 2.545592705167173, + "grad_norm": 0.47845524476355505, + "learning_rate": 3.212656636062314e-05, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18367314338684082, + "step": 1675, + "valid_targets_mean": 3884.0, + "valid_targets_min": 609 + }, + { + "epoch": 2.5531914893617023, + "grad_norm": 0.47337775511493685, + "learning_rate": 3.2066208264344695e-05, + "loss": 0.1954, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19395776093006134, + "step": 1680, + "valid_targets_mean": 4240.3, + "valid_targets_min": 1731 + }, + { + "epoch": 2.560790273556231, + "grad_norm": 0.49086260546972615, + "learning_rate": 3.200567688306192e-05, + "loss": 0.2118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2288236916065216, + "step": 1685, + "valid_targets_mean": 3920.8, + "valid_targets_min": 658 + }, + { + "epoch": 2.56838905775076, + "grad_norm": 0.48820274570312466, + "learning_rate": 3.194497308607694e-05, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2082967460155487, + "step": 1690, + "valid_targets_mean": 4256.8, + "valid_targets_min": 440 + }, + { + "epoch": 2.5759878419452886, + "grad_norm": 0.43205681075466934, + "learning_rate": 3.1884097745167986e-05, + "loss": 0.1852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1597200632095337, + "step": 1695, + "valid_targets_mean": 4130.8, + "valid_targets_min": 878 + }, + { + "epoch": 2.5835866261398177, + "grad_norm": 0.5054254906668799, + "learning_rate": 3.182305173457688e-05, + "loss": 0.2034, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2262079417705536, + "step": 1700, + "valid_targets_mean": 4460.6, + "valid_targets_min": 401 + }, + { + "epoch": 2.5911854103343464, + "grad_norm": 0.504689704310772, + "learning_rate": 3.1761835930996424e-05, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1995968520641327, + "step": 1705, + "valid_targets_mean": 3648.3, + "valid_targets_min": 558 + }, + { + "epoch": 2.5987841945288754, + "grad_norm": 0.4392316385094666, + "learning_rate": 3.1700451213557896e-05, + "loss": 0.2026, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20353609323501587, + "step": 1710, + "valid_targets_mean": 4641.1, + "valid_targets_min": 845 + }, + { + "epoch": 2.6063829787234045, + "grad_norm": 0.44339996982872454, + "learning_rate": 3.1638898463818336e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15977585315704346, + "step": 1715, + "valid_targets_mean": 4352.1, + "valid_targets_min": 827 + }, + { + "epoch": 2.613981762917933, + "grad_norm": 0.5398477413573792, + "learning_rate": 3.157717856574794e-05, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18378981947898865, + "step": 1720, + "valid_targets_mean": 4131.6, + "valid_targets_min": 331 + }, + { + "epoch": 2.621580547112462, + "grad_norm": 0.39672349034501203, + "learning_rate": 3.151529240571737e-05, + "loss": 0.2048, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19032993912696838, + "step": 1725, + "valid_targets_mean": 5932.7, + "valid_targets_min": 2148 + }, + { + "epoch": 2.629179331306991, + "grad_norm": 0.4826721393487148, + "learning_rate": 3.1453240872484974e-05, + "loss": 0.2209, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21052716672420502, + "step": 1730, + "valid_targets_mean": 4064.6, + "valid_targets_min": 713 + }, + { + "epoch": 2.63677811550152, + "grad_norm": 0.40797959815259904, + "learning_rate": 3.139102485718407e-05, + "loss": 0.1932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18385851383209229, + "step": 1735, + "valid_targets_mean": 5094.5, + "valid_targets_min": 774 + }, + { + "epoch": 2.6443768996960486, + "grad_norm": 0.49388764627269777, + "learning_rate": 3.1328645253310136e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20751240849494934, + "step": 1740, + "valid_targets_mean": 4489.2, + "valid_targets_min": 668 + }, + { + "epoch": 2.6519756838905773, + "grad_norm": 0.41221441649011, + "learning_rate": 3.126610295670798e-05, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18350102007389069, + "step": 1745, + "valid_targets_mean": 5068.5, + "valid_targets_min": 1047 + }, + { + "epoch": 2.6595744680851063, + "grad_norm": 0.43487564674132706, + "learning_rate": 3.120339886555885e-05, + "loss": 0.2039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1929788887500763, + "step": 1750, + "valid_targets_mean": 4648.7, + "valid_targets_min": 1712 + }, + { + "epoch": 2.6671732522796354, + "grad_norm": 0.3931845374369816, + "learning_rate": 3.114053388036757e-05, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17840635776519775, + "step": 1755, + "valid_targets_mean": 5762.4, + "valid_targets_min": 537 + }, + { + "epoch": 2.674772036474164, + "grad_norm": 0.46325220533318656, + "learning_rate": 3.1077508903949594e-05, + "loss": 0.1857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15973162651062012, + "step": 1760, + "valid_targets_mean": 4577.9, + "valid_targets_min": 1190 + }, + { + "epoch": 2.682370820668693, + "grad_norm": 0.448314100365069, + "learning_rate": 3.1014324841418025e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21308580040931702, + "step": 1765, + "valid_targets_mean": 4523.5, + "valid_targets_min": 689 + }, + { + "epoch": 2.689969604863222, + "grad_norm": 0.4126949676808008, + "learning_rate": 3.095098260017065e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17054617404937744, + "step": 1770, + "valid_targets_mean": 4661.3, + "valid_targets_min": 940 + }, + { + "epoch": 2.697568389057751, + "grad_norm": 0.520594987471646, + "learning_rate": 3.088748308987687e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18412724137306213, + "step": 1775, + "valid_targets_mean": 4541.3, + "valid_targets_min": 425 + }, + { + "epoch": 2.7051671732522795, + "grad_norm": 0.4292929031518839, + "learning_rate": 3.082382722246467e-05, + "loss": 0.2083, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18564680218696594, + "step": 1780, + "valid_targets_mean": 4726.2, + "valid_targets_min": 849 + }, + { + "epoch": 2.7127659574468086, + "grad_norm": 0.4143100769110258, + "learning_rate": 3.07600159121075e-05, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19952227175235748, + "step": 1785, + "valid_targets_mean": 6396.9, + "valid_targets_min": 2406 + }, + { + "epoch": 2.7203647416413372, + "grad_norm": 0.5138412636257836, + "learning_rate": 3.069605007521115e-05, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21767953038215637, + "step": 1790, + "valid_targets_mean": 4080.7, + "valid_targets_min": 855 + }, + { + "epoch": 2.7279635258358663, + "grad_norm": 0.9274624474305112, + "learning_rate": 3.063193063040061e-05, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2023240178823471, + "step": 1795, + "valid_targets_mean": 4271.2, + "valid_targets_min": 299 + }, + { + "epoch": 2.735562310030395, + "grad_norm": 0.4505218736377883, + "learning_rate": 3.0567658498506835e-05, + "loss": 0.2006, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19997933506965637, + "step": 1800, + "valid_targets_mean": 6215.2, + "valid_targets_min": 834 + }, + { + "epoch": 2.743161094224924, + "grad_norm": 0.45112279832870705, + "learning_rate": 3.050323460255359e-05, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20109710097312927, + "step": 1805, + "valid_targets_mean": 4547.4, + "valid_targets_min": 594 + }, + { + "epoch": 2.750759878419453, + "grad_norm": 0.45439465696192066, + "learning_rate": 3.0438659867744104e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22971929609775543, + "step": 1810, + "valid_targets_mean": 4679.8, + "valid_targets_min": 630 + }, + { + "epoch": 2.7583586626139818, + "grad_norm": 0.4239951858010868, + "learning_rate": 3.0373935221447846e-05, + "loss": 0.1861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18463487923145294, + "step": 1815, + "valid_targets_mean": 4548.8, + "valid_targets_min": 832 + }, + { + "epoch": 2.7659574468085104, + "grad_norm": 0.46571130038179387, + "learning_rate": 3.030906159318721e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2257537692785263, + "step": 1820, + "valid_targets_mean": 4535.1, + "valid_targets_min": 822 + }, + { + "epoch": 2.7735562310030395, + "grad_norm": 0.5251100785646777, + "learning_rate": 3.0244039914624127e-05, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16839849948883057, + "step": 1825, + "valid_targets_mean": 4547.6, + "valid_targets_min": 551 + }, + { + "epoch": 2.7811550151975686, + "grad_norm": 0.46551771612143145, + "learning_rate": 3.017887111954671e-05, + "loss": 0.2063, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21392841637134552, + "step": 1830, + "valid_targets_mean": 4989.8, + "valid_targets_min": 1092 + }, + { + "epoch": 2.788753799392097, + "grad_norm": 0.45282545824018966, + "learning_rate": 3.0113556143855836e-05, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17507150769233704, + "step": 1835, + "valid_targets_mean": 3937.5, + "valid_targets_min": 433 + }, + { + "epoch": 2.7963525835866263, + "grad_norm": 0.8573211013484289, + "learning_rate": 3.004809592555172e-05, + "loss": 0.1924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1929873377084732, + "step": 1840, + "valid_targets_mean": 4398.6, + "valid_targets_min": 447 + }, + { + "epoch": 2.803951367781155, + "grad_norm": 0.4511946895966249, + "learning_rate": 2.9982491404720408e-05, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25569894909858704, + "step": 1845, + "valid_targets_mean": 5263.1, + "valid_targets_min": 704 + }, + { + "epoch": 2.811550151975684, + "grad_norm": 0.4075365034632511, + "learning_rate": 2.9916743523520336e-05, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18254083395004272, + "step": 1850, + "valid_targets_mean": 5106.1, + "valid_targets_min": 1353 + }, + { + "epoch": 2.8191489361702127, + "grad_norm": 0.39703735203471957, + "learning_rate": 2.9850853226168738e-05, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1689942181110382, + "step": 1855, + "valid_targets_mean": 5267.1, + "valid_targets_min": 1235 + }, + { + "epoch": 2.8267477203647418, + "grad_norm": 0.4300034911707786, + "learning_rate": 2.9784821458928116e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19870391488075256, + "step": 1860, + "valid_targets_mean": 4619.8, + "valid_targets_min": 672 + }, + { + "epoch": 2.8343465045592704, + "grad_norm": 0.4731017750575238, + "learning_rate": 2.9718649170092653e-05, + "loss": 0.2051, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2331341654062271, + "step": 1865, + "valid_targets_mean": 4517.1, + "valid_targets_min": 673 + }, + { + "epoch": 2.8419452887537995, + "grad_norm": 0.47588128492307696, + "learning_rate": 2.9652337309974582e-05, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17996634542942047, + "step": 1870, + "valid_targets_mean": 3985.2, + "valid_targets_min": 962 + }, + { + "epoch": 2.849544072948328, + "grad_norm": 0.5647869677542844, + "learning_rate": 2.958588683089056e-05, + "loss": 0.194, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21768032014369965, + "step": 1875, + "valid_targets_mean": 3708.1, + "valid_targets_min": 342 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 0.4169198649338515, + "learning_rate": 2.9519298687147945e-05, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17996545135974884, + "step": 1880, + "valid_targets_mean": 4830.1, + "valid_targets_min": 834 + }, + { + "epoch": 2.8647416413373863, + "grad_norm": 0.46342593085414296, + "learning_rate": 2.9452573835031154e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17930959165096283, + "step": 1885, + "valid_targets_mean": 5006.9, + "valid_targets_min": 1043 + }, + { + "epoch": 2.872340425531915, + "grad_norm": 0.5952920705102613, + "learning_rate": 2.9385713232787877e-05, + "loss": 0.1878, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17503748834133148, + "step": 1890, + "valid_targets_mean": 4345.5, + "valid_targets_min": 702 + }, + { + "epoch": 2.8799392097264436, + "grad_norm": 1.5179001765998326, + "learning_rate": 2.9318717840615352e-05, + "loss": 0.1895, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17403315007686615, + "step": 1895, + "valid_targets_mean": 5409.5, + "valid_targets_min": 2159 + }, + { + "epoch": 2.8875379939209727, + "grad_norm": 0.45039073631427357, + "learning_rate": 2.9251588620646543e-05, + "loss": 0.2208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19473648071289062, + "step": 1900, + "valid_targets_mean": 4456.6, + "valid_targets_min": 561 + }, + { + "epoch": 2.8951367781155017, + "grad_norm": 0.5386037897816643, + "learning_rate": 2.9184326536936356e-05, + "loss": 0.1999, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20459288358688354, + "step": 1905, + "valid_targets_mean": 5313.1, + "valid_targets_min": 1889 + }, + { + "epoch": 2.9027355623100304, + "grad_norm": 0.5171036731287691, + "learning_rate": 2.9116932555447764e-05, + "loss": 0.1924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19427910447120667, + "step": 1910, + "valid_targets_mean": 4551.8, + "valid_targets_min": 640 + }, + { + "epoch": 2.910334346504559, + "grad_norm": 0.3956323253175454, + "learning_rate": 2.904940764403795e-05, + "loss": 0.1921, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16807705163955688, + "step": 1915, + "valid_targets_mean": 5148.9, + "valid_targets_min": 1114 + }, + { + "epoch": 2.917933130699088, + "grad_norm": 0.4609179179948364, + "learning_rate": 2.898175277244441e-05, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18651928007602692, + "step": 1920, + "valid_targets_mean": 4270.8, + "valid_targets_min": 829 + }, + { + "epoch": 2.925531914893617, + "grad_norm": 0.3746013132633794, + "learning_rate": 2.891396891227103e-05, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18432965874671936, + "step": 1925, + "valid_targets_mean": 5464.6, + "valid_targets_min": 869 + }, + { + "epoch": 2.933130699088146, + "grad_norm": 0.44757073319717156, + "learning_rate": 2.88460570369741e-05, + "loss": 0.1966, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2037007212638855, + "step": 1930, + "valid_targets_mean": 5044.7, + "valid_targets_min": 650 + }, + { + "epoch": 2.940729483282675, + "grad_norm": 0.5165497089688482, + "learning_rate": 2.877801812184838e-05, + "loss": 0.1865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19577309489250183, + "step": 1935, + "valid_targets_mean": 3723.6, + "valid_targets_min": 263 + }, + { + "epoch": 2.9483282674772036, + "grad_norm": 0.5248044839976569, + "learning_rate": 2.8709853144013063e-05, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17888793349266052, + "step": 1940, + "valid_targets_mean": 5473.5, + "valid_targets_min": 2971 + }, + { + "epoch": 2.9559270516717326, + "grad_norm": 0.46290978562548923, + "learning_rate": 2.8641563082397755e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19724613428115845, + "step": 1945, + "valid_targets_mean": 4504.6, + "valid_targets_min": 683 + }, + { + "epoch": 2.9635258358662613, + "grad_norm": 0.47247130378721536, + "learning_rate": 2.857314891772841e-05, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1742001324892044, + "step": 1950, + "valid_targets_mean": 4378.8, + "valid_targets_min": 900 + }, + { + "epoch": 2.9711246200607904, + "grad_norm": 0.38540265276997787, + "learning_rate": 2.850461163251325e-05, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1545667052268982, + "step": 1955, + "valid_targets_mean": 4485.9, + "valid_targets_min": 825 + }, + { + "epoch": 2.978723404255319, + "grad_norm": 0.4375842807396514, + "learning_rate": 2.8435952211028652e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20349517464637756, + "step": 1960, + "valid_targets_mean": 4611.9, + "valid_targets_min": 1259 + }, + { + "epoch": 2.986322188449848, + "grad_norm": 0.393339700104493, + "learning_rate": 2.8367171639305014e-05, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1762651801109314, + "step": 1965, + "valid_targets_mean": 4854.0, + "valid_targets_min": 1188 + }, + { + "epoch": 2.9939209726443767, + "grad_norm": 0.4408583709677064, + "learning_rate": 2.8298270905112598e-05, + "loss": 0.2093, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17071180045604706, + "step": 1970, + "valid_targets_mean": 4334.9, + "valid_targets_min": 1033 + }, + { + "epoch": 3.001519756838906, + "grad_norm": 0.4509769720521314, + "learning_rate": 2.8229250997947338e-05, + "loss": 0.1899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19761110842227936, + "step": 1975, + "valid_targets_mean": 5020.1, + "valid_targets_min": 464 + }, + { + "epoch": 3.0091185410334345, + "grad_norm": 0.44397107618825393, + "learning_rate": 2.8160112909016627e-05, + "loss": 0.1964, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15861201286315918, + "step": 1980, + "valid_targets_mean": 4464.9, + "valid_targets_min": 434 + }, + { + "epoch": 3.0167173252279635, + "grad_norm": 0.4590917607720272, + "learning_rate": 2.8090857631225105e-05, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17721785604953766, + "step": 1985, + "valid_targets_mean": 4993.2, + "valid_targets_min": 443 + }, + { + "epoch": 3.024316109422492, + "grad_norm": 1.6476449501443067, + "learning_rate": 2.8021486159160365e-05, + "loss": 0.1809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21059048175811768, + "step": 1990, + "valid_targets_mean": 3777.8, + "valid_targets_min": 567 + }, + { + "epoch": 3.0319148936170213, + "grad_norm": 0.3955766292317692, + "learning_rate": 2.7951999489078697e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15698814392089844, + "step": 1995, + "valid_targets_mean": 5706.5, + "valid_targets_min": 1789 + }, + { + "epoch": 3.0395136778115504, + "grad_norm": 0.6060112559647506, + "learning_rate": 2.7882398618890763e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17217731475830078, + "step": 2000, + "valid_targets_mean": 4799.2, + "valid_targets_min": 675 + }, + { + "epoch": 3.047112462006079, + "grad_norm": 0.48214170254530214, + "learning_rate": 2.781268454814728e-05, + "loss": 0.1743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18182724714279175, + "step": 2005, + "valid_targets_mean": 4482.8, + "valid_targets_min": 268 + }, + { + "epoch": 3.054711246200608, + "grad_norm": 0.4254186774774426, + "learning_rate": 2.7742858278024657e-05, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17712415754795074, + "step": 2010, + "valid_targets_mean": 5016.2, + "valid_targets_min": 933 + }, + { + "epoch": 3.0623100303951367, + "grad_norm": 0.4561837122624953, + "learning_rate": 2.7672920811310615e-05, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1763693392276764, + "step": 2015, + "valid_targets_mean": 5105.2, + "valid_targets_min": 1198 + }, + { + "epoch": 3.069908814589666, + "grad_norm": 0.6538535804895353, + "learning_rate": 2.7602873152389795e-05, + "loss": 0.1796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15733420848846436, + "step": 2020, + "valid_targets_mean": 4136.8, + "valid_targets_min": 1002 + }, + { + "epoch": 3.0775075987841944, + "grad_norm": 0.4383597724717559, + "learning_rate": 2.7532716307229325e-05, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18817880749702454, + "step": 2025, + "valid_targets_mean": 4361.4, + "valid_targets_min": 566 + }, + { + "epoch": 3.0851063829787235, + "grad_norm": 0.4468187218329756, + "learning_rate": 2.7462451283364372e-05, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18754516541957855, + "step": 2030, + "valid_targets_mean": 5809.6, + "valid_targets_min": 2367 + }, + { + "epoch": 3.092705167173252, + "grad_norm": 0.4923380379793573, + "learning_rate": 2.739207908988369e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1901119500398636, + "step": 2035, + "valid_targets_mean": 4648.6, + "valid_targets_min": 651 + }, + { + "epoch": 3.1003039513677813, + "grad_norm": 0.43950335258891504, + "learning_rate": 2.7321600737415103e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1579056680202484, + "step": 2040, + "valid_targets_mean": 4848.3, + "valid_targets_min": 513 + }, + { + "epoch": 3.10790273556231, + "grad_norm": 0.43941575889354745, + "learning_rate": 2.7251017238111014e-05, + "loss": 0.1818, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17752686142921448, + "step": 2045, + "valid_targets_mean": 4887.9, + "valid_targets_min": 552 + }, + { + "epoch": 3.115501519756839, + "grad_norm": 0.43006557381731564, + "learning_rate": 2.718032960563384e-05, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1756466031074524, + "step": 2050, + "valid_targets_mean": 5296.2, + "valid_targets_min": 1395 + }, + { + "epoch": 3.1231003039513676, + "grad_norm": 0.3962603202091622, + "learning_rate": 2.710953885514149e-05, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13223829865455627, + "step": 2055, + "valid_targets_mean": 4657.5, + "valid_targets_min": 1941 + }, + { + "epoch": 3.1306990881458967, + "grad_norm": 0.47787066171111203, + "learning_rate": 2.7038646003272778e-05, + "loss": 0.178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1752995252609253, + "step": 2060, + "valid_targets_mean": 4332.5, + "valid_targets_min": 523 + }, + { + "epoch": 3.1382978723404253, + "grad_norm": 0.49333053682614264, + "learning_rate": 2.6967652068132787e-05, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1864144653081894, + "step": 2065, + "valid_targets_mean": 3611.1, + "valid_targets_min": 567 + }, + { + "epoch": 3.1458966565349544, + "grad_norm": 0.4781276995658994, + "learning_rate": 2.6896558069278294e-05, + "loss": 0.1792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14743542671203613, + "step": 2070, + "valid_targets_mean": 4010.8, + "valid_targets_min": 543 + }, + { + "epoch": 3.1534954407294835, + "grad_norm": 0.4481532756441824, + "learning_rate": 2.6825365027703104e-05, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1980597823858261, + "step": 2075, + "valid_targets_mean": 5291.1, + "valid_targets_min": 446 + }, + { + "epoch": 3.161094224924012, + "grad_norm": 0.44883490224285716, + "learning_rate": 2.6754073965823394e-05, + "loss": 0.1741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19897349178791046, + "step": 2080, + "valid_targets_mean": 5601.1, + "valid_targets_min": 460 + }, + { + "epoch": 3.1686930091185412, + "grad_norm": 0.47818122842888633, + "learning_rate": 2.668268590746303e-05, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16886334121227264, + "step": 2085, + "valid_targets_mean": 3956.9, + "valid_targets_min": 672 + }, + { + "epoch": 3.17629179331307, + "grad_norm": 0.44699975246028895, + "learning_rate": 2.6611201877838858e-05, + "loss": 0.173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1886739879846573, + "step": 2090, + "valid_targets_mean": 5496.0, + "valid_targets_min": 1891 + }, + { + "epoch": 3.183890577507599, + "grad_norm": 0.46241733640368204, + "learning_rate": 2.6539622903545992e-05, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16013459861278534, + "step": 2095, + "valid_targets_mean": 5061.5, + "valid_targets_min": 842 + }, + { + "epoch": 3.1914893617021276, + "grad_norm": 0.419537783672445, + "learning_rate": 2.6467950012543055e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15453095734119415, + "step": 2100, + "valid_targets_mean": 5147.8, + "valid_targets_min": 1776 + }, + { + "epoch": 3.1990881458966567, + "grad_norm": 0.4430059744559326, + "learning_rate": 2.639618423413742e-05, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15962053835391998, + "step": 2105, + "valid_targets_mean": 4258.2, + "valid_targets_min": 719 + }, + { + "epoch": 3.2066869300911853, + "grad_norm": 0.4601551349059534, + "learning_rate": 2.6324326598970447e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19752000272274017, + "step": 2110, + "valid_targets_mean": 5594.7, + "valid_targets_min": 2157 + }, + { + "epoch": 3.2142857142857144, + "grad_norm": 0.4432941671854632, + "learning_rate": 2.6252378139002666e-05, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16792172193527222, + "step": 2115, + "valid_targets_mean": 4681.6, + "valid_targets_min": 831 + }, + { + "epoch": 3.221884498480243, + "grad_norm": 1.0521842556961734, + "learning_rate": 2.618033988749895e-05, + "loss": 0.1804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18343770503997803, + "step": 2120, + "valid_targets_mean": 4442.8, + "valid_targets_min": 593 + }, + { + "epoch": 3.229483282674772, + "grad_norm": 0.4703582807896684, + "learning_rate": 2.6108212879013694e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.182417631149292, + "step": 2125, + "valid_targets_mean": 5050.2, + "valid_targets_min": 882 + }, + { + "epoch": 3.237082066869301, + "grad_norm": 0.5066204484636513, + "learning_rate": 2.6035998149375928e-05, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23296314477920532, + "step": 2130, + "valid_targets_mean": 4509.6, + "valid_targets_min": 585 + }, + { + "epoch": 3.24468085106383, + "grad_norm": 0.47427217437454394, + "learning_rate": 2.5963696735674487e-05, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1860450804233551, + "step": 2135, + "valid_targets_mean": 4647.2, + "valid_targets_min": 697 + }, + { + "epoch": 3.2522796352583585, + "grad_norm": 0.4250698588091916, + "learning_rate": 2.5891309676243084e-05, + "loss": 0.185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16689786314964294, + "step": 2140, + "valid_targets_mean": 4561.6, + "valid_targets_min": 756 + }, + { + "epoch": 3.2598784194528876, + "grad_norm": 0.47029762963920585, + "learning_rate": 2.5818838010645393e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15192772448062897, + "step": 2145, + "valid_targets_mean": 4186.0, + "valid_targets_min": 434 + }, + { + "epoch": 3.2674772036474162, + "grad_norm": 0.4554599632142264, + "learning_rate": 2.5746282779660145e-05, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17261144518852234, + "step": 2150, + "valid_targets_mean": 4568.7, + "valid_targets_min": 1854 + }, + { + "epoch": 3.2750759878419453, + "grad_norm": 0.4502827257632833, + "learning_rate": 2.5673645025266174e-05, + "loss": 0.178, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1779678761959076, + "step": 2155, + "valid_targets_mean": 4289.7, + "valid_targets_min": 781 + }, + { + "epoch": 3.282674772036474, + "grad_norm": 0.4787827101748988, + "learning_rate": 2.5600925790627423e-05, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1940232217311859, + "step": 2160, + "valid_targets_mean": 4486.1, + "valid_targets_min": 171 + }, + { + "epoch": 3.290273556231003, + "grad_norm": 0.45438086048067683, + "learning_rate": 2.5528126120078018e-05, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19441285729408264, + "step": 2165, + "valid_targets_mean": 5050.3, + "valid_targets_min": 1728 + }, + { + "epoch": 3.297872340425532, + "grad_norm": 0.40566641587346486, + "learning_rate": 2.545524705910722e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17091290652751923, + "step": 2170, + "valid_targets_mean": 5804.4, + "valid_targets_min": 1156 + }, + { + "epoch": 3.3054711246200608, + "grad_norm": 0.42598008462425585, + "learning_rate": 2.5382289654344433e-05, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16645470261573792, + "step": 2175, + "valid_targets_mean": 4971.6, + "valid_targets_min": 673 + }, + { + "epoch": 3.31306990881459, + "grad_norm": 0.440353868517696, + "learning_rate": 2.530925495354418e-05, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17304345965385437, + "step": 2180, + "valid_targets_mean": 4552.6, + "valid_targets_min": 1465 + }, + { + "epoch": 3.3206686930091185, + "grad_norm": 0.45749279193739023, + "learning_rate": 2.523614400557103e-05, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16047537326812744, + "step": 2185, + "valid_targets_mean": 4131.4, + "valid_targets_min": 641 + }, + { + "epoch": 3.3282674772036476, + "grad_norm": 0.42763280388882297, + "learning_rate": 2.516295786038457e-05, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19560299813747406, + "step": 2190, + "valid_targets_mean": 5307.1, + "valid_targets_min": 686 + }, + { + "epoch": 3.335866261398176, + "grad_norm": 0.45229487644284055, + "learning_rate": 2.5089697569024293e-05, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17956998944282532, + "step": 2195, + "valid_targets_mean": 4415.1, + "valid_targets_min": 479 + }, + { + "epoch": 3.3434650455927053, + "grad_norm": 0.49947402828635423, + "learning_rate": 2.501636418359453e-05, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19726476073265076, + "step": 2200, + "valid_targets_mean": 4160.0, + "valid_targets_min": 702 + }, + { + "epoch": 3.351063829787234, + "grad_norm": 0.5624015949940533, + "learning_rate": 2.4942958757249322e-05, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17548981308937073, + "step": 2205, + "valid_targets_mean": 4641.6, + "valid_targets_min": 741 + }, + { + "epoch": 3.358662613981763, + "grad_norm": 0.47188014259395983, + "learning_rate": 2.4869482344177297e-05, + "loss": 0.1912, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.191510871052742, + "step": 2210, + "valid_targets_mean": 5098.9, + "valid_targets_min": 732 + }, + { + "epoch": 3.3662613981762917, + "grad_norm": 0.4044372525372168, + "learning_rate": 2.479593599958655e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1595427691936493, + "step": 2215, + "valid_targets_mean": 4987.1, + "valid_targets_min": 2150 + }, + { + "epoch": 3.3738601823708207, + "grad_norm": 0.4610514723176418, + "learning_rate": 2.472232077968947e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18796592950820923, + "step": 2220, + "valid_targets_mean": 4755.8, + "valid_targets_min": 804 + }, + { + "epoch": 3.3814589665653494, + "grad_norm": 0.4762033973991841, + "learning_rate": 2.4648637741687572e-05, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17647765576839447, + "step": 2225, + "valid_targets_mean": 4052.8, + "valid_targets_min": 1097 + }, + { + "epoch": 3.3890577507598785, + "grad_norm": 0.5674700704733731, + "learning_rate": 2.4574887943756322e-05, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21014189720153809, + "step": 2230, + "valid_targets_mean": 5642.9, + "valid_targets_min": 887 + }, + { + "epoch": 3.396656534954407, + "grad_norm": 0.45407587371796526, + "learning_rate": 2.450107244502994e-05, + "loss": 0.1911, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18419310450553894, + "step": 2235, + "valid_targets_mean": 4964.2, + "valid_targets_min": 631 + }, + { + "epoch": 3.404255319148936, + "grad_norm": 0.6036076286770431, + "learning_rate": 2.442719230558619e-05, + "loss": 0.1935, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19704486429691315, + "step": 2240, + "valid_targets_mean": 3990.9, + "valid_targets_min": 879 + }, + { + "epoch": 3.4118541033434653, + "grad_norm": 0.5044426709371705, + "learning_rate": 2.435324858643114e-05, + "loss": 0.1888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17976920306682587, + "step": 2245, + "valid_targets_mean": 4607.1, + "valid_targets_min": 276 + }, + { + "epoch": 3.419452887537994, + "grad_norm": 0.47427419752248345, + "learning_rate": 2.4279242349483945e-05, + "loss": 0.1869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18903280794620514, + "step": 2250, + "valid_targets_mean": 4716.1, + "valid_targets_min": 1806 + }, + { + "epoch": 3.4270516717325226, + "grad_norm": 0.5315098096977057, + "learning_rate": 2.42051746575616e-05, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22392138838768005, + "step": 2255, + "valid_targets_mean": 3496.4, + "valid_targets_min": 263 + }, + { + "epoch": 3.4346504559270516, + "grad_norm": 0.39060444130604355, + "learning_rate": 2.413104657436365e-05, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14147084951400757, + "step": 2260, + "valid_targets_mean": 4482.1, + "valid_targets_min": 527 + }, + { + "epoch": 3.4422492401215807, + "grad_norm": 0.44338388859066824, + "learning_rate": 2.4056859164456933e-05, + "loss": 0.1787, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20094552636146545, + "step": 2265, + "valid_targets_mean": 5268.6, + "valid_targets_min": 601 + }, + { + "epoch": 3.4498480243161094, + "grad_norm": 0.4361432780966179, + "learning_rate": 2.3982613493260298e-05, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1768036186695099, + "step": 2270, + "valid_targets_mean": 5291.0, + "valid_targets_min": 506 + }, + { + "epoch": 3.4574468085106385, + "grad_norm": 0.5069263015357972, + "learning_rate": 2.390831062702929e-05, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19090662896633148, + "step": 2275, + "valid_targets_mean": 4792.0, + "valid_targets_min": 361 + }, + { + "epoch": 3.465045592705167, + "grad_norm": 0.40616123029264917, + "learning_rate": 2.383395163284083e-05, + "loss": 0.1709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15730628371238708, + "step": 2280, + "valid_targets_mean": 5398.0, + "valid_targets_min": 1965 + }, + { + "epoch": 3.472644376899696, + "grad_norm": 0.4339438381996928, + "learning_rate": 2.3759537578577926e-05, + "loss": 0.1815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16227596998214722, + "step": 2285, + "valid_targets_mean": 5389.4, + "valid_targets_min": 682 + }, + { + "epoch": 3.480243161094225, + "grad_norm": 0.44867888022495267, + "learning_rate": 2.3685069532914292e-05, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1895052194595337, + "step": 2290, + "valid_targets_mean": 5373.5, + "valid_targets_min": 1378 + }, + { + "epoch": 3.487841945288754, + "grad_norm": 0.45859302831224075, + "learning_rate": 2.3610548565299044e-05, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.161161869764328, + "step": 2295, + "valid_targets_mean": 4974.8, + "valid_targets_min": 963 + }, + { + "epoch": 3.4954407294832825, + "grad_norm": 0.47990973790811997, + "learning_rate": 2.35359757459413e-05, + "loss": 0.1861, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1807297170162201, + "step": 2300, + "valid_targets_mean": 3966.6, + "valid_targets_min": 594 + }, + { + "epoch": 3.5030395136778116, + "grad_norm": 0.4458814501241054, + "learning_rate": 2.3461352145794835e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1584734320640564, + "step": 2305, + "valid_targets_mean": 4710.0, + "valid_targets_min": 1068 + }, + { + "epoch": 3.5106382978723403, + "grad_norm": 0.5204277337187776, + "learning_rate": 2.338667883654271e-05, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19432294368743896, + "step": 2310, + "valid_targets_mean": 3841.7, + "valid_targets_min": 403 + }, + { + "epoch": 3.5182370820668694, + "grad_norm": 0.5002703947790278, + "learning_rate": 2.3311956890581845e-05, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1830371916294098, + "step": 2315, + "valid_targets_mean": 4266.8, + "valid_targets_min": 588 + }, + { + "epoch": 3.5258358662613984, + "grad_norm": 0.45671055489988216, + "learning_rate": 2.3237187381007654e-05, + "loss": 0.1758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17926675081253052, + "step": 2320, + "valid_targets_mean": 4515.0, + "valid_targets_min": 654 + }, + { + "epoch": 3.533434650455927, + "grad_norm": 0.44706162372061575, + "learning_rate": 2.3162371381598627e-05, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17958760261535645, + "step": 2325, + "valid_targets_mean": 4538.9, + "valid_targets_min": 646 + }, + { + "epoch": 3.5410334346504557, + "grad_norm": 0.48526337740475023, + "learning_rate": 2.308750996680089e-05, + "loss": 0.1857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1857941448688507, + "step": 2330, + "valid_targets_mean": 4541.9, + "valid_targets_min": 718 + }, + { + "epoch": 3.548632218844985, + "grad_norm": 0.6863209953782875, + "learning_rate": 2.3012604211712787e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15989352762699127, + "step": 2335, + "valid_targets_mean": 4711.3, + "valid_targets_min": 905 + }, + { + "epoch": 3.556231003039514, + "grad_norm": 0.43601817882878313, + "learning_rate": 2.2937655192069453e-05, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15695396065711975, + "step": 2340, + "valid_targets_mean": 4525.1, + "valid_targets_min": 554 + }, + { + "epoch": 3.5638297872340425, + "grad_norm": 0.4384690019868411, + "learning_rate": 2.286266398422734e-05, + "loss": 0.1885, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17277196049690247, + "step": 2345, + "valid_targets_mean": 5536.7, + "valid_targets_min": 966 + }, + { + "epoch": 3.571428571428571, + "grad_norm": 0.50720927906344, + "learning_rate": 2.278763166514879e-05, + "loss": 0.1735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1746547669172287, + "step": 2350, + "valid_targets_mean": 4262.4, + "valid_targets_min": 688 + }, + { + "epoch": 3.5790273556231003, + "grad_norm": 0.5082859794711881, + "learning_rate": 2.2712559312386525e-05, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16094228625297546, + "step": 2355, + "valid_targets_mean": 4750.5, + "valid_targets_min": 354 + }, + { + "epoch": 3.5866261398176293, + "grad_norm": 0.5324693739038185, + "learning_rate": 2.2637448004068227e-05, + "loss": 0.186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1898549497127533, + "step": 2360, + "valid_targets_mean": 4889.1, + "valid_targets_min": 868 + }, + { + "epoch": 3.594224924012158, + "grad_norm": 0.40332096235924436, + "learning_rate": 2.2562298818881005e-05, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1677415668964386, + "step": 2365, + "valid_targets_mean": 4815.6, + "valid_targets_min": 449 + }, + { + "epoch": 3.601823708206687, + "grad_norm": 0.4975962462446565, + "learning_rate": 2.2487112836055932e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1909307986497879, + "step": 2370, + "valid_targets_mean": 4240.7, + "valid_targets_min": 873 + }, + { + "epoch": 3.6094224924012157, + "grad_norm": 0.4333440171846238, + "learning_rate": 2.241189113535253e-05, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17210185527801514, + "step": 2375, + "valid_targets_mean": 4546.3, + "valid_targets_min": 814 + }, + { + "epoch": 3.617021276595745, + "grad_norm": 0.4286229722867218, + "learning_rate": 2.2336634797043294e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17077794671058655, + "step": 2380, + "valid_targets_mean": 4828.9, + "valid_targets_min": 2075 + }, + { + "epoch": 3.6246200607902734, + "grad_norm": 0.5351152453484341, + "learning_rate": 2.226134490189813e-05, + "loss": 0.1864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20225580036640167, + "step": 2385, + "valid_targets_mean": 4493.8, + "valid_targets_min": 816 + }, + { + "epoch": 3.6322188449848025, + "grad_norm": 0.4595793337143316, + "learning_rate": 2.2186022531168877e-05, + "loss": 0.1806, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18067389726638794, + "step": 2390, + "valid_targets_mean": 4226.4, + "valid_targets_min": 876 + }, + { + "epoch": 3.639817629179331, + "grad_norm": 0.4376288036773449, + "learning_rate": 2.2110668766573756e-05, + "loss": 0.1839, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17458736896514893, + "step": 2395, + "valid_targets_mean": 4607.4, + "valid_targets_min": 378 + }, + { + "epoch": 3.6474164133738602, + "grad_norm": 0.4235273238288212, + "learning_rate": 2.2035284690281835e-05, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16206535696983337, + "step": 2400, + "valid_targets_mean": 4671.6, + "valid_targets_min": 427 + }, + { + "epoch": 3.655015197568389, + "grad_norm": 0.4645835996278293, + "learning_rate": 2.1959871384897508e-05, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1612105816602707, + "step": 2405, + "valid_targets_mean": 4012.2, + "valid_targets_min": 736 + }, + { + "epoch": 3.662613981762918, + "grad_norm": 0.4339263612443116, + "learning_rate": 2.188442993344492e-05, + "loss": 0.1828, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18110080063343048, + "step": 2410, + "valid_targets_mean": 5371.9, + "valid_targets_min": 1880 + }, + { + "epoch": 3.670212765957447, + "grad_norm": 0.5156390412963044, + "learning_rate": 2.1808961419352433e-05, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19328826665878296, + "step": 2415, + "valid_targets_mean": 5049.5, + "valid_targets_min": 1733 + }, + { + "epoch": 3.6778115501519757, + "grad_norm": 0.4257958182746545, + "learning_rate": 2.173346692643706e-05, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15053753554821014, + "step": 2420, + "valid_targets_mean": 4545.5, + "valid_targets_min": 845 + }, + { + "epoch": 3.6854103343465043, + "grad_norm": 0.41421446171755727, + "learning_rate": 2.1657947538888907e-05, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15670928359031677, + "step": 2425, + "valid_targets_mean": 4735.4, + "valid_targets_min": 2303 + }, + { + "epoch": 3.6930091185410334, + "grad_norm": 0.46488209722389967, + "learning_rate": 2.158240434125557e-05, + "loss": 0.1809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17502865195274353, + "step": 2430, + "valid_targets_mean": 4069.9, + "valid_targets_min": 843 + }, + { + "epoch": 3.7006079027355625, + "grad_norm": 0.44273639823588856, + "learning_rate": 2.150683841842662e-05, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17152325809001923, + "step": 2435, + "valid_targets_mean": 4846.6, + "valid_targets_min": 1167 + }, + { + "epoch": 3.708206686930091, + "grad_norm": 0.41156415643010313, + "learning_rate": 2.1431250855617978e-05, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15151536464691162, + "step": 2440, + "valid_targets_mean": 4472.1, + "valid_targets_min": 280 + }, + { + "epoch": 3.71580547112462, + "grad_norm": 0.45636761630984557, + "learning_rate": 2.1355642738356327e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16730879247188568, + "step": 2445, + "valid_targets_mean": 3877.2, + "valid_targets_min": 406 + }, + { + "epoch": 3.723404255319149, + "grad_norm": 0.41821546008836596, + "learning_rate": 2.128001515246355e-05, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1652117371559143, + "step": 2450, + "valid_targets_mean": 5117.6, + "valid_targets_min": 1054 + }, + { + "epoch": 3.731003039513678, + "grad_norm": 0.5195131380448494, + "learning_rate": 2.1204369184041115e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21220675110816956, + "step": 2455, + "valid_targets_mean": 3935.9, + "valid_targets_min": 608 + }, + { + "epoch": 3.7386018237082066, + "grad_norm": 0.4759365332843418, + "learning_rate": 2.1128705919454488e-05, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21190626919269562, + "step": 2460, + "valid_targets_mean": 5204.1, + "valid_targets_min": 776 + }, + { + "epoch": 3.7462006079027357, + "grad_norm": 0.44340296838544296, + "learning_rate": 2.1053026445317534e-05, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16357441246509552, + "step": 2465, + "valid_targets_mean": 5137.4, + "valid_targets_min": 531 + }, + { + "epoch": 3.7537993920972643, + "grad_norm": 0.602232408360396, + "learning_rate": 2.09773318484769e-05, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14004534482955933, + "step": 2470, + "valid_targets_mean": 4292.4, + "valid_targets_min": 861 + }, + { + "epoch": 3.7613981762917934, + "grad_norm": 0.4860093104756001, + "learning_rate": 2.0901623215996406e-05, + "loss": 0.1819, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17179439961910248, + "step": 2475, + "valid_targets_mean": 4222.4, + "valid_targets_min": 2111 + }, + { + "epoch": 3.768996960486322, + "grad_norm": 0.5345377630387919, + "learning_rate": 2.082590163514146e-05, + "loss": 0.188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19142243266105652, + "step": 2480, + "valid_targets_mean": 3549.6, + "valid_targets_min": 650 + }, + { + "epoch": 3.776595744680851, + "grad_norm": 0.5131611773395347, + "learning_rate": 2.07501681933634e-05, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23167961835861206, + "step": 2485, + "valid_targets_mean": 4783.1, + "valid_targets_min": 422 + }, + { + "epoch": 3.78419452887538, + "grad_norm": 0.43772278574910306, + "learning_rate": 2.0674423978283924e-05, + "loss": 0.1702, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.159462109208107, + "step": 2490, + "valid_targets_mean": 4151.8, + "valid_targets_min": 1045 + }, + { + "epoch": 3.791793313069909, + "grad_norm": 0.48634040579611115, + "learning_rate": 2.059867007767943e-05, + "loss": 0.1729, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1738453060388565, + "step": 2495, + "valid_targets_mean": 4146.1, + "valid_targets_min": 314 + }, + { + "epoch": 3.7993920972644375, + "grad_norm": 0.45093092640477583, + "learning_rate": 2.0522907579465413e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1663065254688263, + "step": 2500, + "valid_targets_mean": 3941.2, + "valid_targets_min": 1010 + }, + { + "epoch": 3.8069908814589666, + "grad_norm": 0.50654438664862, + "learning_rate": 2.0447137571680856e-05, + "loss": 0.1978, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20836275815963745, + "step": 2505, + "valid_targets_mean": 4301.9, + "valid_targets_min": 870 + }, + { + "epoch": 3.8145896656534957, + "grad_norm": 0.47213930537918863, + "learning_rate": 2.037136114247257e-05, + "loss": 0.1906, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19136860966682434, + "step": 2510, + "valid_targets_mean": 4427.4, + "valid_targets_min": 571 + }, + { + "epoch": 3.8221884498480243, + "grad_norm": 0.6652881182583804, + "learning_rate": 2.0295579380079596e-05, + "loss": 0.1825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16894681751728058, + "step": 2515, + "valid_targets_mean": 4250.4, + "valid_targets_min": 473 + }, + { + "epoch": 3.829787234042553, + "grad_norm": 0.4220064998153064, + "learning_rate": 2.0219793372817557e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1527949720621109, + "step": 2520, + "valid_targets_mean": 4826.4, + "valid_targets_min": 827 + }, + { + "epoch": 3.837386018237082, + "grad_norm": 0.43668863176276407, + "learning_rate": 2.0144004209063042e-05, + "loss": 0.176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1740725338459015, + "step": 2525, + "valid_targets_mean": 4150.2, + "valid_targets_min": 772 + }, + { + "epoch": 3.844984802431611, + "grad_norm": 0.3823174418425911, + "learning_rate": 2.0068212977237983e-05, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14751499891281128, + "step": 2530, + "valid_targets_mean": 5399.6, + "valid_targets_min": 2779 + }, + { + "epoch": 3.8525835866261398, + "grad_norm": 0.4587095534889756, + "learning_rate": 1.999242076579398e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19451266527175903, + "step": 2535, + "valid_targets_mean": 4256.1, + "valid_targets_min": 743 + }, + { + "epoch": 3.860182370820669, + "grad_norm": 0.45169233975095224, + "learning_rate": 1.9916628663196743e-05, + "loss": 0.1892, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17704080045223236, + "step": 2540, + "valid_targets_mean": 4775.8, + "valid_targets_min": 2350 + }, + { + "epoch": 3.8677811550151975, + "grad_norm": 1.675127625058686, + "learning_rate": 1.9840837757910383e-05, + "loss": 0.1858, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1759578138589859, + "step": 2545, + "valid_targets_mean": 4113.9, + "valid_targets_min": 720 + }, + { + "epoch": 3.8753799392097266, + "grad_norm": 0.43203621299681155, + "learning_rate": 1.976504913838184e-05, + "loss": 0.1811, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1715025007724762, + "step": 2550, + "valid_targets_mean": 5221.4, + "valid_targets_min": 1839 + }, + { + "epoch": 3.882978723404255, + "grad_norm": 0.4384904057940854, + "learning_rate": 1.968926389302521e-05, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16733646392822266, + "step": 2555, + "valid_targets_mean": 4565.3, + "valid_targets_min": 609 + }, + { + "epoch": 3.8905775075987843, + "grad_norm": 0.4481617113710614, + "learning_rate": 1.9613483110206154e-05, + "loss": 0.1701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17321643233299255, + "step": 2560, + "valid_targets_mean": 4512.8, + "valid_targets_min": 895 + }, + { + "epoch": 3.898176291793313, + "grad_norm": 0.45819511671682567, + "learning_rate": 1.9537707878226216e-05, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17141106724739075, + "step": 2565, + "valid_targets_mean": 4315.5, + "valid_targets_min": 440 + }, + { + "epoch": 3.905775075987842, + "grad_norm": 0.4764235545857351, + "learning_rate": 1.9461939285307247e-05, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1686917096376419, + "step": 2570, + "valid_targets_mean": 3827.4, + "valid_targets_min": 622 + }, + { + "epoch": 3.9133738601823707, + "grad_norm": 0.4437209902659711, + "learning_rate": 1.9386178419575745e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16246762871742249, + "step": 2575, + "valid_targets_mean": 4545.2, + "valid_targets_min": 746 + }, + { + "epoch": 3.9209726443768997, + "grad_norm": 0.3844261379651969, + "learning_rate": 1.931042636904724e-05, + "loss": 0.1759, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17688338458538055, + "step": 2580, + "valid_targets_mean": 5911.6, + "valid_targets_min": 3130 + }, + { + "epoch": 3.928571428571429, + "grad_norm": 0.4052859116056726, + "learning_rate": 1.923468422161066e-05, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16950541734695435, + "step": 2585, + "valid_targets_mean": 5731.0, + "valid_targets_min": 509 + }, + { + "epoch": 3.9361702127659575, + "grad_norm": 0.4236870787112286, + "learning_rate": 1.9158953065012715e-05, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15792736411094666, + "step": 2590, + "valid_targets_mean": 5720.6, + "valid_targets_min": 2293 + }, + { + "epoch": 3.943768996960486, + "grad_norm": 0.46365326810020263, + "learning_rate": 1.9083233986842275e-05, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17180944979190826, + "step": 2595, + "valid_targets_mean": 5083.0, + "valid_targets_min": 711 + }, + { + "epoch": 3.951367781155015, + "grad_norm": 0.44863750873052993, + "learning_rate": 1.900752807451475e-05, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16900160908699036, + "step": 2600, + "valid_targets_mean": 4392.1, + "valid_targets_min": 810 + }, + { + "epoch": 3.9589665653495443, + "grad_norm": 0.462622398920299, + "learning_rate": 1.8931836415256468e-05, + "loss": 0.1788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16800816357135773, + "step": 2605, + "valid_targets_mean": 4713.2, + "valid_targets_min": 740 + }, + { + "epoch": 3.966565349544073, + "grad_norm": 0.46050975543187894, + "learning_rate": 1.885616009608907e-05, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14823880791664124, + "step": 2610, + "valid_targets_mean": 4082.6, + "valid_targets_min": 401 + }, + { + "epoch": 3.9741641337386016, + "grad_norm": 0.46421973514436904, + "learning_rate": 1.8780500203813902e-05, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18864090740680695, + "step": 2615, + "valid_targets_mean": 4411.6, + "valid_targets_min": 655 + }, + { + "epoch": 3.9817629179331306, + "grad_norm": 0.4291031448782404, + "learning_rate": 1.870485782499638e-05, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17856952548027039, + "step": 2620, + "valid_targets_mean": 4765.4, + "valid_targets_min": 348 + }, + { + "epoch": 3.9893617021276597, + "grad_norm": 0.44985063891512206, + "learning_rate": 1.8629234045950434e-05, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16248270869255066, + "step": 2625, + "valid_targets_mean": 4526.0, + "valid_targets_min": 1027 + }, + { + "epoch": 3.9969604863221884, + "grad_norm": 0.4752973434947488, + "learning_rate": 1.8553629952722853e-05, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17370517551898956, + "step": 2630, + "valid_targets_mean": 4128.8, + "valid_targets_min": 843 + }, + { + "epoch": 4.004559270516717, + "grad_norm": 0.4277885641586408, + "learning_rate": 1.8478046631077734e-05, + "loss": 0.167, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1758805513381958, + "step": 2635, + "valid_targets_mean": 4960.5, + "valid_targets_min": 647 + }, + { + "epoch": 4.0121580547112465, + "grad_norm": 0.4340489860844752, + "learning_rate": 1.8402485166480854e-05, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1884659081697464, + "step": 2640, + "valid_targets_mean": 5273.1, + "valid_targets_min": 787 + }, + { + "epoch": 4.019756838905775, + "grad_norm": 0.4325490917240746, + "learning_rate": 1.8326946644084112e-05, + "loss": 0.1573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1543048769235611, + "step": 2645, + "valid_targets_mean": 4604.4, + "valid_targets_min": 547 + }, + { + "epoch": 4.027355623100304, + "grad_norm": 0.5514612078158647, + "learning_rate": 1.8251432148709912e-05, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1609114408493042, + "step": 2650, + "valid_targets_mean": 3597.3, + "valid_targets_min": 695 + }, + { + "epoch": 4.0349544072948325, + "grad_norm": 0.43126338580266377, + "learning_rate": 1.817594276483563e-05, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18365256488323212, + "step": 2655, + "valid_targets_mean": 5326.8, + "valid_targets_min": 666 + }, + { + "epoch": 4.042553191489362, + "grad_norm": 0.4737332570423805, + "learning_rate": 1.8100479576577973e-05, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17224054038524628, + "step": 2660, + "valid_targets_mean": 4818.4, + "valid_targets_min": 918 + }, + { + "epoch": 4.050151975683891, + "grad_norm": 0.4438849641405412, + "learning_rate": 1.8025043667677484e-05, + "loss": 0.166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1628122627735138, + "step": 2665, + "valid_targets_mean": 4852.9, + "valid_targets_min": 1421 + }, + { + "epoch": 4.057750759878419, + "grad_norm": 0.5200587086302579, + "learning_rate": 1.794963612148291e-05, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1755530685186386, + "step": 2670, + "valid_targets_mean": 3932.9, + "valid_targets_min": 844 + }, + { + "epoch": 4.065349544072948, + "grad_norm": 0.5142004311196928, + "learning_rate": 1.7874258020935708e-05, + "loss": 0.1792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16136983036994934, + "step": 2675, + "valid_targets_mean": 3818.9, + "valid_targets_min": 758 + }, + { + "epoch": 4.072948328267477, + "grad_norm": 0.47669155432301397, + "learning_rate": 1.7798910448554433e-05, + "loss": 0.1749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17607629299163818, + "step": 2680, + "valid_targets_mean": 4376.6, + "valid_targets_min": 1368 + }, + { + "epoch": 4.080547112462006, + "grad_norm": 0.4655137661586852, + "learning_rate": 1.7723594486419245e-05, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15961049497127533, + "step": 2685, + "valid_targets_mean": 4512.1, + "valid_targets_min": 1601 + }, + { + "epoch": 4.088145896656535, + "grad_norm": 0.7525257215139017, + "learning_rate": 1.764831121615631e-05, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21296311914920807, + "step": 2690, + "valid_targets_mean": 4298.8, + "valid_targets_min": 756 + }, + { + "epoch": 4.095744680851064, + "grad_norm": 0.4433451433036404, + "learning_rate": 1.7573061718922347e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16739048063755035, + "step": 2695, + "valid_targets_mean": 5009.2, + "valid_targets_min": 853 + }, + { + "epoch": 4.103343465045593, + "grad_norm": 0.4830355618281073, + "learning_rate": 1.7497847075389004e-05, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16623979806900024, + "step": 2700, + "valid_targets_mean": 4105.4, + "valid_targets_min": 714 + }, + { + "epoch": 4.1109422492401215, + "grad_norm": 0.4179613714457159, + "learning_rate": 1.7422668365727428e-05, + "loss": 0.1572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16168740391731262, + "step": 2705, + "valid_targets_mean": 5317.4, + "valid_targets_min": 1811 + }, + { + "epoch": 4.11854103343465, + "grad_norm": 0.5056361715181314, + "learning_rate": 1.734752666959268e-05, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16789555549621582, + "step": 2710, + "valid_targets_mean": 4172.6, + "valid_targets_min": 666 + }, + { + "epoch": 4.12613981762918, + "grad_norm": 0.48722281404971973, + "learning_rate": 1.7272423066108306e-05, + "loss": 0.171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17382162809371948, + "step": 2715, + "valid_targets_mean": 4376.5, + "valid_targets_min": 263 + }, + { + "epoch": 4.133738601823708, + "grad_norm": 0.4790825604864442, + "learning_rate": 1.7197358633850744e-05, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16230902075767517, + "step": 2720, + "valid_targets_mean": 4323.8, + "valid_targets_min": 370 + }, + { + "epoch": 4.141337386018237, + "grad_norm": 0.41425680904063444, + "learning_rate": 1.7122334450833933e-05, + "loss": 0.1532, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18430155515670776, + "step": 2725, + "valid_targets_mean": 6086.1, + "valid_targets_min": 567 + }, + { + "epoch": 4.148936170212766, + "grad_norm": 0.4752171364109056, + "learning_rate": 1.7047351594493755e-05, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1950949728488922, + "step": 2730, + "valid_targets_mean": 5050.2, + "valid_targets_min": 804 + }, + { + "epoch": 4.156534954407295, + "grad_norm": 0.4757519244575605, + "learning_rate": 1.6972411141672614e-05, + "loss": 0.1621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1867167055606842, + "step": 2735, + "valid_targets_mean": 4470.1, + "valid_targets_min": 828 + }, + { + "epoch": 4.164133738601824, + "grad_norm": 0.4513608069410081, + "learning_rate": 1.6897514168603924e-05, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1613789200782776, + "step": 2740, + "valid_targets_mean": 4477.4, + "valid_targets_min": 742 + }, + { + "epoch": 4.171732522796352, + "grad_norm": 0.5141640145999388, + "learning_rate": 1.6822661750896707e-05, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19608411192893982, + "step": 2745, + "valid_targets_mean": 4257.2, + "valid_targets_min": 892 + }, + { + "epoch": 4.179331306990881, + "grad_norm": 0.4724318719284284, + "learning_rate": 1.67478549635201e-05, + "loss": 0.1741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1456223726272583, + "step": 2750, + "valid_targets_mean": 4336.9, + "valid_targets_min": 541 + }, + { + "epoch": 4.186930091185411, + "grad_norm": 0.4970941462233452, + "learning_rate": 1.6673094880787933e-05, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1658686399459839, + "step": 2755, + "valid_targets_mean": 4121.6, + "valid_targets_min": 601 + }, + { + "epoch": 4.194528875379939, + "grad_norm": 0.5000186608612818, + "learning_rate": 1.6598382576343312e-05, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16827929019927979, + "step": 2760, + "valid_targets_mean": 5388.8, + "valid_targets_min": 1463 + }, + { + "epoch": 4.202127659574468, + "grad_norm": 0.4684433964610707, + "learning_rate": 1.6523719123143186e-05, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15693283081054688, + "step": 2765, + "valid_targets_mean": 4821.7, + "valid_targets_min": 746 + }, + { + "epoch": 4.2097264437689965, + "grad_norm": 0.45967354792383214, + "learning_rate": 1.6449105593442936e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19241458177566528, + "step": 2770, + "valid_targets_mean": 4597.6, + "valid_targets_min": 855 + }, + { + "epoch": 4.217325227963526, + "grad_norm": 0.4733160030781719, + "learning_rate": 1.6374543058780998e-05, + "loss": 0.1576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16462013125419617, + "step": 2775, + "valid_targets_mean": 4405.4, + "valid_targets_min": 850 + }, + { + "epoch": 4.224924012158055, + "grad_norm": 0.4815352707164652, + "learning_rate": 1.6300032589963436e-05, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15910238027572632, + "step": 2780, + "valid_targets_mean": 4541.5, + "valid_targets_min": 909 + }, + { + "epoch": 4.232522796352583, + "grad_norm": 0.47250106872988024, + "learning_rate": 1.6225575257048622e-05, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1517636775970459, + "step": 2785, + "valid_targets_mean": 4741.7, + "valid_targets_min": 1120 + }, + { + "epoch": 4.240121580547113, + "grad_norm": 1.1194758985013034, + "learning_rate": 1.6151172129331786e-05, + "loss": 0.1722, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16665083169937134, + "step": 2790, + "valid_targets_mean": 4795.9, + "valid_targets_min": 567 + }, + { + "epoch": 4.2477203647416415, + "grad_norm": 0.5081276624925015, + "learning_rate": 1.6076824275329758e-05, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16445952653884888, + "step": 2795, + "valid_targets_mean": 4864.7, + "valid_targets_min": 1608 + }, + { + "epoch": 4.25531914893617, + "grad_norm": 0.48328270331504203, + "learning_rate": 1.6002532762765524e-05, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17975090444087982, + "step": 2800, + "valid_targets_mean": 4240.7, + "valid_targets_min": 616 + }, + { + "epoch": 4.262917933130699, + "grad_norm": 0.46912231409179, + "learning_rate": 1.5928298658552988e-05, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1919006109237671, + "step": 2805, + "valid_targets_mean": 5055.8, + "valid_targets_min": 588 + }, + { + "epoch": 4.270516717325228, + "grad_norm": 0.6210976552983519, + "learning_rate": 1.5854123028781557e-05, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1582549810409546, + "step": 2810, + "valid_targets_mean": 3868.8, + "valid_targets_min": 365 + }, + { + "epoch": 4.278115501519757, + "grad_norm": 0.4072288493810071, + "learning_rate": 1.5780006938700917e-05, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16181963682174683, + "step": 2815, + "valid_targets_mean": 5906.6, + "valid_targets_min": 1016 + }, + { + "epoch": 4.285714285714286, + "grad_norm": 0.45582906884120683, + "learning_rate": 1.5705951452705654e-05, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14693260192871094, + "step": 2820, + "valid_targets_mean": 4234.7, + "valid_targets_min": 811 + }, + { + "epoch": 4.293313069908814, + "grad_norm": 0.5429695412336001, + "learning_rate": 1.5631957634320048e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1935248076915741, + "step": 2825, + "valid_targets_mean": 4832.9, + "valid_targets_min": 774 + }, + { + "epoch": 4.300911854103344, + "grad_norm": 0.46340922363130344, + "learning_rate": 1.555802654618272e-05, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17274600267410278, + "step": 2830, + "valid_targets_mean": 5227.2, + "valid_targets_min": 933 + }, + { + "epoch": 4.308510638297872, + "grad_norm": 0.8213856644150885, + "learning_rate": 1.5484159250031445e-05, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17811016738414764, + "step": 2835, + "valid_targets_mean": 3817.1, + "valid_targets_min": 692 + }, + { + "epoch": 4.316109422492401, + "grad_norm": 0.4391910613617335, + "learning_rate": 1.541035680668785e-05, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17826610803604126, + "step": 2840, + "valid_targets_mean": 5220.5, + "valid_targets_min": 571 + }, + { + "epoch": 4.32370820668693, + "grad_norm": 0.5400136046572052, + "learning_rate": 1.5336620276042193e-05, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17823699116706848, + "step": 2845, + "valid_targets_mean": 4212.0, + "valid_targets_min": 156 + }, + { + "epoch": 4.331306990881459, + "grad_norm": 0.4689908498540122, + "learning_rate": 1.526295071703817e-05, + "loss": 0.1559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14596086740493774, + "step": 2850, + "valid_targets_mean": 4071.1, + "valid_targets_min": 842 + }, + { + "epoch": 4.338905775075988, + "grad_norm": 0.43293471403933603, + "learning_rate": 1.5189349187657665e-05, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1921626329421997, + "step": 2855, + "valid_targets_mean": 5745.9, + "valid_targets_min": 899 + }, + { + "epoch": 4.3465045592705165, + "grad_norm": 0.4470655930449328, + "learning_rate": 1.5115816744905596e-05, + "loss": 0.1616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.135872483253479, + "step": 2860, + "valid_targets_mean": 4759.4, + "valid_targets_min": 872 + }, + { + "epoch": 4.354103343465045, + "grad_norm": 0.5712823358033652, + "learning_rate": 1.504235444479469e-05, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20416559278964996, + "step": 2865, + "valid_targets_mean": 4337.3, + "valid_targets_min": 361 + }, + { + "epoch": 4.361702127659575, + "grad_norm": 0.4182532739028534, + "learning_rate": 1.4968963342330369e-05, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17232292890548706, + "step": 2870, + "valid_targets_mean": 5397.1, + "valid_targets_min": 1864 + }, + { + "epoch": 4.369300911854103, + "grad_norm": 0.4274336280548651, + "learning_rate": 1.4895644491495547e-05, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17278841137886047, + "step": 2875, + "valid_targets_mean": 5085.6, + "valid_targets_min": 758 + }, + { + "epoch": 4.376899696048632, + "grad_norm": 0.4542314839910377, + "learning_rate": 1.4822398945235545e-05, + "loss": 0.1539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15121108293533325, + "step": 2880, + "valid_targets_mean": 4447.8, + "valid_targets_min": 422 + }, + { + "epoch": 4.3844984802431615, + "grad_norm": 0.4154772615137915, + "learning_rate": 1.4749227755442927e-05, + "loss": 0.1761, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1821514070034027, + "step": 2885, + "valid_targets_mean": 5593.2, + "valid_targets_min": 2877 + }, + { + "epoch": 4.39209726443769, + "grad_norm": 0.4559773456464244, + "learning_rate": 1.4676131972942416e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16653896868228912, + "step": 2890, + "valid_targets_mean": 4738.8, + "valid_targets_min": 920 + }, + { + "epoch": 4.399696048632219, + "grad_norm": 0.41739506385968844, + "learning_rate": 1.4603112647475795e-05, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15362367033958435, + "step": 2895, + "valid_targets_mean": 5320.4, + "valid_targets_min": 552 + }, + { + "epoch": 4.407294832826747, + "grad_norm": 0.3845071167642644, + "learning_rate": 1.4530170827686831e-05, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13495799899101257, + "step": 2900, + "valid_targets_mean": 5222.8, + "valid_targets_min": 583 + }, + { + "epoch": 4.414893617021277, + "grad_norm": 0.44390729359629055, + "learning_rate": 1.4457307561106226e-05, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15813395380973816, + "step": 2905, + "valid_targets_mean": 4969.7, + "valid_targets_min": 776 + }, + { + "epoch": 4.422492401215806, + "grad_norm": 0.5018673994147254, + "learning_rate": 1.438452389413656e-05, + "loss": 0.174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15857061743736267, + "step": 2910, + "valid_targets_mean": 3932.9, + "valid_targets_min": 633 + }, + { + "epoch": 4.430091185410334, + "grad_norm": 0.4806146977648294, + "learning_rate": 1.4311820872037264e-05, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1731853187084198, + "step": 2915, + "valid_targets_mean": 4577.8, + "valid_targets_min": 328 + }, + { + "epoch": 4.437689969604863, + "grad_norm": 0.4617241179372537, + "learning_rate": 1.423919953890963e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18794915080070496, + "step": 2920, + "valid_targets_mean": 5022.4, + "valid_targets_min": 629 + }, + { + "epoch": 4.445288753799392, + "grad_norm": 0.48939540976668194, + "learning_rate": 1.4166660937681771e-05, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17850425839424133, + "step": 2925, + "valid_targets_mean": 4809.8, + "valid_targets_min": 409 + }, + { + "epoch": 4.452887537993921, + "grad_norm": 0.4842928277510236, + "learning_rate": 1.4094206110093712e-05, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1715683937072754, + "step": 2930, + "valid_targets_mean": 4601.0, + "valid_targets_min": 1634 + }, + { + "epoch": 4.46048632218845, + "grad_norm": 0.4074913023031158, + "learning_rate": 1.4021836096682343e-05, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1516132950782776, + "step": 2935, + "valid_targets_mean": 5510.4, + "valid_targets_min": 778 + }, + { + "epoch": 4.468085106382979, + "grad_norm": 0.42946649016106, + "learning_rate": 1.394955193676657e-05, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16095909476280212, + "step": 2940, + "valid_targets_mean": 5310.4, + "valid_targets_min": 1020 + }, + { + "epoch": 4.475683890577508, + "grad_norm": 0.4385638298379317, + "learning_rate": 1.3877354668432297e-05, + "loss": 0.1606, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15726953744888306, + "step": 2945, + "valid_targets_mean": 4561.4, + "valid_targets_min": 590 + }, + { + "epoch": 4.4832826747720365, + "grad_norm": 0.5193379752723524, + "learning_rate": 1.38052453285176e-05, + "loss": 0.1845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16541394591331482, + "step": 2950, + "valid_targets_mean": 3761.6, + "valid_targets_min": 561 + }, + { + "epoch": 4.490881458966565, + "grad_norm": 0.4490166063535176, + "learning_rate": 1.3733224952597764e-05, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1475997269153595, + "step": 2955, + "valid_targets_mean": 4226.3, + "valid_targets_min": 666 + }, + { + "epoch": 4.498480243161095, + "grad_norm": 0.5152090350550662, + "learning_rate": 1.3661294574970485e-05, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18321773409843445, + "step": 2960, + "valid_targets_mean": 4961.2, + "valid_targets_min": 602 + }, + { + "epoch": 4.506079027355623, + "grad_norm": 0.43396945945061266, + "learning_rate": 1.3589455228640938e-05, + "loss": 0.1754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17606866359710693, + "step": 2965, + "valid_targets_mean": 5181.6, + "valid_targets_min": 1460 + }, + { + "epoch": 4.513677811550152, + "grad_norm": 0.4690277765034569, + "learning_rate": 1.3517707945307013e-05, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18834683299064636, + "step": 2970, + "valid_targets_mean": 4646.1, + "valid_targets_min": 953 + }, + { + "epoch": 4.5212765957446805, + "grad_norm": 0.5066167184858231, + "learning_rate": 1.3446053755344439e-05, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15756574273109436, + "step": 2975, + "valid_targets_mean": 4516.5, + "valid_targets_min": 844 + }, + { + "epoch": 4.52887537993921, + "grad_norm": 0.5134300889609936, + "learning_rate": 1.3374493687792045e-05, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19803106784820557, + "step": 2980, + "valid_targets_mean": 4092.7, + "valid_targets_min": 719 + }, + { + "epoch": 4.536474164133739, + "grad_norm": 0.4463665768466012, + "learning_rate": 1.3303028770336914e-05, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1606081873178482, + "step": 2985, + "valid_targets_mean": 4620.2, + "valid_targets_min": 714 + }, + { + "epoch": 4.544072948328267, + "grad_norm": 0.4159202471735973, + "learning_rate": 1.3231660029299703e-05, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16770178079605103, + "step": 2990, + "valid_targets_mean": 5974.0, + "valid_targets_min": 1221 + }, + { + "epoch": 4.551671732522796, + "grad_norm": 0.4174566017417907, + "learning_rate": 1.316038848961982e-05, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16645801067352295, + "step": 2995, + "valid_targets_mean": 5945.2, + "valid_targets_min": 1900 + }, + { + "epoch": 4.5592705167173255, + "grad_norm": 0.41700946512428255, + "learning_rate": 1.3089215174840783e-05, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15380007028579712, + "step": 3000, + "valid_targets_mean": 5489.1, + "valid_targets_min": 894 + }, + { + "epoch": 4.566869300911854, + "grad_norm": 0.5304865164006446, + "learning_rate": 1.3018141107095455e-05, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16447263956069946, + "step": 3005, + "valid_targets_mean": 3494.4, + "valid_targets_min": 491 + }, + { + "epoch": 4.574468085106383, + "grad_norm": 0.48520946915581575, + "learning_rate": 1.2947167307091424e-05, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15445218980312347, + "step": 3010, + "valid_targets_mean": 4516.8, + "valid_targets_min": 664 + }, + { + "epoch": 4.5820668693009114, + "grad_norm": 0.47660891700948743, + "learning_rate": 1.287629479409628e-05, + "loss": 0.154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15592724084854126, + "step": 3015, + "valid_targets_mean": 4057.2, + "valid_targets_min": 905 + }, + { + "epoch": 4.589665653495441, + "grad_norm": 0.41867433836985307, + "learning_rate": 1.2805524585923048e-05, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1499272882938385, + "step": 3020, + "valid_targets_mean": 4869.1, + "valid_targets_min": 501 + }, + { + "epoch": 4.59726443768997, + "grad_norm": 0.40048802671676303, + "learning_rate": 1.2734857698915502e-05, + "loss": 0.1615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13745658099651337, + "step": 3025, + "valid_targets_mean": 5079.4, + "valid_targets_min": 1254 + }, + { + "epoch": 4.604863221884498, + "grad_norm": 0.4149531859078102, + "learning_rate": 1.266429514793363e-05, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15537427365779877, + "step": 3030, + "valid_targets_mean": 5211.1, + "valid_targets_min": 861 + }, + { + "epoch": 4.612462006079028, + "grad_norm": 0.6490039513363777, + "learning_rate": 1.2593837946339008e-05, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18460866808891296, + "step": 3035, + "valid_targets_mean": 4625.2, + "valid_targets_min": 918 + }, + { + "epoch": 4.620060790273556, + "grad_norm": 0.4992835453594247, + "learning_rate": 1.252348710598029e-05, + "loss": 0.1736, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.167076975107193, + "step": 3040, + "valid_targets_mean": 4100.6, + "valid_targets_min": 432 + }, + { + "epoch": 4.627659574468085, + "grad_norm": 0.4615786144106656, + "learning_rate": 1.245324363717864e-05, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16426023840904236, + "step": 3045, + "valid_targets_mean": 4565.8, + "valid_targets_min": 828 + }, + { + "epoch": 4.635258358662614, + "grad_norm": 0.41965629757500533, + "learning_rate": 1.2383108548713254e-05, + "loss": 0.1669, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1467415690422058, + "step": 3050, + "valid_targets_mean": 5088.9, + "valid_targets_min": 1159 + }, + { + "epoch": 4.642857142857143, + "grad_norm": 0.4441828730212559, + "learning_rate": 1.2313082847806852e-05, + "loss": 0.1776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1823854148387909, + "step": 3055, + "valid_targets_mean": 4715.0, + "valid_targets_min": 741 + }, + { + "epoch": 4.650455927051672, + "grad_norm": 0.459237281832204, + "learning_rate": 1.2243167540111216e-05, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14467957615852356, + "step": 3060, + "valid_targets_mean": 3859.3, + "valid_targets_min": 673 + }, + { + "epoch": 4.6580547112462005, + "grad_norm": 0.4946697227727552, + "learning_rate": 1.2173363629692756e-05, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19268517196178436, + "step": 3065, + "valid_targets_mean": 3953.9, + "valid_targets_min": 931 + }, + { + "epoch": 4.665653495440729, + "grad_norm": 0.43385480300907053, + "learning_rate": 1.2103672119018086e-05, + "loss": 0.1481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15391963720321655, + "step": 3070, + "valid_targets_mean": 4950.1, + "valid_targets_min": 1644 + }, + { + "epoch": 4.673252279635259, + "grad_norm": 0.47954284812840575, + "learning_rate": 1.2034094008939624e-05, + "loss": 0.1781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19548840820789337, + "step": 3075, + "valid_targets_mean": 5021.8, + "valid_targets_min": 774 + }, + { + "epoch": 4.680851063829787, + "grad_norm": 0.4636621474235311, + "learning_rate": 1.1964630298681221e-05, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14948098361492157, + "step": 3080, + "valid_targets_mean": 3846.4, + "valid_targets_min": 940 + }, + { + "epoch": 4.688449848024316, + "grad_norm": 1.0079316283200346, + "learning_rate": 1.1895281985823815e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1952032595872879, + "step": 3085, + "valid_targets_mean": 3557.1, + "valid_targets_min": 570 + }, + { + "epoch": 4.696048632218845, + "grad_norm": 0.4689910480215768, + "learning_rate": 1.1826050066291097e-05, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16652071475982666, + "step": 3090, + "valid_targets_mean": 4480.4, + "valid_targets_min": 728 + }, + { + "epoch": 4.703647416413374, + "grad_norm": 0.451296162182404, + "learning_rate": 1.1756935534335212e-05, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13410750031471252, + "step": 3095, + "valid_targets_mean": 4405.3, + "valid_targets_min": 1325 + }, + { + "epoch": 4.711246200607903, + "grad_norm": 0.42448672661829445, + "learning_rate": 1.1687939382522493e-05, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1605660319328308, + "step": 3100, + "valid_targets_mean": 4885.4, + "valid_targets_min": 1135 + }, + { + "epoch": 4.718844984802431, + "grad_norm": 0.4707749031713044, + "learning_rate": 1.1619062601719171e-05, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1604931652545929, + "step": 3105, + "valid_targets_mean": 4170.2, + "valid_targets_min": 857 + }, + { + "epoch": 4.72644376899696, + "grad_norm": 0.4670653605297488, + "learning_rate": 1.15503061810772e-05, + "loss": 0.1665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16604188084602356, + "step": 3110, + "valid_targets_mean": 5626.7, + "valid_targets_min": 414 + }, + { + "epoch": 4.73404255319149, + "grad_norm": 0.4609235488117921, + "learning_rate": 1.1481671108019984e-05, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23836404085159302, + "step": 3115, + "valid_targets_mean": 5899.8, + "valid_targets_min": 543 + }, + { + "epoch": 4.741641337386018, + "grad_norm": 0.46042215360634425, + "learning_rate": 1.1413158368228249e-05, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.163685142993927, + "step": 3120, + "valid_targets_mean": 4402.4, + "valid_targets_min": 512 + }, + { + "epoch": 4.749240121580547, + "grad_norm": 0.5338506865095787, + "learning_rate": 1.1344768945625884e-05, + "loss": 0.1723, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1888251006603241, + "step": 3125, + "valid_targets_mean": 4155.4, + "valid_targets_min": 537 + }, + { + "epoch": 4.756838905775076, + "grad_norm": 0.40137648604950027, + "learning_rate": 1.127650382236578e-05, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14810852706432343, + "step": 3130, + "valid_targets_mean": 5314.3, + "valid_targets_min": 354 + }, + { + "epoch": 4.764437689969605, + "grad_norm": 0.5240009462617735, + "learning_rate": 1.1208363978815746e-05, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18549123406410217, + "step": 3135, + "valid_targets_mean": 4340.2, + "valid_targets_min": 593 + }, + { + "epoch": 4.772036474164134, + "grad_norm": 0.43622202592017323, + "learning_rate": 1.1140350393544422e-05, + "loss": 0.1593, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15057966113090515, + "step": 3140, + "valid_targets_mean": 4826.1, + "valid_targets_min": 794 + }, + { + "epoch": 4.779635258358662, + "grad_norm": 0.46748847050986225, + "learning_rate": 1.1072464043307259e-05, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20890364050865173, + "step": 3145, + "valid_targets_mean": 4810.9, + "valid_targets_min": 434 + }, + { + "epoch": 4.787234042553192, + "grad_norm": 0.4709284580160048, + "learning_rate": 1.1004705903032406e-05, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17572824656963348, + "step": 3150, + "valid_targets_mean": 4275.6, + "valid_targets_min": 787 + }, + { + "epoch": 4.7948328267477205, + "grad_norm": 0.4947193520586711, + "learning_rate": 1.0937076945806837e-05, + "loss": 0.1567, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1653721183538437, + "step": 3155, + "valid_targets_mean": 3996.0, + "valid_targets_min": 608 + }, + { + "epoch": 4.802431610942249, + "grad_norm": 0.95424384071522, + "learning_rate": 1.0869578142862228e-05, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15056976675987244, + "step": 3160, + "valid_targets_mean": 5264.1, + "valid_targets_min": 911 + }, + { + "epoch": 4.810030395136778, + "grad_norm": 0.436658942790459, + "learning_rate": 1.0802210463561166e-05, + "loss": 0.1709, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15242451429367065, + "step": 3165, + "valid_targets_mean": 4430.6, + "valid_targets_min": 1307 + }, + { + "epoch": 4.817629179331307, + "grad_norm": 0.4940935042193495, + "learning_rate": 1.0734974875383066e-05, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15941178798675537, + "step": 3170, + "valid_targets_mean": 3658.3, + "valid_targets_min": 640 + }, + { + "epoch": 4.825227963525836, + "grad_norm": 0.5857976461421992, + "learning_rate": 1.0667872343910432e-05, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16998814046382904, + "step": 3175, + "valid_targets_mean": 4214.1, + "valid_targets_min": 619 + }, + { + "epoch": 4.832826747720365, + "grad_norm": 0.447470023536544, + "learning_rate": 1.0600903832814856e-05, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16528019309043884, + "step": 3180, + "valid_targets_mean": 4406.2, + "valid_targets_min": 774 + }, + { + "epoch": 4.840425531914894, + "grad_norm": 0.45582176893752013, + "learning_rate": 1.0534070303843294e-05, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16366976499557495, + "step": 3185, + "valid_targets_mean": 4874.4, + "valid_targets_min": 531 + }, + { + "epoch": 4.848024316109423, + "grad_norm": 0.491211928315914, + "learning_rate": 1.0467372716804141e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18734398484230042, + "step": 3190, + "valid_targets_mean": 4573.7, + "valid_targets_min": 655 + }, + { + "epoch": 4.855623100303951, + "grad_norm": 0.5889214979063572, + "learning_rate": 1.0400812029553569e-05, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18033604323863983, + "step": 3195, + "valid_targets_mean": 3925.6, + "valid_targets_min": 545 + }, + { + "epoch": 4.86322188449848, + "grad_norm": 0.4334778399672226, + "learning_rate": 1.0334389197981638e-05, + "loss": 0.1741, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14970465004444122, + "step": 3200, + "valid_targets_mean": 4557.2, + "valid_targets_min": 736 + }, + { + "epoch": 4.870820668693009, + "grad_norm": 0.5178203832731765, + "learning_rate": 1.0268105175998713e-05, + "loss": 0.1667, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16531500220298767, + "step": 3205, + "valid_targets_mean": 3934.7, + "valid_targets_min": 616 + }, + { + "epoch": 4.878419452887538, + "grad_norm": 0.4677336984690735, + "learning_rate": 1.0201960915521614e-05, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15597450733184814, + "step": 3210, + "valid_targets_mean": 4830.2, + "valid_targets_min": 631 + }, + { + "epoch": 4.886018237082067, + "grad_norm": 0.4582200792232965, + "learning_rate": 1.0135957366460087e-05, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14683008193969727, + "step": 3215, + "valid_targets_mean": 4210.8, + "valid_targets_min": 1151 + }, + { + "epoch": 4.8936170212765955, + "grad_norm": 2.6017818537468114, + "learning_rate": 1.0070095476703036e-05, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17961879074573517, + "step": 3220, + "valid_targets_mean": 4255.6, + "valid_targets_min": 544 + }, + { + "epoch": 4.901215805471125, + "grad_norm": 0.48429935318886524, + "learning_rate": 1.0004376192105032e-05, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1917518824338913, + "step": 3225, + "valid_targets_mean": 4164.9, + "valid_targets_min": 683 + }, + { + "epoch": 4.908814589665654, + "grad_norm": 0.408755975822661, + "learning_rate": 9.938800456472603e-06, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15598323941230774, + "step": 3230, + "valid_targets_mean": 5235.6, + "valid_targets_min": 673 + }, + { + "epoch": 4.916413373860182, + "grad_norm": 0.5687286004991109, + "learning_rate": 9.87336921155081e-06, + "loss": 0.1596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1737002432346344, + "step": 3235, + "valid_targets_mean": 4874.2, + "valid_targets_min": 648 + }, + { + "epoch": 4.924012158054711, + "grad_norm": 0.4449749826597429, + "learning_rate": 9.80808339700959e-06, + "loss": 0.1652, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18869557976722717, + "step": 3240, + "valid_targets_mean": 5077.7, + "valid_targets_min": 778 + }, + { + "epoch": 4.9316109422492405, + "grad_norm": 0.43592967879817196, + "learning_rate": 9.74294395043039e-06, + "loss": 0.1634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16350185871124268, + "step": 3245, + "valid_targets_mean": 4365.2, + "valid_targets_min": 579 + }, + { + "epoch": 4.939209726443769, + "grad_norm": 0.44671726836927617, + "learning_rate": 9.677951807292584e-06, + "loss": 0.1554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1710178405046463, + "step": 3250, + "valid_targets_mean": 4973.2, + "valid_targets_min": 456 + }, + { + "epoch": 4.946808510638298, + "grad_norm": 0.4273982901555888, + "learning_rate": 9.61310790096015e-06, + "loss": 0.1588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13666585087776184, + "step": 3255, + "valid_targets_mean": 4339.3, + "valid_targets_min": 970 + }, + { + "epoch": 4.954407294832826, + "grad_norm": 0.4111092281316594, + "learning_rate": 9.548413162668155e-06, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13993242383003235, + "step": 3260, + "valid_targets_mean": 5151.4, + "valid_targets_min": 1606 + }, + { + "epoch": 4.962006079027356, + "grad_norm": 0.4678973949276577, + "learning_rate": 9.483868521509492e-06, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17497044801712036, + "step": 3265, + "valid_targets_mean": 4569.8, + "valid_targets_min": 507 + }, + { + "epoch": 4.9696048632218845, + "grad_norm": 0.48923608505182986, + "learning_rate": 9.419474904421422e-06, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17405647039413452, + "step": 3270, + "valid_targets_mean": 4378.5, + "valid_targets_min": 646 + }, + { + "epoch": 4.977203647416413, + "grad_norm": 0.42080134203036884, + "learning_rate": 9.355233236172381e-06, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14896854758262634, + "step": 3275, + "valid_targets_mean": 4978.7, + "valid_targets_min": 1936 + }, + { + "epoch": 4.984802431610943, + "grad_norm": 0.45247817713087146, + "learning_rate": 9.2911444393486e-06, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14354351162910461, + "step": 3280, + "valid_targets_mean": 5089.9, + "valid_targets_min": 754 + }, + { + "epoch": 4.992401215805471, + "grad_norm": 0.4892191547497894, + "learning_rate": 9.227209434340914e-06, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1533418595790863, + "step": 3285, + "valid_targets_mean": 4154.1, + "valid_targets_min": 434 + }, + { + "epoch": 5.0, + "grad_norm": 0.4367103289579237, + "learning_rate": 9.163429139331516e-06, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1589938849210739, + "step": 3290, + "valid_targets_mean": 4928.7, + "valid_targets_min": 567 + }, + { + "epoch": 5.007598784194529, + "grad_norm": 0.5506166462705645, + "learning_rate": 9.099804470280791e-06, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1861366331577301, + "step": 3295, + "valid_targets_mean": 3553.4, + "valid_targets_min": 519 + }, + { + "epoch": 5.015197568389058, + "grad_norm": 0.44365514163643444, + "learning_rate": 9.036336340914138e-06, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18212465941905975, + "step": 3300, + "valid_targets_mean": 4840.4, + "valid_targets_min": 465 + }, + { + "epoch": 5.022796352583587, + "grad_norm": 0.5002686598117616, + "learning_rate": 8.973025662708875e-06, + "loss": 0.1501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16633297502994537, + "step": 3305, + "valid_targets_mean": 5309.5, + "valid_targets_min": 834 + }, + { + "epoch": 5.0303951367781155, + "grad_norm": 0.5141187748402867, + "learning_rate": 8.909873344881125e-06, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1726076900959015, + "step": 3310, + "valid_targets_mean": 4149.5, + "valid_targets_min": 1217 + }, + { + "epoch": 5.037993920972644, + "grad_norm": 0.42838999609845296, + "learning_rate": 8.846880294372777e-06, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1435108482837677, + "step": 3315, + "valid_targets_mean": 5703.1, + "valid_targets_min": 781 + }, + { + "epoch": 5.045592705167174, + "grad_norm": 0.4990807265797361, + "learning_rate": 8.784047415838446e-06, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15767478942871094, + "step": 3320, + "valid_targets_mean": 4732.1, + "valid_targets_min": 998 + }, + { + "epoch": 5.053191489361702, + "grad_norm": 0.4244905713932716, + "learning_rate": 8.721375611632494e-06, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15482358634471893, + "step": 3325, + "valid_targets_mean": 5283.4, + "valid_targets_min": 977 + }, + { + "epoch": 5.060790273556231, + "grad_norm": 0.5268949634896493, + "learning_rate": 8.658865781796059e-06, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16202573478221893, + "step": 3330, + "valid_targets_mean": 4009.4, + "valid_targets_min": 736 + }, + { + "epoch": 5.0683890577507595, + "grad_norm": 0.5222959380611226, + "learning_rate": 8.596518824044145e-06, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1700512319803238, + "step": 3335, + "valid_targets_mean": 4815.2, + "valid_targets_min": 1896 + }, + { + "epoch": 5.075987841945289, + "grad_norm": 0.48817810574292153, + "learning_rate": 8.53433563375271e-06, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15692004561424255, + "step": 3340, + "valid_targets_mean": 5081.4, + "valid_targets_min": 604 + }, + { + "epoch": 5.083586626139818, + "grad_norm": 0.5145519787891805, + "learning_rate": 8.472317103945827e-06, + "loss": 0.1554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15433713793754578, + "step": 3345, + "valid_targets_mean": 4923.5, + "valid_targets_min": 664 + }, + { + "epoch": 5.091185410334346, + "grad_norm": 0.49860669438741656, + "learning_rate": 8.410464125282842e-06, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18030579388141632, + "step": 3350, + "valid_targets_mean": 4054.4, + "valid_targets_min": 849 + }, + { + "epoch": 5.098784194528875, + "grad_norm": 0.4977104637629043, + "learning_rate": 8.348777586045599e-06, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16232091188430786, + "step": 3355, + "valid_targets_mean": 4112.1, + "valid_targets_min": 342 + }, + { + "epoch": 5.1063829787234045, + "grad_norm": 0.40792452628529047, + "learning_rate": 8.287258372125666e-06, + "loss": 0.1496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14980870485305786, + "step": 3360, + "valid_targets_mean": 5441.6, + "valid_targets_min": 828 + }, + { + "epoch": 5.113981762917933, + "grad_norm": 0.48624828153735117, + "learning_rate": 8.22590736701163e-06, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15524911880493164, + "step": 3365, + "valid_targets_mean": 4029.7, + "valid_targets_min": 527 + }, + { + "epoch": 5.121580547112462, + "grad_norm": 0.41260477481757823, + "learning_rate": 8.164725451776396e-06, + "loss": 0.139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11952009797096252, + "step": 3370, + "valid_targets_mean": 5127.4, + "valid_targets_min": 1259 + }, + { + "epoch": 5.129179331306991, + "grad_norm": 0.5121037283737604, + "learning_rate": 8.103713505064542e-06, + "loss": 0.147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13970546424388885, + "step": 3375, + "valid_targets_mean": 5086.4, + "valid_targets_min": 447 + }, + { + "epoch": 5.13677811550152, + "grad_norm": 0.554785879889902, + "learning_rate": 8.042872403079695e-06, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17848604917526245, + "step": 3380, + "valid_targets_mean": 4835.1, + "valid_targets_min": 229 + }, + { + "epoch": 5.144376899696049, + "grad_norm": 0.4543169082658653, + "learning_rate": 7.982203019571951e-06, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15885761380195618, + "step": 3385, + "valid_targets_mean": 4715.2, + "valid_targets_min": 614 + }, + { + "epoch": 5.151975683890577, + "grad_norm": 0.49519076224489234, + "learning_rate": 7.921706225825323e-06, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14728990197181702, + "step": 3390, + "valid_targets_mean": 4132.9, + "valid_targets_min": 705 + }, + { + "epoch": 5.159574468085107, + "grad_norm": 0.4692483655123735, + "learning_rate": 7.861382890645235e-06, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1677330732345581, + "step": 3395, + "valid_targets_mean": 4542.6, + "valid_targets_min": 571 + }, + { + "epoch": 5.167173252279635, + "grad_norm": 0.4981198573213941, + "learning_rate": 7.801233880346044e-06, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.152068629860878, + "step": 3400, + "valid_targets_mean": 4878.0, + "valid_targets_min": 1568 + }, + { + "epoch": 5.174772036474164, + "grad_norm": 0.496973792548942, + "learning_rate": 7.741260058738576e-06, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1845749020576477, + "step": 3405, + "valid_targets_mean": 4564.4, + "valid_targets_min": 333 + }, + { + "epoch": 5.182370820668693, + "grad_norm": 0.4801400629594902, + "learning_rate": 7.681462287117769e-06, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15920531749725342, + "step": 3410, + "valid_targets_mean": 4416.2, + "valid_targets_min": 618 + }, + { + "epoch": 5.189969604863222, + "grad_norm": 0.44717438624237565, + "learning_rate": 7.62184142425026e-06, + "loss": 0.1586, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14159642159938812, + "step": 3415, + "valid_targets_mean": 5296.9, + "valid_targets_min": 1679 + }, + { + "epoch": 5.197568389057751, + "grad_norm": 0.5694531326181947, + "learning_rate": 7.562398326362068e-06, + "loss": 0.1603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1987701654434204, + "step": 3420, + "valid_targets_mean": 4228.2, + "valid_targets_min": 500 + }, + { + "epoch": 5.2051671732522795, + "grad_norm": 0.4837511107077152, + "learning_rate": 7.503133847126298e-06, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14547014236450195, + "step": 3425, + "valid_targets_mean": 4541.6, + "valid_targets_min": 537 + }, + { + "epoch": 5.212765957446808, + "grad_norm": 0.46751291957082003, + "learning_rate": 7.444048837650879e-06, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1482163369655609, + "step": 3430, + "valid_targets_mean": 4358.3, + "valid_targets_min": 464 + }, + { + "epoch": 5.220364741641338, + "grad_norm": 0.48046465195883326, + "learning_rate": 7.3851441464663455e-06, + "loss": 0.1505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15912845730781555, + "step": 3435, + "valid_targets_mean": 4258.4, + "valid_targets_min": 1734 + }, + { + "epoch": 5.227963525835866, + "grad_norm": 0.5201595463216373, + "learning_rate": 7.326420619513645e-06, + "loss": 0.144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14452405273914337, + "step": 3440, + "valid_targets_mean": 4945.8, + "valid_targets_min": 2273 + }, + { + "epoch": 5.235562310030395, + "grad_norm": 0.4662188330800987, + "learning_rate": 7.267879100131996e-06, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16573692858219147, + "step": 3445, + "valid_targets_mean": 5350.9, + "valid_targets_min": 1272 + }, + { + "epoch": 5.243161094224924, + "grad_norm": 0.5445679437490333, + "learning_rate": 7.209520429046768e-06, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14232224225997925, + "step": 3450, + "valid_targets_mean": 3531.5, + "valid_targets_min": 756 + }, + { + "epoch": 5.250759878419453, + "grad_norm": 0.48341944333718034, + "learning_rate": 7.151345444357418e-06, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15717411041259766, + "step": 3455, + "valid_targets_mean": 4357.9, + "valid_targets_min": 410 + }, + { + "epoch": 5.258358662613982, + "grad_norm": 0.46774893892784875, + "learning_rate": 7.0933549815254465e-06, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16652044653892517, + "step": 3460, + "valid_targets_mean": 5190.8, + "valid_targets_min": 433 + }, + { + "epoch": 5.26595744680851, + "grad_norm": 0.47602862093431514, + "learning_rate": 7.035549873362406e-06, + "loss": 0.1648, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14074617624282837, + "step": 3465, + "valid_targets_mean": 4641.2, + "valid_targets_min": 1537 + }, + { + "epoch": 5.27355623100304, + "grad_norm": 0.49943887077257887, + "learning_rate": 6.97793095001793e-06, + "loss": 0.1609, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15125201642513275, + "step": 3470, + "valid_targets_mean": 4864.9, + "valid_targets_min": 754 + }, + { + "epoch": 5.281155015197569, + "grad_norm": 0.56912423597624, + "learning_rate": 6.920499038967825e-06, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1387101113796234, + "step": 3475, + "valid_targets_mean": 4497.3, + "valid_targets_min": 780 + }, + { + "epoch": 5.288753799392097, + "grad_norm": 0.6031298612179781, + "learning_rate": 6.863254965002178e-06, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17738491296768188, + "step": 3480, + "valid_targets_mean": 2821.3, + "valid_targets_min": 271 + }, + { + "epoch": 5.296352583586626, + "grad_norm": 0.42528213654923797, + "learning_rate": 6.80619955021351e-06, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13962329924106598, + "step": 3485, + "valid_targets_mean": 5226.1, + "valid_targets_min": 1805 + }, + { + "epoch": 5.303951367781155, + "grad_norm": 0.4641682316415047, + "learning_rate": 6.749333613984979e-06, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17883500456809998, + "step": 3490, + "valid_targets_mean": 5505.0, + "valid_targets_min": 2227 + }, + { + "epoch": 5.311550151975684, + "grad_norm": 0.41287597531580333, + "learning_rate": 6.6926579729786025e-06, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13076046109199524, + "step": 3495, + "valid_targets_mean": 5253.1, + "valid_targets_min": 624 + }, + { + "epoch": 5.319148936170213, + "grad_norm": 0.47804228992277803, + "learning_rate": 6.636173441123537e-06, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13578727841377258, + "step": 3500, + "valid_targets_mean": 4141.2, + "valid_targets_min": 974 + }, + { + "epoch": 5.326747720364741, + "grad_norm": 0.6043580771376642, + "learning_rate": 6.5798808296043835e-06, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1580887734889984, + "step": 3505, + "valid_targets_mean": 3810.6, + "valid_targets_min": 769 + }, + { + "epoch": 5.334346504559271, + "grad_norm": 0.5078101575478104, + "learning_rate": 6.52378094684954e-06, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17107361555099487, + "step": 3510, + "valid_targets_mean": 3934.2, + "valid_targets_min": 280 + }, + { + "epoch": 5.3419452887537995, + "grad_norm": 0.4141414829732433, + "learning_rate": 6.467874598519597e-06, + "loss": 0.1603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14241334795951843, + "step": 3515, + "valid_targets_mean": 5362.5, + "valid_targets_min": 2329 + }, + { + "epoch": 5.349544072948328, + "grad_norm": 0.4458140032344141, + "learning_rate": 6.412162587495754e-06, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13986560702323914, + "step": 3520, + "valid_targets_mean": 4640.0, + "valid_targets_min": 593 + }, + { + "epoch": 5.357142857142857, + "grad_norm": 0.49980298962190683, + "learning_rate": 6.3566457138683015e-06, + "loss": 0.1519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1622404009103775, + "step": 3525, + "valid_targets_mean": 4144.8, + "valid_targets_min": 523 + }, + { + "epoch": 5.364741641337386, + "grad_norm": 0.5309190846014655, + "learning_rate": 6.301324774925128e-06, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1419864147901535, + "step": 3530, + "valid_targets_mean": 4773.5, + "valid_targets_min": 601 + }, + { + "epoch": 5.372340425531915, + "grad_norm": 0.4394509557877868, + "learning_rate": 6.246200565140266e-06, + "loss": 0.1382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1346133053302765, + "step": 3535, + "valid_targets_mean": 5314.5, + "valid_targets_min": 1831 + }, + { + "epoch": 5.379939209726444, + "grad_norm": 0.6157551616826415, + "learning_rate": 6.191273876162487e-06, + "loss": 0.1559, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16742759943008423, + "step": 3540, + "valid_targets_mean": 4461.9, + "valid_targets_min": 632 + }, + { + "epoch": 5.387537993920972, + "grad_norm": 0.5207893779334158, + "learning_rate": 6.136545496803925e-06, + "loss": 0.1576, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.173904150724411, + "step": 3545, + "valid_targets_mean": 4806.9, + "valid_targets_min": 816 + }, + { + "epoch": 5.395136778115502, + "grad_norm": 0.41444365615563894, + "learning_rate": 6.082016213028761e-06, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14027008414268494, + "step": 3550, + "valid_targets_mean": 5664.4, + "valid_targets_min": 840 + }, + { + "epoch": 5.40273556231003, + "grad_norm": 0.4453222293778615, + "learning_rate": 6.0276868079419235e-06, + "loss": 0.1496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1503615528345108, + "step": 3555, + "valid_targets_mean": 5143.4, + "valid_targets_min": 979 + }, + { + "epoch": 5.410334346504559, + "grad_norm": 0.4781236892192554, + "learning_rate": 5.973558061777849e-06, + "loss": 0.1529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15819337964057922, + "step": 3560, + "valid_targets_mean": 5053.2, + "valid_targets_min": 664 + }, + { + "epoch": 5.4179331306990886, + "grad_norm": 0.41198950921824895, + "learning_rate": 5.919630751889274e-06, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13387863337993622, + "step": 3565, + "valid_targets_mean": 5390.4, + "valid_targets_min": 1116 + }, + { + "epoch": 5.425531914893617, + "grad_norm": 0.516010838368697, + "learning_rate": 5.865905652736072e-06, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17436817288398743, + "step": 3570, + "valid_targets_mean": 4064.7, + "valid_targets_min": 894 + }, + { + "epoch": 5.433130699088146, + "grad_norm": 0.5141562761832758, + "learning_rate": 5.812383535874131e-06, + "loss": 0.1428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13484925031661987, + "step": 3575, + "valid_targets_mean": 3949.4, + "valid_targets_min": 1312 + }, + { + "epoch": 5.4407294832826745, + "grad_norm": 0.45662210514471807, + "learning_rate": 5.759065169944274e-06, + "loss": 0.1434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1350744068622589, + "step": 3580, + "valid_targets_mean": 4565.4, + "valid_targets_min": 381 + }, + { + "epoch": 5.448328267477204, + "grad_norm": 0.49698187827992, + "learning_rate": 5.705951320661222e-06, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14174014329910278, + "step": 3585, + "valid_targets_mean": 4722.2, + "valid_targets_min": 2183 + }, + { + "epoch": 5.455927051671733, + "grad_norm": 0.45234338868163015, + "learning_rate": 5.653042750802591e-06, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14616911113262177, + "step": 3590, + "valid_targets_mean": 4943.4, + "valid_targets_min": 1261 + }, + { + "epoch": 5.463525835866261, + "grad_norm": 0.4806481881918089, + "learning_rate": 5.600340220197946e-06, + "loss": 0.1523, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17162683606147766, + "step": 3595, + "valid_targets_mean": 4331.8, + "valid_targets_min": 414 + }, + { + "epoch": 5.47112462006079, + "grad_norm": 0.46035606110531224, + "learning_rate": 5.547844485717884e-06, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14485681056976318, + "step": 3600, + "valid_targets_mean": 4167.1, + "valid_targets_min": 1119 + }, + { + "epoch": 5.4787234042553195, + "grad_norm": 0.4684818904402137, + "learning_rate": 5.4955563012631606e-06, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14429020881652832, + "step": 3605, + "valid_targets_mean": 5013.6, + "valid_targets_min": 1788 + }, + { + "epoch": 5.486322188449848, + "grad_norm": 0.6076128140539062, + "learning_rate": 5.443476417753877e-06, + "loss": 0.1681, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17048346996307373, + "step": 3610, + "valid_targets_mean": 2878.3, + "valid_targets_min": 491 + }, + { + "epoch": 5.493920972644377, + "grad_norm": 0.46825002881973943, + "learning_rate": 5.39160558311868e-06, + "loss": 0.1569, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17676037549972534, + "step": 3615, + "valid_targets_mean": 4354.2, + "valid_targets_min": 646 + }, + { + "epoch": 5.501519756838906, + "grad_norm": 0.49007855328228217, + "learning_rate": 5.33994454228403e-06, + "loss": 0.1608, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17580349743366241, + "step": 3620, + "valid_targets_mean": 5169.6, + "valid_targets_min": 953 + }, + { + "epoch": 5.509118541033435, + "grad_norm": 0.44768382223068853, + "learning_rate": 5.2884940371634915e-06, + "loss": 0.1533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14162276685237885, + "step": 3625, + "valid_targets_mean": 5179.9, + "valid_targets_min": 1051 + }, + { + "epoch": 5.5167173252279635, + "grad_norm": 0.4352121315996926, + "learning_rate": 5.237254806647117e-06, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1292266994714737, + "step": 3630, + "valid_targets_mean": 4936.1, + "valid_targets_min": 912 + }, + { + "epoch": 5.524316109422492, + "grad_norm": 0.5433474427694438, + "learning_rate": 5.1862275865907575e-06, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1526874601840973, + "step": 3635, + "valid_targets_mean": 3939.9, + "valid_targets_min": 1082 + }, + { + "epoch": 5.531914893617021, + "grad_norm": 0.4664875248042157, + "learning_rate": 5.135413109805596e-06, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14586284756660461, + "step": 3640, + "valid_targets_mean": 4819.6, + "valid_targets_min": 1708 + }, + { + "epoch": 5.53951367781155, + "grad_norm": 0.4479408250207293, + "learning_rate": 5.084812106047525e-06, + "loss": 0.1325, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13202941417694092, + "step": 3645, + "valid_targets_mean": 5479.3, + "valid_targets_min": 1475 + }, + { + "epoch": 5.547112462006079, + "grad_norm": 0.46875681135292097, + "learning_rate": 5.034425302006751e-06, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15239191055297852, + "step": 3650, + "valid_targets_mean": 4168.7, + "valid_targets_min": 534 + }, + { + "epoch": 5.554711246200608, + "grad_norm": 0.5099225903083285, + "learning_rate": 4.984253421297285e-06, + "loss": 0.1634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15130409598350525, + "step": 3655, + "valid_targets_mean": 3672.7, + "valid_targets_min": 465 + }, + { + "epoch": 5.562310030395137, + "grad_norm": 0.44549612335892824, + "learning_rate": 4.934297184446617e-06, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15832307934761047, + "step": 3660, + "valid_targets_mean": 5213.5, + "valid_targets_min": 1002 + }, + { + "epoch": 5.569908814589666, + "grad_norm": 0.5093597450581251, + "learning_rate": 4.884557308885302e-06, + "loss": 0.1496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16333602368831635, + "step": 3665, + "valid_targets_mean": 4690.4, + "valid_targets_min": 876 + }, + { + "epoch": 5.577507598784194, + "grad_norm": 0.5062778786472242, + "learning_rate": 4.835034508936736e-06, + "loss": 0.152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15072304010391235, + "step": 3670, + "valid_targets_mean": 4472.8, + "valid_targets_min": 863 + }, + { + "epoch": 5.585106382978723, + "grad_norm": 0.4887232514300285, + "learning_rate": 4.785729495806804e-06, + "loss": 0.1489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1506553441286087, + "step": 3675, + "valid_targets_mean": 4468.6, + "valid_targets_min": 1870 + }, + { + "epoch": 5.592705167173253, + "grad_norm": 0.5178118286048965, + "learning_rate": 4.736642977573745e-06, + "loss": 0.1443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14603829383850098, + "step": 3680, + "valid_targets_mean": 3900.1, + "valid_targets_min": 314 + }, + { + "epoch": 5.600303951367781, + "grad_norm": 0.49021784557999176, + "learning_rate": 4.6877756591779465e-06, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1614750772714615, + "step": 3685, + "valid_targets_mean": 4549.1, + "valid_targets_min": 1530 + }, + { + "epoch": 5.60790273556231, + "grad_norm": 0.46685253564728685, + "learning_rate": 4.63912824241183e-06, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15664935111999512, + "step": 3690, + "valid_targets_mean": 4078.2, + "valid_targets_min": 702 + }, + { + "epoch": 5.6155015197568385, + "grad_norm": 0.4476391544726595, + "learning_rate": 4.590701425909763e-06, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1614711433649063, + "step": 3695, + "valid_targets_mean": 5204.6, + "valid_targets_min": 1871 + }, + { + "epoch": 5.623100303951368, + "grad_norm": 0.46752237532336416, + "learning_rate": 4.5424959051380376e-06, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14600232243537903, + "step": 3700, + "valid_targets_mean": 4345.4, + "valid_targets_min": 1418 + }, + { + "epoch": 5.630699088145897, + "grad_norm": 0.498907377725208, + "learning_rate": 4.4945123723848785e-06, + "loss": 0.1668, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16958801448345184, + "step": 3705, + "valid_targets_mean": 4744.0, + "valid_targets_min": 655 + }, + { + "epoch": 5.638297872340425, + "grad_norm": 0.47032248993505454, + "learning_rate": 4.446751516750496e-06, + "loss": 0.1441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15484699606895447, + "step": 3710, + "valid_targets_mean": 4455.9, + "valid_targets_min": 513 + }, + { + "epoch": 5.645896656534955, + "grad_norm": 0.49291725666506586, + "learning_rate": 4.399214024137199e-06, + "loss": 0.1519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1515854448080063, + "step": 3715, + "valid_targets_mean": 4026.5, + "valid_targets_min": 401 + }, + { + "epoch": 5.6534954407294835, + "grad_norm": 0.42768293201723206, + "learning_rate": 4.351900577239534e-06, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1308768093585968, + "step": 3720, + "valid_targets_mean": 5203.2, + "valid_targets_min": 511 + }, + { + "epoch": 5.661094224924012, + "grad_norm": 0.419913357256391, + "learning_rate": 4.30481185553449e-06, + "loss": 0.143, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15996746718883514, + "step": 3725, + "valid_targets_mean": 6209.4, + "valid_targets_min": 3964 + }, + { + "epoch": 5.668693009118541, + "grad_norm": 0.5024514745126758, + "learning_rate": 4.2579485352717365e-06, + "loss": 0.1429, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14928898215293884, + "step": 3730, + "valid_targets_mean": 5061.5, + "valid_targets_min": 854 + }, + { + "epoch": 5.676291793313069, + "grad_norm": 0.6107697257765264, + "learning_rate": 4.211311289463913e-06, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.212265282869339, + "step": 3735, + "valid_targets_mean": 2980.9, + "valid_targets_min": 613 + }, + { + "epoch": 5.683890577507599, + "grad_norm": 0.40762228356266644, + "learning_rate": 4.164900787876958e-06, + "loss": 0.1572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15725256502628326, + "step": 3740, + "valid_targets_mean": 5801.9, + "valid_targets_min": 1647 + }, + { + "epoch": 5.691489361702128, + "grad_norm": 0.42068151608687643, + "learning_rate": 4.118717697020503e-06, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15178656578063965, + "step": 3745, + "valid_targets_mean": 5270.4, + "valid_targets_min": 714 + }, + { + "epoch": 5.699088145896656, + "grad_norm": 0.510742750219421, + "learning_rate": 4.072762680138283e-06, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16748473048210144, + "step": 3750, + "valid_targets_mean": 4111.5, + "valid_targets_min": 690 + }, + { + "epoch": 5.706686930091186, + "grad_norm": 0.4725979697075687, + "learning_rate": 4.02703639719863e-06, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16421647369861603, + "step": 3755, + "valid_targets_mean": 5007.1, + "valid_targets_min": 1593 + }, + { + "epoch": 5.714285714285714, + "grad_norm": 0.4858255836304888, + "learning_rate": 3.981539504884975e-06, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1452382504940033, + "step": 3760, + "valid_targets_mean": 4335.0, + "valid_targets_min": 268 + }, + { + "epoch": 5.721884498480243, + "grad_norm": 0.496365033025088, + "learning_rate": 3.936272656586455e-06, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1607646495103836, + "step": 3765, + "valid_targets_mean": 4691.0, + "valid_targets_min": 801 + }, + { + "epoch": 5.729483282674772, + "grad_norm": 0.49339220005728074, + "learning_rate": 3.891236502388463e-06, + "loss": 0.1399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12876826524734497, + "step": 3770, + "valid_targets_mean": 3568.1, + "valid_targets_min": 629 + }, + { + "epoch": 5.737082066869301, + "grad_norm": 0.42864321603538963, + "learning_rate": 3.846431689063395e-06, + "loss": 0.1598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14634914696216583, + "step": 3775, + "valid_targets_mean": 5067.4, + "valid_targets_min": 1740 + }, + { + "epoch": 5.74468085106383, + "grad_norm": 0.4858809112101771, + "learning_rate": 3.801858860061276e-06, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15183153748512268, + "step": 3780, + "valid_targets_mean": 4308.7, + "valid_targets_min": 2342 + }, + { + "epoch": 5.7522796352583585, + "grad_norm": 0.430010773677836, + "learning_rate": 3.757518655500607e-06, + "loss": 0.1413, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11490476876497269, + "step": 3785, + "valid_targets_mean": 4636.7, + "valid_targets_min": 1770 + }, + { + "epoch": 5.759878419452887, + "grad_norm": 0.44137928337029025, + "learning_rate": 3.7134117121590783e-06, + "loss": 0.1477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1602618247270584, + "step": 3790, + "valid_targets_mean": 5387.0, + "valid_targets_min": 1520 + }, + { + "epoch": 5.767477203647417, + "grad_norm": 0.49716171756169814, + "learning_rate": 3.6695386634645268e-06, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17046484351158142, + "step": 3795, + "valid_targets_mean": 3979.0, + "valid_targets_min": 637 + }, + { + "epoch": 5.775075987841945, + "grad_norm": 0.6225472640133536, + "learning_rate": 3.625900139485732e-06, + "loss": 0.1483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16933000087738037, + "step": 3800, + "valid_targets_mean": 3247.3, + "valid_targets_min": 403 + }, + { + "epoch": 5.782674772036474, + "grad_norm": 0.4826561130766123, + "learning_rate": 3.5824967669234712e-06, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14927777647972107, + "step": 3805, + "valid_targets_mean": 4297.8, + "valid_targets_min": 479 + }, + { + "epoch": 5.7902735562310035, + "grad_norm": 0.4594251420598871, + "learning_rate": 3.539329169101424e-06, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15365689992904663, + "step": 3810, + "valid_targets_mean": 5167.4, + "valid_targets_min": 892 + }, + { + "epoch": 5.797872340425532, + "grad_norm": 0.4557619581698914, + "learning_rate": 3.49639796595731e-06, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1744815707206726, + "step": 3815, + "valid_targets_mean": 5195.3, + "valid_targets_min": 634 + }, + { + "epoch": 5.805471124620061, + "grad_norm": 0.46537892238846895, + "learning_rate": 3.453703774033901e-06, + "loss": 0.1587, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14466017484664917, + "step": 3820, + "valid_targets_mean": 4648.0, + "valid_targets_min": 736 + }, + { + "epoch": 5.813069908814589, + "grad_norm": 0.5247551115664364, + "learning_rate": 3.4112472064702473e-06, + "loss": 0.1533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16951002180576324, + "step": 3825, + "valid_targets_mean": 4100.8, + "valid_targets_min": 529 + }, + { + "epoch": 5.820668693009118, + "grad_norm": 0.4408079538837663, + "learning_rate": 3.369028872992792e-06, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15143227577209473, + "step": 3830, + "valid_targets_mean": 5215.6, + "valid_targets_min": 1009 + }, + { + "epoch": 5.828267477203648, + "grad_norm": 0.5248938288172915, + "learning_rate": 3.327049379906695e-06, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17994189262390137, + "step": 3835, + "valid_targets_mean": 4604.6, + "valid_targets_min": 570 + }, + { + "epoch": 5.835866261398176, + "grad_norm": 0.5052356507109963, + "learning_rate": 3.2853093300870452e-06, + "loss": 0.1457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1394600123167038, + "step": 3840, + "valid_targets_mean": 4017.8, + "valid_targets_min": 736 + }, + { + "epoch": 5.843465045592705, + "grad_norm": 0.47463044811940924, + "learning_rate": 3.2438093229702905e-06, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15373006463050842, + "step": 3845, + "valid_targets_mean": 4134.0, + "valid_targets_min": 410 + }, + { + "epoch": 5.851063829787234, + "grad_norm": 0.513814275937601, + "learning_rate": 3.202549954545533e-06, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23078633844852448, + "step": 3850, + "valid_targets_mean": 5543.1, + "valid_targets_min": 655 + }, + { + "epoch": 5.858662613981763, + "grad_norm": 0.48073524570765763, + "learning_rate": 3.161531817346062e-06, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17668002843856812, + "step": 3855, + "valid_targets_mean": 4446.9, + "valid_targets_min": 449 + }, + { + "epoch": 5.866261398176292, + "grad_norm": 0.4258287069585612, + "learning_rate": 3.120755500440762e-06, + "loss": 0.1546, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14281289279460907, + "step": 3860, + "valid_targets_mean": 5058.1, + "valid_targets_min": 1100 + }, + { + "epoch": 5.87386018237082, + "grad_norm": 0.4556314380528612, + "learning_rate": 3.0802215894257336e-06, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15817520022392273, + "step": 3865, + "valid_targets_mean": 4869.2, + "valid_targets_min": 2426 + }, + { + "epoch": 5.88145896656535, + "grad_norm": 0.632880711013422, + "learning_rate": 3.0399306664158e-06, + "loss": 0.1443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1440334916114807, + "step": 3870, + "valid_targets_mean": 4294.8, + "valid_targets_min": 1072 + }, + { + "epoch": 5.8890577507598785, + "grad_norm": 0.40158185981708566, + "learning_rate": 2.9998833100362336e-06, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1560600996017456, + "step": 3875, + "valid_targets_mean": 5631.6, + "valid_targets_min": 1728 + }, + { + "epoch": 5.896656534954407, + "grad_norm": 0.500623763450671, + "learning_rate": 2.9600800954143572e-06, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1587526947259903, + "step": 3880, + "valid_targets_mean": 4028.8, + "valid_targets_min": 666 + }, + { + "epoch": 5.904255319148936, + "grad_norm": 0.4944868321896879, + "learning_rate": 2.9205215941713704e-06, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16634200513362885, + "step": 3885, + "valid_targets_mean": 4611.9, + "valid_targets_min": 732 + }, + { + "epoch": 5.911854103343465, + "grad_norm": 0.5115742550534301, + "learning_rate": 2.8812083744140616e-06, + "loss": 0.1515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15021023154258728, + "step": 3890, + "valid_targets_mean": 4236.3, + "valid_targets_min": 1104 + }, + { + "epoch": 5.919452887537994, + "grad_norm": 0.48224861462921226, + "learning_rate": 2.842141000726726e-06, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1501300185918808, + "step": 3895, + "valid_targets_mean": 4213.5, + "valid_targets_min": 774 + }, + { + "epoch": 5.927051671732523, + "grad_norm": 0.4030631035629361, + "learning_rate": 2.8033200341629886e-06, + "loss": 0.1519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12463116645812988, + "step": 3900, + "valid_targets_mean": 5398.5, + "valid_targets_min": 1514 + }, + { + "epoch": 5.934650455927052, + "grad_norm": 0.45096444866968627, + "learning_rate": 2.7647460322377927e-06, + "loss": 0.1766, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17850109934806824, + "step": 3905, + "valid_targets_mean": 5562.8, + "valid_targets_min": 569 + }, + { + "epoch": 5.942249240121581, + "grad_norm": 0.41781952891112895, + "learning_rate": 2.72641954891937e-06, + "loss": 0.1637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.145452082157135, + "step": 3910, + "valid_targets_mean": 5237.6, + "valid_targets_min": 787 + }, + { + "epoch": 5.949848024316109, + "grad_norm": 0.462920182118315, + "learning_rate": 2.688341134621295e-06, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1589956283569336, + "step": 3915, + "valid_targets_mean": 5620.2, + "valid_targets_min": 797 + }, + { + "epoch": 5.957446808510638, + "grad_norm": 0.48846090610407683, + "learning_rate": 2.6505113361945833e-06, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18880805373191833, + "step": 3920, + "valid_targets_mean": 4641.4, + "valid_targets_min": 879 + }, + { + "epoch": 5.9650455927051675, + "grad_norm": 0.440227878411004, + "learning_rate": 2.612930696919822e-06, + "loss": 0.1488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13358448445796967, + "step": 3925, + "valid_targets_mean": 4184.0, + "valid_targets_min": 559 + }, + { + "epoch": 5.972644376899696, + "grad_norm": 0.4575207059455806, + "learning_rate": 2.5755997564993894e-06, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14469373226165771, + "step": 3930, + "valid_targets_mean": 4649.6, + "valid_targets_min": 2261 + }, + { + "epoch": 5.980243161094225, + "grad_norm": 0.5005233906873359, + "learning_rate": 2.5385190510496858e-06, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17244486510753632, + "step": 3935, + "valid_targets_mean": 4924.8, + "valid_targets_min": 171 + }, + { + "epoch": 5.9878419452887535, + "grad_norm": 0.4458252216107338, + "learning_rate": 2.5016891130934463e-06, + "loss": 0.1457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13665539026260376, + "step": 3940, + "valid_targets_mean": 4990.1, + "valid_targets_min": 500 + }, + { + "epoch": 5.995440729483283, + "grad_norm": 0.4846406288628859, + "learning_rate": 2.465110471552086e-06, + "loss": 0.1664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16603650152683258, + "step": 3945, + "valid_targets_mean": 4758.2, + "valid_targets_min": 599 + }, + { + "epoch": 6.003039513677812, + "grad_norm": 0.44214905635705837, + "learning_rate": 2.4287836517381113e-06, + "loss": 0.1349, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11986207216978073, + "step": 3950, + "valid_targets_mean": 4267.4, + "valid_targets_min": 335 + }, + { + "epoch": 6.01063829787234, + "grad_norm": 0.4887571085350468, + "learning_rate": 2.392709175347554e-06, + "loss": 0.1505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14001381397247314, + "step": 3955, + "valid_targets_mean": 4841.9, + "valid_targets_min": 797 + }, + { + "epoch": 6.018237082066869, + "grad_norm": 0.5312808060591666, + "learning_rate": 2.356887560452528e-06, + "loss": 0.1583, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17127478122711182, + "step": 3960, + "valid_targets_mean": 3657.6, + "valid_targets_min": 713 + }, + { + "epoch": 6.025835866261398, + "grad_norm": 0.49420144944896355, + "learning_rate": 2.321319321493718e-06, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15153075754642487, + "step": 3965, + "valid_targets_mean": 4141.9, + "valid_targets_min": 792 + }, + { + "epoch": 6.033434650455927, + "grad_norm": 0.43348847119788875, + "learning_rate": 2.2860049692730745e-06, + "loss": 0.1506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15093687176704407, + "step": 3970, + "valid_targets_mean": 5298.1, + "valid_targets_min": 491 + }, + { + "epoch": 6.041033434650456, + "grad_norm": 0.4852668062537144, + "learning_rate": 2.2509450109463903e-06, + "loss": 0.1462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15192578732967377, + "step": 3975, + "valid_targets_mean": 4195.6, + "valid_targets_min": 646 + }, + { + "epoch": 6.048632218844984, + "grad_norm": 0.4985600383895827, + "learning_rate": 2.2161399500161005e-06, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12650343775749207, + "step": 3980, + "valid_targets_mean": 4298.5, + "valid_targets_min": 1887 + }, + { + "epoch": 6.056231003039514, + "grad_norm": 0.46036482703942144, + "learning_rate": 2.1815902863239826e-06, + "loss": 0.1459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12870556116104126, + "step": 3985, + "valid_targets_mean": 4722.8, + "valid_targets_min": 1027 + }, + { + "epoch": 6.0638297872340425, + "grad_norm": 0.4724257952962997, + "learning_rate": 2.1472965160440307e-06, + "loss": 0.1548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15059977769851685, + "step": 3990, + "valid_targets_mean": 4350.8, + "valid_targets_min": 1618 + }, + { + "epoch": 6.071428571428571, + "grad_norm": 0.4736941877158723, + "learning_rate": 2.1132591316752824e-06, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12970831990242004, + "step": 3995, + "valid_targets_mean": 4671.1, + "valid_targets_min": 1136 + }, + { + "epoch": 6.079027355623101, + "grad_norm": 0.4790995642913263, + "learning_rate": 2.079478622034803e-06, + "loss": 0.1506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1599026918411255, + "step": 4000, + "valid_targets_mean": 5553.9, + "valid_targets_min": 2157 + }, + { + "epoch": 6.086626139817629, + "grad_norm": 0.5750210601223926, + "learning_rate": 2.045955472250598e-06, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1329852193593979, + "step": 4005, + "valid_targets_mean": 4740.1, + "valid_targets_min": 658 + }, + { + "epoch": 6.094224924012158, + "grad_norm": 0.5356962782262873, + "learning_rate": 2.012690163754716e-06, + "loss": 0.1506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1583370566368103, + "step": 4010, + "valid_targets_mean": 3881.9, + "valid_targets_min": 833 + }, + { + "epoch": 6.101823708206687, + "grad_norm": 0.5430593717962985, + "learning_rate": 1.9796831742762658e-06, + "loss": 0.1503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17519140243530273, + "step": 4015, + "valid_targets_mean": 4008.7, + "valid_targets_min": 513 + }, + { + "epoch": 6.109422492401216, + "grad_norm": 0.5395476454999809, + "learning_rate": 1.9469349778346223e-06, + "loss": 0.1416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1673513501882553, + "step": 4020, + "valid_targets_mean": 3863.8, + "valid_targets_min": 862 + }, + { + "epoch": 6.117021276595745, + "grad_norm": 0.6041814504745721, + "learning_rate": 1.9144460447325564e-06, + "loss": 0.1433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16433076560497284, + "step": 4025, + "valid_targets_mean": 4698.3, + "valid_targets_min": 821 + }, + { + "epoch": 6.124620060790273, + "grad_norm": 0.6094776194613557, + "learning_rate": 1.8822168415495422e-06, + "loss": 0.1453, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13730981945991516, + "step": 4030, + "valid_targets_mean": 4077.6, + "valid_targets_min": 901 + }, + { + "epoch": 6.132218844984802, + "grad_norm": 0.45063574077263974, + "learning_rate": 1.8502478311349947e-06, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16246050596237183, + "step": 4035, + "valid_targets_mean": 5466.6, + "valid_targets_min": 2424 + }, + { + "epoch": 6.139817629179332, + "grad_norm": 0.4816199109533305, + "learning_rate": 1.8185394726016791e-06, + "loss": 0.1604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1370609700679779, + "step": 4040, + "valid_targets_mean": 4561.9, + "valid_targets_min": 743 + }, + { + "epoch": 6.14741641337386, + "grad_norm": 0.6732958197054807, + "learning_rate": 1.7870922213190755e-06, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16209891438484192, + "step": 4045, + "valid_targets_mean": 3887.8, + "valid_targets_min": 550 + }, + { + "epoch": 6.155015197568389, + "grad_norm": 0.4601061012528339, + "learning_rate": 1.7559065289068633e-06, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13671234250068665, + "step": 4050, + "valid_targets_mean": 5133.4, + "valid_targets_min": 479 + }, + { + "epoch": 6.1626139817629175, + "grad_norm": 0.43746420290219207, + "learning_rate": 1.72498284322842e-06, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1267484724521637, + "step": 4055, + "valid_targets_mean": 5035.9, + "valid_targets_min": 1853 + }, + { + "epoch": 6.170212765957447, + "grad_norm": 0.4569139573946025, + "learning_rate": 1.694321608384406e-06, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14077520370483398, + "step": 4060, + "valid_targets_mean": 5069.5, + "valid_targets_min": 2002 + }, + { + "epoch": 6.177811550151976, + "grad_norm": 0.43274533973618445, + "learning_rate": 1.663923264706373e-06, + "loss": 0.1329, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14554743468761444, + "step": 4065, + "valid_targets_mean": 5077.8, + "valid_targets_min": 519 + }, + { + "epoch": 6.185410334346504, + "grad_norm": 0.47582107432910764, + "learning_rate": 1.6337882487504452e-06, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14857840538024902, + "step": 4070, + "valid_targets_mean": 4424.7, + "valid_targets_min": 674 + }, + { + "epoch": 6.193009118541034, + "grad_norm": 0.5274167558239128, + "learning_rate": 1.603916993291048e-06, + "loss": 0.1502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14230819046497345, + "step": 4075, + "valid_targets_mean": 3961.1, + "valid_targets_min": 654 + }, + { + "epoch": 6.2006079027355625, + "grad_norm": 0.49913070570635226, + "learning_rate": 1.5743099273146967e-06, + "loss": 0.1504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1440865695476532, + "step": 4080, + "valid_targets_mean": 5009.5, + "valid_targets_min": 444 + }, + { + "epoch": 6.208206686930091, + "grad_norm": 0.4571022258136094, + "learning_rate": 1.5449674760138344e-06, + "loss": 0.1355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1474510282278061, + "step": 4085, + "valid_targets_mean": 4348.4, + "valid_targets_min": 770 + }, + { + "epoch": 6.21580547112462, + "grad_norm": 0.4714848047264726, + "learning_rate": 1.5158900607807248e-06, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16296353936195374, + "step": 4090, + "valid_targets_mean": 4949.5, + "valid_targets_min": 694 + }, + { + "epoch": 6.223404255319149, + "grad_norm": 0.4322780542533103, + "learning_rate": 1.4870780992013956e-06, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13294166326522827, + "step": 4095, + "valid_targets_mean": 4950.1, + "valid_targets_min": 329 + }, + { + "epoch": 6.231003039513678, + "grad_norm": 0.5464674366866006, + "learning_rate": 1.4585320050496531e-06, + "loss": 0.1446, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15174424648284912, + "step": 4100, + "valid_targets_mean": 4007.2, + "valid_targets_min": 834 + }, + { + "epoch": 6.238601823708207, + "grad_norm": 0.42685774893365297, + "learning_rate": 1.4302521882811316e-06, + "loss": 0.1441, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1383514404296875, + "step": 4105, + "valid_targets_mean": 5403.9, + "valid_targets_min": 918 + }, + { + "epoch": 6.246200607902735, + "grad_norm": 0.5366963612937214, + "learning_rate": 1.4022390550274034e-06, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15787436068058014, + "step": 4110, + "valid_targets_mean": 3990.0, + "valid_targets_min": 434 + }, + { + "epoch": 6.253799392097265, + "grad_norm": 0.46724324680744234, + "learning_rate": 1.3744930075901563e-06, + "loss": 0.1529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14154699444770813, + "step": 4115, + "valid_targets_mean": 4727.9, + "valid_targets_min": 588 + }, + { + "epoch": 6.261398176291793, + "grad_norm": 0.41711994711720174, + "learning_rate": 1.3470144444354061e-06, + "loss": 0.1499, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1419452279806137, + "step": 4120, + "valid_targets_mean": 5510.2, + "valid_targets_min": 713 + }, + { + "epoch": 6.268996960486322, + "grad_norm": 0.5220642496809625, + "learning_rate": 1.3198037601877789e-06, + "loss": 0.1511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1353391706943512, + "step": 4125, + "valid_targets_mean": 4115.4, + "valid_targets_min": 450 + }, + { + "epoch": 6.276595744680851, + "grad_norm": 0.4939240759429581, + "learning_rate": 1.2928613456248473e-06, + "loss": 0.1614, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15414312481880188, + "step": 4130, + "valid_targets_mean": 5176.1, + "valid_targets_min": 571 + }, + { + "epoch": 6.28419452887538, + "grad_norm": 0.442808139834025, + "learning_rate": 1.266187587671508e-06, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15302585065364838, + "step": 4135, + "valid_targets_mean": 5258.6, + "valid_targets_min": 2495 + }, + { + "epoch": 6.291793313069909, + "grad_norm": 0.4548322696937593, + "learning_rate": 1.2397828693944346e-06, + "loss": 0.1469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14306071400642395, + "step": 4140, + "valid_targets_mean": 4863.1, + "valid_targets_min": 1948 + }, + { + "epoch": 6.2993920972644375, + "grad_norm": 0.46459072507225524, + "learning_rate": 1.2136475699965766e-06, + "loss": 0.1448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1553143858909607, + "step": 4145, + "valid_targets_mean": 4614.7, + "valid_targets_min": 816 + }, + { + "epoch": 6.306990881458967, + "grad_norm": 0.4740383536049804, + "learning_rate": 1.1877820648117045e-06, + "loss": 0.1487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13636600971221924, + "step": 4150, + "valid_targets_mean": 4426.7, + "valid_targets_min": 668 + }, + { + "epoch": 6.314589665653496, + "grad_norm": 0.6354900899788738, + "learning_rate": 1.162186725299026e-06, + "loss": 0.1596, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.155159130692482, + "step": 4155, + "valid_targets_mean": 3329.7, + "valid_targets_min": 585 + }, + { + "epoch": 6.322188449848024, + "grad_norm": 0.4835553520976796, + "learning_rate": 1.1368619190378527e-06, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17658662796020508, + "step": 4160, + "valid_targets_mean": 4895.8, + "valid_targets_min": 1482 + }, + { + "epoch": 6.329787234042553, + "grad_norm": 0.4554986467408132, + "learning_rate": 1.1118080097223194e-06, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15265491604804993, + "step": 4165, + "valid_targets_mean": 5167.2, + "valid_targets_min": 1292 + }, + { + "epoch": 6.3373860182370825, + "grad_norm": 0.41973694830585506, + "learning_rate": 1.0870253571561595e-06, + "loss": 0.1551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12299495190382004, + "step": 4170, + "valid_targets_mean": 4780.7, + "valid_targets_min": 970 + }, + { + "epoch": 6.344984802431611, + "grad_norm": 0.5118900681918644, + "learning_rate": 1.0625143172475404e-06, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15326988697052002, + "step": 4175, + "valid_targets_mean": 4208.6, + "valid_targets_min": 636 + }, + { + "epoch": 6.35258358662614, + "grad_norm": 0.49642090601129557, + "learning_rate": 1.0382752420039455e-06, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15455183386802673, + "step": 4180, + "valid_targets_mean": 4102.3, + "valid_targets_min": 997 + }, + { + "epoch": 6.360182370820668, + "grad_norm": 0.45380918848581747, + "learning_rate": 1.0143084795271329e-06, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15013962984085083, + "step": 4185, + "valid_targets_mean": 4628.6, + "valid_targets_min": 1572 + }, + { + "epoch": 6.367781155015198, + "grad_norm": 0.5198176682483998, + "learning_rate": 9.906143740081232e-07, + "loss": 0.1465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15279626846313477, + "step": 4190, + "valid_targets_mean": 3914.1, + "valid_targets_min": 365 + }, + { + "epoch": 6.375379939209727, + "grad_norm": 0.4840441603322201, + "learning_rate": 9.671932657222593e-07, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14245177805423737, + "step": 4195, + "valid_targets_mean": 4812.6, + "valid_targets_min": 370 + }, + { + "epoch": 6.382978723404255, + "grad_norm": 0.49127649884609786, + "learning_rate": 9.440454910243235e-07, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1520758867263794, + "step": 4200, + "valid_targets_mean": 4315.1, + "valid_targets_min": 517 + }, + { + "epoch": 6.390577507598784, + "grad_norm": 0.4383379165078549, + "learning_rate": 9.211713823437063e-07, + "loss": 0.135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1275199055671692, + "step": 4205, + "valid_targets_mean": 5704.6, + "valid_targets_min": 581 + }, + { + "epoch": 6.398176291793313, + "grad_norm": 0.5022657150694129, + "learning_rate": 8.985712681796288e-07, + "loss": 0.1528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17914274334907532, + "step": 4210, + "valid_targets_mean": 4508.4, + "valid_targets_min": 640 + }, + { + "epoch": 6.405775075987842, + "grad_norm": 0.443057452645931, + "learning_rate": 8.762454730964265e-07, + "loss": 0.1503, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15875935554504395, + "step": 4215, + "valid_targets_mean": 5279.9, + "valid_targets_min": 972 + }, + { + "epoch": 6.413373860182371, + "grad_norm": 0.5449589653106505, + "learning_rate": 8.541943177188882e-07, + "loss": 0.1492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14582566916942596, + "step": 4220, + "valid_targets_mean": 4343.2, + "valid_targets_min": 892 + }, + { + "epoch": 6.420972644376899, + "grad_norm": 0.4997679647771693, + "learning_rate": 8.324181187276581e-07, + "loss": 0.1643, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15510517358779907, + "step": 4225, + "valid_targets_mean": 4085.7, + "valid_targets_min": 912 + }, + { + "epoch": 6.428571428571429, + "grad_norm": 0.44678133251760943, + "learning_rate": 8.109171888546763e-07, + "loss": 0.1554, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15751153230667114, + "step": 4230, + "valid_targets_mean": 5778.9, + "valid_targets_min": 1954 + }, + { + "epoch": 6.4361702127659575, + "grad_norm": 0.46788177391921826, + "learning_rate": 7.896918368786921e-07, + "loss": 0.16, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15475589036941528, + "step": 4235, + "valid_targets_mean": 5169.8, + "valid_targets_min": 398 + }, + { + "epoch": 6.443768996960486, + "grad_norm": 0.5529538967645823, + "learning_rate": 7.687423676208361e-07, + "loss": 0.1455, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13819439709186554, + "step": 4240, + "valid_targets_mean": 3202.6, + "valid_targets_min": 528 + }, + { + "epoch": 6.451367781155016, + "grad_norm": 0.5420949591531663, + "learning_rate": 7.480690819402348e-07, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1723976731300354, + "step": 4245, + "valid_targets_mean": 3846.0, + "valid_targets_min": 630 + }, + { + "epoch": 6.458966565349544, + "grad_norm": 0.46741861641823984, + "learning_rate": 7.276722767296873e-07, + "loss": 0.1677, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1849384605884552, + "step": 4250, + "valid_targets_mean": 5119.9, + "valid_targets_min": 883 + }, + { + "epoch": 6.466565349544073, + "grad_norm": 0.4582713436855528, + "learning_rate": 7.075522449114158e-07, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15820877254009247, + "step": 4255, + "valid_targets_mean": 5030.8, + "valid_targets_min": 802 + }, + { + "epoch": 6.474164133738602, + "grad_norm": 0.5211860109572727, + "learning_rate": 6.877092754328419e-07, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13718628883361816, + "step": 4260, + "valid_targets_mean": 3807.9, + "valid_targets_min": 845 + }, + { + "epoch": 6.481762917933131, + "grad_norm": 0.45451059770584273, + "learning_rate": 6.681436532624474e-07, + "loss": 0.1487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1656309813261032, + "step": 4265, + "valid_targets_mean": 5183.2, + "valid_targets_min": 1002 + }, + { + "epoch": 6.48936170212766, + "grad_norm": 0.47843071748812777, + "learning_rate": 6.488556593856809e-07, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.150562584400177, + "step": 4270, + "valid_targets_mean": 4543.6, + "valid_targets_min": 656 + }, + { + "epoch": 6.496960486322188, + "grad_norm": 0.5319539824885515, + "learning_rate": 6.298455708009176e-07, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15512758493423462, + "step": 4275, + "valid_targets_mean": 3799.5, + "valid_targets_min": 395 + }, + { + "epoch": 6.504559270516717, + "grad_norm": 0.4705596941066166, + "learning_rate": 6.111136605154877e-07, + "loss": 0.1457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16856907308101654, + "step": 4280, + "valid_targets_mean": 4642.4, + "valid_targets_min": 589 + }, + { + "epoch": 6.5121580547112465, + "grad_norm": 0.46141711207212177, + "learning_rate": 5.926601975417501e-07, + "loss": 0.1504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17225798964500427, + "step": 4285, + "valid_targets_mean": 4939.4, + "valid_targets_min": 706 + }, + { + "epoch": 6.519756838905775, + "grad_norm": 0.4495104074555779, + "learning_rate": 5.744854468932315e-07, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1445414125919342, + "step": 4290, + "valid_targets_mean": 4603.4, + "valid_targets_min": 810 + }, + { + "epoch": 6.527355623100304, + "grad_norm": 0.5733777706336946, + "learning_rate": 5.565896695808203e-07, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16275358200073242, + "step": 4295, + "valid_targets_mean": 4785.0, + "valid_targets_min": 599 + }, + { + "epoch": 6.5349544072948325, + "grad_norm": 0.8991578623134154, + "learning_rate": 5.389731226090189e-07, + "loss": 0.144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14771892130374908, + "step": 4300, + "valid_targets_mean": 4996.3, + "valid_targets_min": 981 + }, + { + "epoch": 6.542553191489362, + "grad_norm": 0.4730812664882942, + "learning_rate": 5.216360589722546e-07, + "loss": 0.134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12634432315826416, + "step": 4305, + "valid_targets_mean": 4793.1, + "valid_targets_min": 689 + }, + { + "epoch": 6.550151975683891, + "grad_norm": 0.7912559796750507, + "learning_rate": 5.045787276512371e-07, + "loss": 0.1406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14239290356636047, + "step": 4310, + "valid_targets_mean": 4268.8, + "valid_targets_min": 558 + }, + { + "epoch": 6.557750759878419, + "grad_norm": 0.5029546562969596, + "learning_rate": 4.878013736093979e-07, + "loss": 0.1436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1558806449174881, + "step": 4315, + "valid_targets_mean": 4656.5, + "valid_targets_min": 767 + }, + { + "epoch": 6.565349544072948, + "grad_norm": 0.4604621873568329, + "learning_rate": 4.713042377893562e-07, + "loss": 0.1377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1444740742444992, + "step": 4320, + "valid_targets_mean": 4422.7, + "valid_targets_min": 346 + }, + { + "epoch": 6.572948328267477, + "grad_norm": 0.4561980791707648, + "learning_rate": 4.550875571094726e-07, + "loss": 0.1712, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14359544217586517, + "step": 4325, + "valid_targets_mean": 4979.9, + "valid_targets_min": 2820 + }, + { + "epoch": 6.580547112462006, + "grad_norm": 0.47209358910428856, + "learning_rate": 4.391515644604383e-07, + "loss": 0.1502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1348213255405426, + "step": 4330, + "valid_targets_mean": 4494.7, + "valid_targets_min": 589 + }, + { + "epoch": 6.588145896656535, + "grad_norm": 0.4994707929739602, + "learning_rate": 4.2349648870193103e-07, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15195885300636292, + "step": 4335, + "valid_targets_mean": 4239.8, + "valid_targets_min": 307 + }, + { + "epoch": 6.595744680851064, + "grad_norm": 0.44042761045131223, + "learning_rate": 4.081225546593337e-07, + "loss": 0.1513, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.129265695810318, + "step": 4340, + "valid_targets_mean": 5283.0, + "valid_targets_min": 1159 + }, + { + "epoch": 6.603343465045593, + "grad_norm": 0.47257646281879734, + "learning_rate": 3.9302998312049865e-07, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14243176579475403, + "step": 4345, + "valid_targets_mean": 5513.4, + "valid_targets_min": 1072 + }, + { + "epoch": 6.6109422492401215, + "grad_norm": 0.5058368728464357, + "learning_rate": 3.782189908325817e-07, + "loss": 0.1492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14209382236003876, + "step": 4350, + "valid_targets_mean": 4845.4, + "valid_targets_min": 876 + }, + { + "epoch": 6.61854103343465, + "grad_norm": 0.7222155225557564, + "learning_rate": 3.636897904989312e-07, + "loss": 0.1581, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17290833592414856, + "step": 4355, + "valid_targets_mean": 5422.5, + "valid_targets_min": 833 + }, + { + "epoch": 6.62613981762918, + "grad_norm": 0.5233180981965705, + "learning_rate": 3.494425907760235e-07, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1492675244808197, + "step": 4360, + "valid_targets_mean": 4823.8, + "valid_targets_min": 1224 + }, + { + "epoch": 6.633738601823708, + "grad_norm": 0.5428953839010765, + "learning_rate": 3.3547759627047927e-07, + "loss": 0.1585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16122207045555115, + "step": 4365, + "valid_targets_mean": 3899.6, + "valid_targets_min": 753 + }, + { + "epoch": 6.641337386018237, + "grad_norm": 0.4419180172745714, + "learning_rate": 3.2179500753611423e-07, + "loss": 0.1436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.135797381401062, + "step": 4370, + "valid_targets_mean": 5275.6, + "valid_targets_min": 861 + }, + { + "epoch": 6.648936170212766, + "grad_norm": 0.4922555679423693, + "learning_rate": 3.0839502107106625e-07, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13909777998924255, + "step": 4375, + "valid_targets_mean": 3951.1, + "valid_targets_min": 702 + }, + { + "epoch": 6.656534954407295, + "grad_norm": 0.5087072512502506, + "learning_rate": 2.952778293149705e-07, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16498416662216187, + "step": 4380, + "valid_targets_mean": 4312.6, + "valid_targets_min": 429 + }, + { + "epoch": 6.664133738601824, + "grad_norm": 0.4560459158918358, + "learning_rate": 2.8244362064619777e-07, + "loss": 0.1542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13717833161354065, + "step": 4385, + "valid_targets_mean": 5210.4, + "valid_targets_min": 864 + }, + { + "epoch": 6.671732522796352, + "grad_norm": 0.5227653302420172, + "learning_rate": 2.698925793791407e-07, + "loss": 0.1476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13961048424243927, + "step": 4390, + "valid_targets_mean": 4236.1, + "valid_targets_min": 534 + }, + { + "epoch": 6.679331306990882, + "grad_norm": 0.6213258302004528, + "learning_rate": 2.576248857615826e-07, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14259956777095795, + "step": 4395, + "valid_targets_mean": 4035.6, + "valid_targets_min": 752 + }, + { + "epoch": 6.686930091185411, + "grad_norm": 0.4688652989866426, + "learning_rate": 2.4564071597209304e-07, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15681865811347961, + "step": 4400, + "valid_targets_mean": 4584.2, + "valid_targets_min": 869 + }, + { + "epoch": 6.694528875379939, + "grad_norm": 0.44005419382672806, + "learning_rate": 2.3394024211750964e-07, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1523476541042328, + "step": 4405, + "valid_targets_mean": 4952.2, + "valid_targets_min": 646 + }, + { + "epoch": 6.702127659574468, + "grad_norm": 0.5205262877358279, + "learning_rate": 2.2252363223045358e-07, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16646242141723633, + "step": 4410, + "valid_targets_mean": 4151.1, + "valid_targets_min": 820 + }, + { + "epoch": 6.7097264437689965, + "grad_norm": 0.40766981515095174, + "learning_rate": 2.1139105026693586e-07, + "loss": 0.139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11196255683898926, + "step": 4415, + "valid_targets_mean": 5557.6, + "valid_targets_min": 2410 + }, + { + "epoch": 6.717325227963526, + "grad_norm": 0.4673986831108265, + "learning_rate": 2.0054265610397916e-07, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1210668534040451, + "step": 4420, + "valid_targets_mean": 3992.8, + "valid_targets_min": 936 + }, + { + "epoch": 6.724924012158055, + "grad_norm": 0.5619671678982381, + "learning_rate": 1.8997860553733981e-07, + "loss": 0.155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17317882180213928, + "step": 4425, + "valid_targets_mean": 3874.4, + "valid_targets_min": 446 + }, + { + "epoch": 6.732522796352583, + "grad_norm": 0.4280630319229206, + "learning_rate": 1.7969905027926504e-07, + "loss": 0.1758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1904495656490326, + "step": 4430, + "valid_targets_mean": 6671.5, + "valid_targets_min": 907 + }, + { + "epoch": 6.740121580547113, + "grad_norm": 0.44122684787277994, + "learning_rate": 1.6970413795631025e-07, + "loss": 0.1528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14559206366539001, + "step": 4435, + "valid_targets_mean": 5373.0, + "valid_targets_min": 595 + }, + { + "epoch": 6.7477203647416415, + "grad_norm": 0.5080820811124032, + "learning_rate": 1.5999401210722075e-07, + "loss": 0.1423, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1340736597776413, + "step": 4440, + "valid_targets_mean": 3668.2, + "valid_targets_min": 694 + }, + { + "epoch": 6.75531914893617, + "grad_norm": 0.5257059339394762, + "learning_rate": 1.5056881218088016e-07, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15984541177749634, + "step": 4445, + "valid_targets_mean": 4337.5, + "valid_targets_min": 498 + }, + { + "epoch": 6.762917933130699, + "grad_norm": 0.43327223465331344, + "learning_rate": 1.4142867353428514e-07, + "loss": 0.1488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.157816082239151, + "step": 4450, + "valid_targets_mean": 5491.9, + "valid_targets_min": 827 + }, + { + "epoch": 6.770516717325228, + "grad_norm": 0.50219861891989, + "learning_rate": 1.3257372743063157e-07, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11714717745780945, + "step": 4455, + "valid_targets_mean": 3812.8, + "valid_targets_min": 502 + }, + { + "epoch": 6.778115501519757, + "grad_norm": 0.45055666905824504, + "learning_rate": 1.2400410103740045e-07, + "loss": 0.136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14584630727767944, + "step": 4460, + "valid_targets_mean": 5178.9, + "valid_targets_min": 516 + }, + { + "epoch": 6.785714285714286, + "grad_norm": 0.43952764279896794, + "learning_rate": 1.157199174245549e-07, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13095200061798096, + "step": 4465, + "valid_targets_mean": 5314.9, + "valid_targets_min": 1217 + }, + { + "epoch": 6.793313069908814, + "grad_norm": 0.4738968512497199, + "learning_rate": 1.0772129556275268e-07, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12924160063266754, + "step": 4470, + "valid_targets_mean": 4845.8, + "valid_targets_min": 843 + }, + { + "epoch": 6.800911854103344, + "grad_norm": 0.5060295374260998, + "learning_rate": 1.0000835032165645e-07, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1658594310283661, + "step": 4475, + "valid_targets_mean": 4230.7, + "valid_targets_min": 915 + }, + { + "epoch": 6.808510638297872, + "grad_norm": 0.4466023053432, + "learning_rate": 9.258119246826625e-08, + "loss": 0.1447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1489754319190979, + "step": 4480, + "valid_targets_mean": 5316.6, + "valid_targets_min": 2627 + }, + { + "epoch": 6.816109422492401, + "grad_norm": 0.46338292936158193, + "learning_rate": 8.543992866534734e-08, + "loss": 0.1511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14392951130867004, + "step": 4485, + "valid_targets_mean": 4956.6, + "valid_targets_min": 409 + }, + { + "epoch": 6.823708206686931, + "grad_norm": 0.5158394667496263, + "learning_rate": 7.858466146988042e-08, + "loss": 0.1488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16497617959976196, + "step": 4490, + "valid_targets_mean": 4279.7, + "valid_targets_min": 775 + }, + { + "epoch": 6.831306990881459, + "grad_norm": 0.42417696152695383, + "learning_rate": 7.201548933160275e-08, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14127472043037415, + "step": 4495, + "valid_targets_mean": 5479.2, + "valid_targets_min": 2631 + }, + { + "epoch": 6.838905775075988, + "grad_norm": 0.5211912905058261, + "learning_rate": 6.573250659158481e-08, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14878888428211212, + "step": 4500, + "valid_targets_mean": 4435.9, + "valid_targets_min": 2412 + }, + { + "epoch": 6.8465045592705165, + "grad_norm": 0.43147229360794226, + "learning_rate": 5.973580348088259e-08, + "loss": 0.154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15211549401283264, + "step": 4505, + "valid_targets_mean": 5367.4, + "valid_targets_min": 2364 + }, + { + "epoch": 6.854103343465045, + "grad_norm": 0.4561025698955299, + "learning_rate": 5.4025466119234094e-08, + "loss": 0.152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13766488432884216, + "step": 4510, + "valid_targets_mean": 5133.6, + "valid_targets_min": 655 + }, + { + "epoch": 6.861702127659575, + "grad_norm": 0.4670200688124053, + "learning_rate": 4.860157651383146e-08, + "loss": 0.1457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12196432799100876, + "step": 4515, + "valid_targets_mean": 5354.1, + "valid_targets_min": 1941 + }, + { + "epoch": 6.869300911854103, + "grad_norm": 0.46811141014194185, + "learning_rate": 4.346421255813527e-08, + "loss": 0.1485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1508735716342926, + "step": 4520, + "valid_targets_mean": 4876.9, + "valid_targets_min": 647 + }, + { + "epoch": 6.876899696048632, + "grad_norm": 0.5309409348069238, + "learning_rate": 3.8613448030759836e-08, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1319991499185562, + "step": 4525, + "valid_targets_mean": 4153.0, + "valid_targets_min": 558 + }, + { + "epoch": 6.8844984802431615, + "grad_norm": 0.4583034610783906, + "learning_rate": 3.404935259441633e-08, + "loss": 0.1482, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1385440230369568, + "step": 4530, + "valid_targets_mean": 5040.1, + "valid_targets_min": 666 + }, + { + "epoch": 6.89209726443769, + "grad_norm": 0.5760708842558298, + "learning_rate": 2.977199179490686e-08, + "loss": 0.152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16344650089740753, + "step": 4535, + "valid_targets_mean": 4789.9, + "valid_targets_min": 774 + }, + { + "epoch": 6.899696048632219, + "grad_norm": 0.5268446726565859, + "learning_rate": 2.5781427060183052e-08, + "loss": 0.1543, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1606959104537964, + "step": 4540, + "valid_targets_mean": 4283.8, + "valid_targets_min": 229 + }, + { + "epoch": 6.907294832826747, + "grad_norm": 0.4734646023589767, + "learning_rate": 2.2077715699468928e-08, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14003373682498932, + "step": 4545, + "valid_targets_mean": 4974.8, + "valid_targets_min": 773 + }, + { + "epoch": 6.914893617021277, + "grad_norm": 0.4360796739556199, + "learning_rate": 1.8660910902434936e-08, + "loss": 0.1501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12974536418914795, + "step": 4550, + "valid_targets_mean": 5910.8, + "valid_targets_min": 1114 + }, + { + "epoch": 6.922492401215806, + "grad_norm": 0.48684145305746557, + "learning_rate": 1.5531061738436327e-08, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15198959410190582, + "step": 4555, + "valid_targets_mean": 3936.3, + "valid_targets_min": 1779 + }, + { + "epoch": 6.930091185410334, + "grad_norm": 0.5091294127835059, + "learning_rate": 1.2688213155802598e-08, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1628214567899704, + "step": 4560, + "valid_targets_mean": 4214.5, + "valid_targets_min": 683 + }, + { + "epoch": 6.937689969604863, + "grad_norm": 0.43005172219863, + "learning_rate": 1.0132405981195804e-08, + "loss": 0.1287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13487809896469116, + "step": 4565, + "valid_targets_mean": 5104.1, + "valid_targets_min": 2548 + }, + { + "epoch": 6.945288753799392, + "grad_norm": 0.5851616543398429, + "learning_rate": 7.863676919031005e-09, + "loss": 0.1459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13807334005832672, + "step": 4570, + "valid_targets_mean": 4572.6, + "valid_targets_min": 792 + }, + { + "epoch": 6.952887537993921, + "grad_norm": 0.5007425137471639, + "learning_rate": 5.882058550932268e-09, + "loss": 0.1427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1591353416442871, + "step": 4575, + "valid_targets_mean": 3956.2, + "valid_targets_min": 447 + }, + { + "epoch": 6.96048632218845, + "grad_norm": 0.5253135197553259, + "learning_rate": 4.187579335281911e-09, + "loss": 0.1618, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16565826535224915, + "step": 4580, + "valid_targets_mean": 3869.7, + "valid_targets_min": 1117 + }, + { + "epoch": 6.968085106382979, + "grad_norm": 0.5136974200238252, + "learning_rate": 2.780263606805278e-09, + "loss": 0.1449, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1372585892677307, + "step": 4585, + "valid_targets_mean": 4006.6, + "valid_targets_min": 538 + }, + { + "epoch": 6.975683890577508, + "grad_norm": 0.5478996499330996, + "learning_rate": 1.6601315762154735e-09, + "loss": 0.1487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1610221564769745, + "step": 4590, + "valid_targets_mean": 4602.3, + "valid_targets_min": 741 + }, + { + "epoch": 6.9832826747720365, + "grad_norm": 0.48873947966074016, + "learning_rate": 8.271993299358017e-10, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16153308749198914, + "step": 4595, + "valid_targets_mean": 4922.5, + "valid_targets_min": 589 + }, + { + "epoch": 6.990881458966565, + "grad_norm": 0.484585895107589, + "learning_rate": 2.814788298532989e-10, + "loss": 0.1529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14617104828357697, + "step": 4600, + "valid_targets_mean": 4203.9, + "valid_targets_min": 714 + }, + { + "epoch": 6.998480243161094, + "grad_norm": 0.4567401537531439, + "learning_rate": 2.2977913158861444e-11, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15201044082641602, + "step": 4605, + "valid_targets_mean": 5189.9, + "valid_targets_min": 511 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1645904779434204, + "step": 4606, + "total_flos": 1702570888593408.0, + "train_loss": 0.19200077259858897, + "train_runtime": 28350.7795, + "train_samples_per_second": 2.597, + "train_steps_per_second": 0.162, + "valid_targets_mean": 3879.8, + "valid_targets_min": 728 + } + ], + "logging_steps": 5, + "max_steps": 4606, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1702570888593408.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}