diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,10464 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 4739, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.007385524372230428, + "grad_norm": 19.97872360716285, + "learning_rate": 3.3755274261603377e-07, + "loss": 0.624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.7236873507499695, + "step": 5, + "valid_targets_mean": 4191.2, + "valid_targets_min": 560 + }, + { + "epoch": 0.014771048744460856, + "grad_norm": 16.297750328748872, + "learning_rate": 7.59493670886076e-07, + "loss": 0.6447, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.6396403312683105, + "step": 10, + "valid_targets_mean": 3906.9, + "valid_targets_min": 617 + }, + { + "epoch": 0.022156573116691284, + "grad_norm": 13.56762186389351, + "learning_rate": 1.1814345991561182e-06, + "loss": 0.6682, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5922929048538208, + "step": 15, + "valid_targets_mean": 5468.2, + "valid_targets_min": 573 + }, + { + "epoch": 0.029542097488921712, + "grad_norm": 11.653889962740626, + "learning_rate": 1.6033755274261605e-06, + "loss": 0.5585, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5627623796463013, + "step": 20, + "valid_targets_mean": 4871.1, + "valid_targets_min": 581 + }, + { + "epoch": 0.03692762186115214, + "grad_norm": 8.00562953617597, + "learning_rate": 2.0253164556962026e-06, + "loss": 0.5008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.5002298355102539, + "step": 25, + "valid_targets_mean": 4081.2, + "valid_targets_min": 400 + }, + { + "epoch": 0.04431314623338257, + "grad_norm": 4.397412308851758, + "learning_rate": 2.447257383966245e-06, + "loss": 0.4932, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4381677806377411, + "step": 30, + "valid_targets_mean": 5501.4, + "valid_targets_min": 638 + }, + { + "epoch": 0.051698670605613, + "grad_norm": 2.3819720550400656, + "learning_rate": 2.8691983122362873e-06, + "loss": 0.4548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.4095567464828491, + "step": 35, + "valid_targets_mean": 6018.8, + "valid_targets_min": 479 + }, + { + "epoch": 0.059084194977843424, + "grad_norm": 1.5356961908394742, + "learning_rate": 3.2911392405063294e-06, + "loss": 0.4411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3926240801811218, + "step": 40, + "valid_targets_mean": 4400.6, + "valid_targets_min": 517 + }, + { + "epoch": 0.06646971935007386, + "grad_norm": 1.0870293491792131, + "learning_rate": 3.713080168776372e-06, + "loss": 0.4433, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.417349636554718, + "step": 45, + "valid_targets_mean": 5836.1, + "valid_targets_min": 762 + }, + { + "epoch": 0.07385524372230429, + "grad_norm": 0.943385047160246, + "learning_rate": 4.135021097046414e-06, + "loss": 0.4154, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.41143539547920227, + "step": 50, + "valid_targets_mean": 5084.6, + "valid_targets_min": 639 + }, + { + "epoch": 0.08124076809453472, + "grad_norm": 1.011998448398478, + "learning_rate": 4.556962025316456e-06, + "loss": 0.4151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.39654070138931274, + "step": 55, + "valid_targets_mean": 5247.8, + "valid_targets_min": 681 + }, + { + "epoch": 0.08862629246676514, + "grad_norm": 0.686487046735958, + "learning_rate": 4.978902953586498e-06, + "loss": 0.3792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33487510681152344, + "step": 60, + "valid_targets_mean": 5221.6, + "valid_targets_min": 490 + }, + { + "epoch": 0.09601181683899557, + "grad_norm": 0.571553738713959, + "learning_rate": 5.40084388185654e-06, + "loss": 0.3743, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3575626313686371, + "step": 65, + "valid_targets_mean": 6041.3, + "valid_targets_min": 618 + }, + { + "epoch": 0.103397341211226, + "grad_norm": 0.49365785429432335, + "learning_rate": 5.8227848101265824e-06, + "loss": 0.3463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30460304021835327, + "step": 70, + "valid_targets_mean": 5298.4, + "valid_targets_min": 535 + }, + { + "epoch": 0.11078286558345643, + "grad_norm": 0.5050364830057085, + "learning_rate": 6.244725738396625e-06, + "loss": 0.3399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3081740140914917, + "step": 75, + "valid_targets_mean": 5255.7, + "valid_targets_min": 652 + }, + { + "epoch": 0.11816838995568685, + "grad_norm": 0.5226990318037864, + "learning_rate": 6.666666666666667e-06, + "loss": 0.3904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3145563006401062, + "step": 80, + "valid_targets_mean": 4663.9, + "valid_targets_min": 392 + }, + { + "epoch": 0.1255539143279173, + "grad_norm": 0.4881100768231626, + "learning_rate": 7.08860759493671e-06, + "loss": 0.3726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30497312545776367, + "step": 85, + "valid_targets_mean": 5632.1, + "valid_targets_min": 719 + }, + { + "epoch": 0.1329394387001477, + "grad_norm": 0.5237726651638548, + "learning_rate": 7.510548523206752e-06, + "loss": 0.3544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3278190791606903, + "step": 90, + "valid_targets_mean": 6220.0, + "valid_targets_min": 751 + }, + { + "epoch": 0.14032496307237813, + "grad_norm": 0.4650280516019618, + "learning_rate": 7.932489451476793e-06, + "loss": 0.3182, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28953981399536133, + "step": 95, + "valid_targets_mean": 5031.3, + "valid_targets_min": 642 + }, + { + "epoch": 0.14771048744460857, + "grad_norm": 0.46371951812182943, + "learning_rate": 8.354430379746837e-06, + "loss": 0.3099, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.33909645676612854, + "step": 100, + "valid_targets_mean": 6196.9, + "valid_targets_min": 545 + }, + { + "epoch": 0.155096011816839, + "grad_norm": 0.4568538569230339, + "learning_rate": 8.776371308016879e-06, + "loss": 0.3471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2957189679145813, + "step": 105, + "valid_targets_mean": 6063.0, + "valid_targets_min": 584 + }, + { + "epoch": 0.16248153618906944, + "grad_norm": 0.4578520750020651, + "learning_rate": 9.198312236286921e-06, + "loss": 0.3276, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29967236518859863, + "step": 110, + "valid_targets_mean": 6171.0, + "valid_targets_min": 540 + }, + { + "epoch": 0.16986706056129985, + "grad_norm": 0.4955996123561873, + "learning_rate": 9.620253164556963e-06, + "loss": 0.3176, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2830328941345215, + "step": 115, + "valid_targets_mean": 4840.9, + "valid_targets_min": 454 + }, + { + "epoch": 0.17725258493353027, + "grad_norm": 0.5471230185393541, + "learning_rate": 1.0042194092827005e-05, + "loss": 0.3219, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.36016958951950073, + "step": 120, + "valid_targets_mean": 4376.1, + "valid_targets_min": 605 + }, + { + "epoch": 0.18463810930576072, + "grad_norm": 0.5075473057243887, + "learning_rate": 1.0464135021097049e-05, + "loss": 0.3135, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.314586877822876, + "step": 125, + "valid_targets_mean": 4974.8, + "valid_targets_min": 615 + }, + { + "epoch": 0.19202363367799113, + "grad_norm": 0.4679315871943788, + "learning_rate": 1.088607594936709e-05, + "loss": 0.2943, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27506357431411743, + "step": 130, + "valid_targets_mean": 5003.4, + "valid_targets_min": 309 + }, + { + "epoch": 0.19940915805022155, + "grad_norm": 0.5203002455514378, + "learning_rate": 1.1308016877637132e-05, + "loss": 0.3111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3530064821243286, + "step": 135, + "valid_targets_mean": 4744.3, + "valid_targets_min": 380 + }, + { + "epoch": 0.206794682422452, + "grad_norm": 0.508532245402902, + "learning_rate": 1.1729957805907175e-05, + "loss": 0.2799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26319918036460876, + "step": 140, + "valid_targets_mean": 4969.9, + "valid_targets_min": 298 + }, + { + "epoch": 0.21418020679468242, + "grad_norm": 0.4977333684095323, + "learning_rate": 1.2151898734177216e-05, + "loss": 0.2864, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.281037300825119, + "step": 145, + "valid_targets_mean": 4680.9, + "valid_targets_min": 553 + }, + { + "epoch": 0.22156573116691286, + "grad_norm": 0.5700488921766971, + "learning_rate": 1.2573839662447258e-05, + "loss": 0.3138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3317258059978485, + "step": 150, + "valid_targets_mean": 4780.8, + "valid_targets_min": 613 + }, + { + "epoch": 0.22895125553914328, + "grad_norm": 0.5049210471064856, + "learning_rate": 1.29957805907173e-05, + "loss": 0.2939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31507956981658936, + "step": 155, + "valid_targets_mean": 4951.3, + "valid_targets_min": 648 + }, + { + "epoch": 0.2363367799113737, + "grad_norm": 0.48733685753987915, + "learning_rate": 1.3417721518987344e-05, + "loss": 0.3144, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3532077670097351, + "step": 160, + "valid_targets_mean": 5704.9, + "valid_targets_min": 615 + }, + { + "epoch": 0.24372230428360414, + "grad_norm": 0.478477178779921, + "learning_rate": 1.3839662447257384e-05, + "loss": 0.2889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25617825984954834, + "step": 165, + "valid_targets_mean": 5201.0, + "valid_targets_min": 614 + }, + { + "epoch": 0.2511078286558346, + "grad_norm": 0.6185796972171613, + "learning_rate": 1.4261603375527426e-05, + "loss": 0.2979, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3018920421600342, + "step": 170, + "valid_targets_mean": 4082.6, + "valid_targets_min": 595 + }, + { + "epoch": 0.258493353028065, + "grad_norm": 0.5575122691401694, + "learning_rate": 1.468354430379747e-05, + "loss": 0.2849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2911016345024109, + "step": 175, + "valid_targets_mean": 4299.9, + "valid_targets_min": 662 + }, + { + "epoch": 0.2658788774002954, + "grad_norm": 0.6276828580042122, + "learning_rate": 1.5105485232067512e-05, + "loss": 0.2649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25065547227859497, + "step": 180, + "valid_targets_mean": 4217.5, + "valid_targets_min": 388 + }, + { + "epoch": 0.27326440177252587, + "grad_norm": 0.4696050024959157, + "learning_rate": 1.5527426160337554e-05, + "loss": 0.2938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2493332028388977, + "step": 185, + "valid_targets_mean": 5711.5, + "valid_targets_min": 679 + }, + { + "epoch": 0.28064992614475626, + "grad_norm": 0.5623419824234117, + "learning_rate": 1.5949367088607598e-05, + "loss": 0.2589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27796900272369385, + "step": 190, + "valid_targets_mean": 4589.1, + "valid_targets_min": 592 + }, + { + "epoch": 0.2880354505169867, + "grad_norm": 0.45269545432048147, + "learning_rate": 1.637130801687764e-05, + "loss": 0.243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23348630964756012, + "step": 195, + "valid_targets_mean": 6195.4, + "valid_targets_min": 1406 + }, + { + "epoch": 0.29542097488921715, + "grad_norm": 0.5800039153836231, + "learning_rate": 1.679324894514768e-05, + "loss": 0.2751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3133583664894104, + "step": 200, + "valid_targets_mean": 4379.1, + "valid_targets_min": 577 + }, + { + "epoch": 0.30280649926144754, + "grad_norm": 0.4475247352688734, + "learning_rate": 1.7215189873417723e-05, + "loss": 0.2688, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2346859574317932, + "step": 205, + "valid_targets_mean": 5674.9, + "valid_targets_min": 516 + }, + { + "epoch": 0.310192023633678, + "grad_norm": 0.6043039588064852, + "learning_rate": 1.7637130801687767e-05, + "loss": 0.284, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2985689640045166, + "step": 210, + "valid_targets_mean": 3230.7, + "valid_targets_min": 451 + }, + { + "epoch": 0.3175775480059084, + "grad_norm": 0.46608512023497484, + "learning_rate": 1.8059071729957807e-05, + "loss": 0.2621, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2667817175388336, + "step": 215, + "valid_targets_mean": 5615.9, + "valid_targets_min": 608 + }, + { + "epoch": 0.3249630723781389, + "grad_norm": 0.6393799102975143, + "learning_rate": 1.848101265822785e-05, + "loss": 0.2857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3485134243965149, + "step": 220, + "valid_targets_mean": 5232.9, + "valid_targets_min": 551 + }, + { + "epoch": 0.33234859675036926, + "grad_norm": 0.4985956967264833, + "learning_rate": 1.890295358649789e-05, + "loss": 0.2504, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22727638483047485, + "step": 225, + "valid_targets_mean": 5186.1, + "valid_targets_min": 557 + }, + { + "epoch": 0.3397341211225997, + "grad_norm": 0.698867408214604, + "learning_rate": 1.9324894514767935e-05, + "loss": 0.277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28445443511009216, + "step": 230, + "valid_targets_mean": 4395.3, + "valid_targets_min": 613 + }, + { + "epoch": 0.34711964549483015, + "grad_norm": 0.4699161647524185, + "learning_rate": 1.974683544303798e-05, + "loss": 0.2863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27419084310531616, + "step": 235, + "valid_targets_mean": 6050.6, + "valid_targets_min": 697 + }, + { + "epoch": 0.35450516986706054, + "grad_norm": 0.49712308816298567, + "learning_rate": 2.0168776371308016e-05, + "loss": 0.2654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2411026805639267, + "step": 240, + "valid_targets_mean": 6215.0, + "valid_targets_min": 375 + }, + { + "epoch": 0.361890694239291, + "grad_norm": 0.4755998361435196, + "learning_rate": 2.0590717299578063e-05, + "loss": 0.2812, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.31807953119277954, + "step": 245, + "valid_targets_mean": 5977.3, + "valid_targets_min": 222 + }, + { + "epoch": 0.36927621861152143, + "grad_norm": 0.5522737703807714, + "learning_rate": 2.1012658227848103e-05, + "loss": 0.2728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2637110650539398, + "step": 250, + "valid_targets_mean": 4707.1, + "valid_targets_min": 424 + }, + { + "epoch": 0.3766617429837518, + "grad_norm": 0.5752807812899275, + "learning_rate": 2.1434599156118144e-05, + "loss": 0.2675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.252408891916275, + "step": 255, + "valid_targets_mean": 4646.9, + "valid_targets_min": 620 + }, + { + "epoch": 0.38404726735598227, + "grad_norm": 0.4839304938690229, + "learning_rate": 2.1856540084388188e-05, + "loss": 0.2899, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.311644971370697, + "step": 260, + "valid_targets_mean": 6014.4, + "valid_targets_min": 320 + }, + { + "epoch": 0.3914327917282127, + "grad_norm": 0.3679428184390633, + "learning_rate": 2.2278481012658228e-05, + "loss": 0.2603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1715935468673706, + "step": 265, + "valid_targets_mean": 7149.1, + "valid_targets_min": 708 + }, + { + "epoch": 0.3988183161004431, + "grad_norm": 0.4388882366281187, + "learning_rate": 2.270042194092827e-05, + "loss": 0.2662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21887677907943726, + "step": 270, + "valid_targets_mean": 5259.7, + "valid_targets_min": 462 + }, + { + "epoch": 0.40620384047267355, + "grad_norm": 0.45156140818813745, + "learning_rate": 2.3122362869198316e-05, + "loss": 0.243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1979508399963379, + "step": 275, + "valid_targets_mean": 5245.2, + "valid_targets_min": 370 + }, + { + "epoch": 0.413589364844904, + "grad_norm": 0.5635183886045622, + "learning_rate": 2.3544303797468356e-05, + "loss": 0.2644, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2971973419189453, + "step": 280, + "valid_targets_mean": 4294.6, + "valid_targets_min": 250 + }, + { + "epoch": 0.42097488921713444, + "grad_norm": 0.6003559911338966, + "learning_rate": 2.39662447257384e-05, + "loss": 0.2701, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27018484473228455, + "step": 285, + "valid_targets_mean": 4037.1, + "valid_targets_min": 530 + }, + { + "epoch": 0.42836041358936483, + "grad_norm": 0.519146451009721, + "learning_rate": 2.438818565400844e-05, + "loss": 0.2749, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25247296690940857, + "step": 290, + "valid_targets_mean": 5198.4, + "valid_targets_min": 757 + }, + { + "epoch": 0.4357459379615953, + "grad_norm": 0.5295365016140835, + "learning_rate": 2.481012658227848e-05, + "loss": 0.2734, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30645301938056946, + "step": 295, + "valid_targets_mean": 5075.9, + "valid_targets_min": 586 + }, + { + "epoch": 0.4431314623338257, + "grad_norm": 0.5064359925137679, + "learning_rate": 2.5232067510548524e-05, + "loss": 0.2615, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27146783471107483, + "step": 300, + "valid_targets_mean": 5670.0, + "valid_targets_min": 510 + }, + { + "epoch": 0.4505169867060561, + "grad_norm": 0.4807884411055291, + "learning_rate": 2.5654008438818568e-05, + "loss": 0.2637, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.245832160115242, + "step": 305, + "valid_targets_mean": 4988.6, + "valid_targets_min": 758 + }, + { + "epoch": 0.45790251107828656, + "grad_norm": 0.4874621524760727, + "learning_rate": 2.6075949367088612e-05, + "loss": 0.2355, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22424304485321045, + "step": 310, + "valid_targets_mean": 4865.2, + "valid_targets_min": 690 + }, + { + "epoch": 0.465288035450517, + "grad_norm": 0.5241781469793773, + "learning_rate": 2.6497890295358652e-05, + "loss": 0.2674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2541046142578125, + "step": 315, + "valid_targets_mean": 4655.8, + "valid_targets_min": 303 + }, + { + "epoch": 0.4726735598227474, + "grad_norm": 0.6693714356739207, + "learning_rate": 2.6919831223628693e-05, + "loss": 0.2598, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3150583803653717, + "step": 320, + "valid_targets_mean": 3789.6, + "valid_targets_min": 323 + }, + { + "epoch": 0.48005908419497784, + "grad_norm": 0.4049583166705286, + "learning_rate": 2.7341772151898737e-05, + "loss": 0.2634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19988644123077393, + "step": 325, + "valid_targets_mean": 6250.1, + "valid_targets_min": 565 + }, + { + "epoch": 0.4874446085672083, + "grad_norm": 0.5683986043691779, + "learning_rate": 2.7763713080168777e-05, + "loss": 0.2955, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30885380506515503, + "step": 330, + "valid_targets_mean": 4539.6, + "valid_targets_min": 511 + }, + { + "epoch": 0.4948301329394387, + "grad_norm": 0.4293798580209713, + "learning_rate": 2.8185654008438824e-05, + "loss": 0.2476, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2373587191104889, + "step": 335, + "valid_targets_mean": 6893.3, + "valid_targets_min": 540 + }, + { + "epoch": 0.5022156573116692, + "grad_norm": 0.5051385898465901, + "learning_rate": 2.8607594936708865e-05, + "loss": 0.2575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2349412590265274, + "step": 340, + "valid_targets_mean": 5513.8, + "valid_targets_min": 348 + }, + { + "epoch": 0.5096011816838996, + "grad_norm": 0.453650649705282, + "learning_rate": 2.9029535864978905e-05, + "loss": 0.2487, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24615350365638733, + "step": 345, + "valid_targets_mean": 5636.8, + "valid_targets_min": 632 + }, + { + "epoch": 0.51698670605613, + "grad_norm": 0.5026094522177535, + "learning_rate": 2.945147679324895e-05, + "loss": 0.2865, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25473105907440186, + "step": 350, + "valid_targets_mean": 4648.2, + "valid_targets_min": 618 + }, + { + "epoch": 0.5243722304283605, + "grad_norm": 0.5473929641026576, + "learning_rate": 2.987341772151899e-05, + "loss": 0.2454, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25512436032295227, + "step": 355, + "valid_targets_mean": 4888.8, + "valid_targets_min": 455 + }, + { + "epoch": 0.5317577548005908, + "grad_norm": 0.6818407765763758, + "learning_rate": 3.029535864978903e-05, + "loss": 0.251, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26583975553512573, + "step": 360, + "valid_targets_mean": 3901.6, + "valid_targets_min": 393 + }, + { + "epoch": 0.5391432791728212, + "grad_norm": 0.43264528877861325, + "learning_rate": 3.0717299578059074e-05, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2336602509021759, + "step": 365, + "valid_targets_mean": 6759.1, + "valid_targets_min": 654 + }, + { + "epoch": 0.5465288035450517, + "grad_norm": 0.4786744793085355, + "learning_rate": 3.113924050632912e-05, + "loss": 0.2568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23913979530334473, + "step": 370, + "valid_targets_mean": 5128.6, + "valid_targets_min": 436 + }, + { + "epoch": 0.5539143279172821, + "grad_norm": 0.6098930815067188, + "learning_rate": 3.156118143459916e-05, + "loss": 0.2664, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27685385942459106, + "step": 375, + "valid_targets_mean": 4722.4, + "valid_targets_min": 547 + }, + { + "epoch": 0.5612998522895125, + "grad_norm": 0.4884326875060275, + "learning_rate": 3.19831223628692e-05, + "loss": 0.2529, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.269244909286499, + "step": 380, + "valid_targets_mean": 5746.3, + "valid_targets_min": 649 + }, + { + "epoch": 0.568685376661743, + "grad_norm": 0.6966159287505779, + "learning_rate": 3.240506329113924e-05, + "loss": 0.2452, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25672098994255066, + "step": 385, + "valid_targets_mean": 5211.0, + "valid_targets_min": 451 + }, + { + "epoch": 0.5760709010339734, + "grad_norm": 0.4826433803477916, + "learning_rate": 3.282700421940928e-05, + "loss": 0.2588, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21904008090496063, + "step": 390, + "valid_targets_mean": 4840.0, + "valid_targets_min": 614 + }, + { + "epoch": 0.5834564254062038, + "grad_norm": 0.5161328067549831, + "learning_rate": 3.324894514767932e-05, + "loss": 0.2401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21546295285224915, + "step": 395, + "valid_targets_mean": 5000.8, + "valid_targets_min": 474 + }, + { + "epoch": 0.5908419497784343, + "grad_norm": 0.441903839632028, + "learning_rate": 3.367088607594937e-05, + "loss": 0.2326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2418462634086609, + "step": 400, + "valid_targets_mean": 6752.2, + "valid_targets_min": 526 + }, + { + "epoch": 0.5982274741506647, + "grad_norm": 0.4263460620059315, + "learning_rate": 3.409282700421941e-05, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2564152479171753, + "step": 405, + "valid_targets_mean": 5806.5, + "valid_targets_min": 581 + }, + { + "epoch": 0.6056129985228951, + "grad_norm": 0.4114391570850889, + "learning_rate": 3.451476793248946e-05, + "loss": 0.2561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19688798487186432, + "step": 410, + "valid_targets_mean": 5193.4, + "valid_targets_min": 765 + }, + { + "epoch": 0.6129985228951256, + "grad_norm": 0.560472208109208, + "learning_rate": 3.49367088607595e-05, + "loss": 0.3089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3748968243598938, + "step": 415, + "valid_targets_mean": 6106.8, + "valid_targets_min": 673 + }, + { + "epoch": 0.620384047267356, + "grad_norm": 0.4389773616307953, + "learning_rate": 3.535864978902954e-05, + "loss": 0.2474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25939327478408813, + "step": 420, + "valid_targets_mean": 5600.4, + "valid_targets_min": 587 + }, + { + "epoch": 0.6277695716395865, + "grad_norm": 0.49005945851489136, + "learning_rate": 3.578059071729958e-05, + "loss": 0.2352, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2415715903043747, + "step": 425, + "valid_targets_mean": 5480.6, + "valid_targets_min": 557 + }, + { + "epoch": 0.6351550960118169, + "grad_norm": 0.6120232650144795, + "learning_rate": 3.6202531645569626e-05, + "loss": 0.2384, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27258408069610596, + "step": 430, + "valid_targets_mean": 4020.8, + "valid_targets_min": 575 + }, + { + "epoch": 0.6425406203840472, + "grad_norm": 0.5517761299010163, + "learning_rate": 3.6624472573839666e-05, + "loss": 0.248, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2571716904640198, + "step": 435, + "valid_targets_mean": 3986.9, + "valid_targets_min": 570 + }, + { + "epoch": 0.6499261447562777, + "grad_norm": 0.4519259500134533, + "learning_rate": 3.704641350210971e-05, + "loss": 0.2584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23544706404209137, + "step": 440, + "valid_targets_mean": 5446.1, + "valid_targets_min": 798 + }, + { + "epoch": 0.6573116691285081, + "grad_norm": 0.4627225505498187, + "learning_rate": 3.746835443037975e-05, + "loss": 0.2572, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22768300771713257, + "step": 445, + "valid_targets_mean": 5305.9, + "valid_targets_min": 612 + }, + { + "epoch": 0.6646971935007385, + "grad_norm": 0.5459862764397345, + "learning_rate": 3.7890295358649794e-05, + "loss": 0.2509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2680595815181732, + "step": 450, + "valid_targets_mean": 3753.4, + "valid_targets_min": 637 + }, + { + "epoch": 0.672082717872969, + "grad_norm": 0.5037347511570361, + "learning_rate": 3.8312236286919835e-05, + "loss": 0.2492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2467479407787323, + "step": 455, + "valid_targets_mean": 5501.6, + "valid_targets_min": 310 + }, + { + "epoch": 0.6794682422451994, + "grad_norm": 0.4327775289602796, + "learning_rate": 3.873417721518988e-05, + "loss": 0.2367, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19664692878723145, + "step": 460, + "valid_targets_mean": 5931.2, + "valid_targets_min": 913 + }, + { + "epoch": 0.6868537666174298, + "grad_norm": 0.4409637060996679, + "learning_rate": 3.915611814345992e-05, + "loss": 0.2239, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21506735682487488, + "step": 465, + "valid_targets_mean": 6059.1, + "valid_targets_min": 647 + }, + { + "epoch": 0.6942392909896603, + "grad_norm": 0.5504576504647792, + "learning_rate": 3.957805907172996e-05, + "loss": 0.2497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2300184667110443, + "step": 470, + "valid_targets_mean": 3839.0, + "valid_targets_min": 557 + }, + { + "epoch": 0.7016248153618907, + "grad_norm": 0.429274562141855, + "learning_rate": 4e-05, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2237507700920105, + "step": 475, + "valid_targets_mean": 6357.5, + "valid_targets_min": 742 + }, + { + "epoch": 0.7090103397341211, + "grad_norm": 0.7536006693457217, + "learning_rate": 3.9999864355811366e-05, + "loss": 0.2377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22905516624450684, + "step": 480, + "valid_targets_mean": 4381.5, + "valid_targets_min": 729 + }, + { + "epoch": 0.7163958641063516, + "grad_norm": 0.43953758941215776, + "learning_rate": 3.99994574250854e-05, + "loss": 0.2316, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20200951397418976, + "step": 485, + "valid_targets_mean": 5644.1, + "valid_targets_min": 683 + }, + { + "epoch": 0.723781388478582, + "grad_norm": 0.4723664561759756, + "learning_rate": 3.999877921334187e-05, + "loss": 0.2475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.26116621494293213, + "step": 490, + "valid_targets_mean": 5649.9, + "valid_targets_min": 598 + }, + { + "epoch": 0.7311669128508124, + "grad_norm": 0.5372942073144541, + "learning_rate": 3.999782972978035e-05, + "loss": 0.2515, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2597278356552124, + "step": 495, + "valid_targets_mean": 4376.8, + "valid_targets_min": 711 + }, + { + "epoch": 0.7385524372230429, + "grad_norm": 0.4923885109877066, + "learning_rate": 3.9996608987280005e-05, + "loss": 0.2537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3310204744338989, + "step": 500, + "valid_targets_mean": 5373.9, + "valid_targets_min": 362 + }, + { + "epoch": 0.7459379615952733, + "grad_norm": 0.4360411302968856, + "learning_rate": 3.999511700239951e-05, + "loss": 0.2356, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23595163226127625, + "step": 505, + "valid_targets_mean": 5867.7, + "valid_targets_min": 747 + }, + { + "epoch": 0.7533234859675036, + "grad_norm": 0.4125019822494474, + "learning_rate": 3.999335379537677e-05, + "loss": 0.2542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22982177138328552, + "step": 510, + "valid_targets_mean": 5860.2, + "valid_targets_min": 284 + }, + { + "epoch": 0.7607090103397341, + "grad_norm": 0.4816111850196653, + "learning_rate": 3.999131939012866e-05, + "loss": 0.2421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23170827329158783, + "step": 515, + "valid_targets_mean": 5939.9, + "valid_targets_min": 573 + }, + { + "epoch": 0.7680945347119645, + "grad_norm": 0.5607421617686298, + "learning_rate": 3.998901381425072e-05, + "loss": 0.2555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27282047271728516, + "step": 520, + "valid_targets_mean": 4010.6, + "valid_targets_min": 308 + }, + { + "epoch": 0.7754800590841949, + "grad_norm": 0.4899434940882494, + "learning_rate": 3.998643709901673e-05, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2315795123577118, + "step": 525, + "valid_targets_mean": 4446.6, + "valid_targets_min": 755 + }, + { + "epoch": 0.7828655834564254, + "grad_norm": 0.4551383882144779, + "learning_rate": 3.998358927937835e-05, + "loss": 0.2489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21953409910202026, + "step": 530, + "valid_targets_mean": 5531.2, + "valid_targets_min": 538 + }, + { + "epoch": 0.7902511078286558, + "grad_norm": 0.4743224847774692, + "learning_rate": 3.9980470393964584e-05, + "loss": 0.2604, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23994377255439758, + "step": 535, + "valid_targets_mean": 4992.5, + "valid_targets_min": 499 + }, + { + "epoch": 0.7976366322008862, + "grad_norm": 0.42125006599133685, + "learning_rate": 3.997708048508131e-05, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19587251543998718, + "step": 540, + "valid_targets_mean": 5289.6, + "valid_targets_min": 667 + }, + { + "epoch": 0.8050221565731167, + "grad_norm": 0.524726561877271, + "learning_rate": 3.997341959871066e-05, + "loss": 0.2255, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21869216859340668, + "step": 545, + "valid_targets_mean": 4091.0, + "valid_targets_min": 641 + }, + { + "epoch": 0.8124076809453471, + "grad_norm": 1.0978518384034786, + "learning_rate": 3.996948778451044e-05, + "loss": 0.2382, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2922288179397583, + "step": 550, + "valid_targets_mean": 4755.1, + "valid_targets_min": 683 + }, + { + "epoch": 0.8197932053175776, + "grad_norm": 0.434588106606303, + "learning_rate": 3.996528509581343e-05, + "loss": 0.2525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2291834056377411, + "step": 555, + "valid_targets_mean": 6349.8, + "valid_targets_min": 911 + }, + { + "epoch": 0.827178729689808, + "grad_norm": 0.44781488594353586, + "learning_rate": 3.996081158962664e-05, + "loss": 0.2481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21667613089084625, + "step": 560, + "valid_targets_mean": 5273.2, + "valid_targets_min": 510 + }, + { + "epoch": 0.8345642540620384, + "grad_norm": 0.4232290679425604, + "learning_rate": 3.995606732663061e-05, + "loss": 0.2432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21859529614448547, + "step": 565, + "valid_targets_mean": 6958.2, + "valid_targets_min": 578 + }, + { + "epoch": 0.8419497784342689, + "grad_norm": 0.37603814378739714, + "learning_rate": 3.9951052371178484e-05, + "loss": 0.2041, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20742715895175934, + "step": 570, + "valid_targets_mean": 6439.2, + "valid_targets_min": 948 + }, + { + "epoch": 0.8493353028064993, + "grad_norm": 0.5330316503851572, + "learning_rate": 3.994576679129523e-05, + "loss": 0.2666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27818042039871216, + "step": 575, + "valid_targets_mean": 4627.2, + "valid_targets_min": 603 + }, + { + "epoch": 0.8567208271787297, + "grad_norm": 0.48524432401103307, + "learning_rate": 3.9940210658676674e-05, + "loss": 0.2502, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2559252977371216, + "step": 580, + "valid_targets_mean": 4795.9, + "valid_targets_min": 476 + }, + { + "epoch": 0.8641063515509602, + "grad_norm": 0.49687821950248273, + "learning_rate": 3.993438404868851e-05, + "loss": 0.2306, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22851648926734924, + "step": 585, + "valid_targets_mean": 5150.0, + "valid_targets_min": 523 + }, + { + "epoch": 0.8714918759231906, + "grad_norm": 0.4956097153548755, + "learning_rate": 3.9928287040365334e-05, + "loss": 0.2521, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.29340338706970215, + "step": 590, + "valid_targets_mean": 4955.4, + "valid_targets_min": 602 + }, + { + "epoch": 0.8788774002954209, + "grad_norm": 0.47621975231571884, + "learning_rate": 3.992191971640951e-05, + "loss": 0.2342, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27463576197624207, + "step": 595, + "valid_targets_mean": 5440.9, + "valid_targets_min": 602 + }, + { + "epoch": 0.8862629246676514, + "grad_norm": 0.4864192134736827, + "learning_rate": 3.9915282163190084e-05, + "loss": 0.2436, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.202743798494339, + "step": 600, + "valid_targets_mean": 4574.8, + "valid_targets_min": 529 + }, + { + "epoch": 0.8936484490398818, + "grad_norm": 0.4436441374970087, + "learning_rate": 3.990837447074162e-05, + "loss": 0.2602, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2884596586227417, + "step": 605, + "valid_targets_mean": 5406.1, + "valid_targets_min": 654 + }, + { + "epoch": 0.9010339734121122, + "grad_norm": 0.5091014640879844, + "learning_rate": 3.990119673276294e-05, + "loss": 0.2381, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2561118006706238, + "step": 610, + "valid_targets_mean": 3856.2, + "valid_targets_min": 395 + }, + { + "epoch": 0.9084194977843427, + "grad_norm": 0.44293254378519337, + "learning_rate": 3.989374904661589e-05, + "loss": 0.2338, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2175714075565338, + "step": 615, + "valid_targets_mean": 5307.1, + "valid_targets_min": 654 + }, + { + "epoch": 0.9158050221565731, + "grad_norm": 0.4969299448859931, + "learning_rate": 3.988603151332402e-05, + "loss": 0.2485, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23327939212322235, + "step": 620, + "valid_targets_mean": 4357.9, + "valid_targets_min": 932 + }, + { + "epoch": 0.9231905465288035, + "grad_norm": 0.399043646975211, + "learning_rate": 3.987804423757116e-05, + "loss": 0.2444, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2003873586654663, + "step": 625, + "valid_targets_mean": 6620.2, + "valid_targets_min": 717 + }, + { + "epoch": 0.930576070901034, + "grad_norm": 0.5569804334944528, + "learning_rate": 3.986978732770008e-05, + "loss": 0.2415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25180336833000183, + "step": 630, + "valid_targets_mean": 5835.9, + "valid_targets_min": 632 + }, + { + "epoch": 0.9379615952732644, + "grad_norm": 0.65226776693925, + "learning_rate": 3.986126089571096e-05, + "loss": 0.2397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25059741735458374, + "step": 635, + "valid_targets_mean": 4920.8, + "valid_targets_min": 558 + }, + { + "epoch": 0.9453471196454948, + "grad_norm": 0.7108324020329172, + "learning_rate": 3.98524650572599e-05, + "loss": 0.2417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2292698323726654, + "step": 640, + "valid_targets_mean": 6018.6, + "valid_targets_min": 596 + }, + { + "epoch": 0.9527326440177253, + "grad_norm": 0.6100872297295562, + "learning_rate": 3.984339993165733e-05, + "loss": 0.2483, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2530120611190796, + "step": 645, + "valid_targets_mean": 4852.8, + "valid_targets_min": 491 + }, + { + "epoch": 0.9601181683899557, + "grad_norm": 0.6894230566439239, + "learning_rate": 3.9834065641866405e-05, + "loss": 0.2243, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2521514594554901, + "step": 650, + "valid_targets_mean": 4665.9, + "valid_targets_min": 651 + }, + { + "epoch": 0.9675036927621861, + "grad_norm": 0.6477076251824874, + "learning_rate": 3.9824462314501355e-05, + "loss": 0.2148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21362358331680298, + "step": 655, + "valid_targets_mean": 5620.9, + "valid_targets_min": 775 + }, + { + "epoch": 0.9748892171344166, + "grad_norm": 0.4865282373044303, + "learning_rate": 3.981459007982573e-05, + "loss": 0.2421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22964204847812653, + "step": 660, + "valid_targets_mean": 4150.7, + "valid_targets_min": 476 + }, + { + "epoch": 0.982274741506647, + "grad_norm": 0.4265759209620169, + "learning_rate": 3.980444907175065e-05, + "loss": 0.2221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23872928321361542, + "step": 665, + "valid_targets_mean": 4645.1, + "valid_targets_min": 490 + }, + { + "epoch": 0.9896602658788775, + "grad_norm": 0.4337931141291071, + "learning_rate": 3.979403942783301e-05, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21394482254981995, + "step": 670, + "valid_targets_mean": 5586.5, + "valid_targets_min": 583 + }, + { + "epoch": 0.9970457902511078, + "grad_norm": 0.44216457771241374, + "learning_rate": 3.9783361289273565e-05, + "loss": 0.2307, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2214493751525879, + "step": 675, + "valid_targets_mean": 5189.8, + "valid_targets_min": 490 + }, + { + "epoch": 1.0044313146233383, + "grad_norm": 0.4033533200689087, + "learning_rate": 3.977241480091507e-05, + "loss": 0.2128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21175044775009155, + "step": 680, + "valid_targets_mean": 6346.5, + "valid_targets_min": 2699 + }, + { + "epoch": 1.0118168389955686, + "grad_norm": 0.485561570780159, + "learning_rate": 3.976120011124027e-05, + "loss": 0.2282, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2394808530807495, + "step": 685, + "valid_targets_mean": 5191.0, + "valid_targets_min": 764 + }, + { + "epoch": 1.0192023633677991, + "grad_norm": 0.4391190385256597, + "learning_rate": 3.974971737236992e-05, + "loss": 0.217, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23937949538230896, + "step": 690, + "valid_targets_mean": 5908.1, + "valid_targets_min": 742 + }, + { + "epoch": 1.0265878877400296, + "grad_norm": 0.44514329511208256, + "learning_rate": 3.9737966740060694e-05, + "loss": 0.2508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20545028150081635, + "step": 695, + "valid_targets_mean": 4889.1, + "valid_targets_min": 246 + }, + { + "epoch": 1.03397341211226, + "grad_norm": 0.5296228485775536, + "learning_rate": 3.97259483737031e-05, + "loss": 0.2484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25395631790161133, + "step": 700, + "valid_targets_mean": 5731.1, + "valid_targets_min": 556 + }, + { + "epoch": 1.0413589364844904, + "grad_norm": 0.4073116341963712, + "learning_rate": 3.9713662436319283e-05, + "loss": 0.2603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.234715536236763, + "step": 705, + "valid_targets_mean": 5917.2, + "valid_targets_min": 781 + }, + { + "epoch": 1.048744460856721, + "grad_norm": 0.4815030594150368, + "learning_rate": 3.9701109094560844e-05, + "loss": 0.2399, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2784745395183563, + "step": 710, + "valid_targets_mean": 4806.1, + "valid_targets_min": 559 + }, + { + "epoch": 1.0561299852289512, + "grad_norm": 0.49302209706755173, + "learning_rate": 3.9688288518706576e-05, + "loss": 0.2118, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2195298820734024, + "step": 715, + "valid_targets_mean": 4878.7, + "valid_targets_min": 652 + }, + { + "epoch": 1.0635155096011817, + "grad_norm": 0.5556432641590098, + "learning_rate": 3.967520088266014e-05, + "loss": 0.2649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3123176097869873, + "step": 720, + "valid_targets_mean": 4661.2, + "valid_targets_min": 459 + }, + { + "epoch": 1.0709010339734122, + "grad_norm": 0.4534308645642078, + "learning_rate": 3.96618463639477e-05, + "loss": 0.2204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2006494402885437, + "step": 725, + "valid_targets_mean": 4573.6, + "valid_targets_min": 623 + }, + { + "epoch": 1.0782865583456425, + "grad_norm": 0.47188855620624076, + "learning_rate": 3.9648225143715565e-05, + "loss": 0.2215, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2375321090221405, + "step": 730, + "valid_targets_mean": 5050.3, + "valid_targets_min": 697 + }, + { + "epoch": 1.085672082717873, + "grad_norm": 0.5815500233739369, + "learning_rate": 3.9634337406727646e-05, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2476039081811905, + "step": 735, + "valid_targets_mean": 4952.1, + "valid_targets_min": 620 + }, + { + "epoch": 1.0930576070901035, + "grad_norm": 0.44455506894558233, + "learning_rate": 3.962018334136304e-05, + "loss": 0.204, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2000984251499176, + "step": 740, + "valid_targets_mean": 5665.1, + "valid_targets_min": 564 + }, + { + "epoch": 1.1004431314623337, + "grad_norm": 0.41891782048261406, + "learning_rate": 3.960576313961342e-05, + "loss": 0.2056, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20635861158370972, + "step": 745, + "valid_targets_mean": 5981.8, + "valid_targets_min": 553 + }, + { + "epoch": 1.1078286558345642, + "grad_norm": 0.43106390225148533, + "learning_rate": 3.9591076997080425e-05, + "loss": 0.2206, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23344707489013672, + "step": 750, + "valid_targets_mean": 5677.6, + "valid_targets_min": 682 + }, + { + "epoch": 1.1152141802067947, + "grad_norm": 0.41866856940372654, + "learning_rate": 3.9576125112973064e-05, + "loss": 0.2259, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2010667622089386, + "step": 755, + "valid_targets_mean": 5764.6, + "valid_targets_min": 755 + }, + { + "epoch": 1.122599704579025, + "grad_norm": 0.3898308822960811, + "learning_rate": 3.956090769010495e-05, + "loss": 0.228, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21998938918113708, + "step": 760, + "valid_targets_mean": 7065.8, + "valid_targets_min": 606 + }, + { + "epoch": 1.1299852289512555, + "grad_norm": 0.4317215373352644, + "learning_rate": 3.954542493489158e-05, + "loss": 0.2377, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23063355684280396, + "step": 765, + "valid_targets_mean": 6418.8, + "valid_targets_min": 710 + }, + { + "epoch": 1.137370753323486, + "grad_norm": 0.4412576392418719, + "learning_rate": 3.952967705734752e-05, + "loss": 0.2179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20747151970863342, + "step": 770, + "valid_targets_mean": 5115.1, + "valid_targets_min": 600 + }, + { + "epoch": 1.1447562776957163, + "grad_norm": 0.5452994774328421, + "learning_rate": 3.951366427108359e-05, + "loss": 0.2195, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2622203826904297, + "step": 775, + "valid_targets_mean": 3728.9, + "valid_targets_min": 762 + }, + { + "epoch": 1.1521418020679468, + "grad_norm": 0.4161239474977219, + "learning_rate": 3.9497386793303934e-05, + "loss": 0.2184, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23140665888786316, + "step": 780, + "valid_targets_mean": 6516.1, + "valid_targets_min": 521 + }, + { + "epoch": 1.1595273264401773, + "grad_norm": 0.4996982771781497, + "learning_rate": 3.9480844844803065e-05, + "loss": 0.2289, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21708938479423523, + "step": 785, + "valid_targets_mean": 4092.6, + "valid_targets_min": 491 + }, + { + "epoch": 1.1669128508124076, + "grad_norm": 0.43107207086038685, + "learning_rate": 3.946403864996291e-05, + "loss": 0.222, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19202640652656555, + "step": 790, + "valid_targets_mean": 5623.8, + "valid_targets_min": 567 + }, + { + "epoch": 1.174298375184638, + "grad_norm": 0.6020812405004927, + "learning_rate": 3.944696843674973e-05, + "loss": 0.2265, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.267403244972229, + "step": 795, + "valid_targets_mean": 3391.2, + "valid_targets_min": 617 + }, + { + "epoch": 1.1816838995568686, + "grad_norm": 0.4663676848971227, + "learning_rate": 3.942963443671105e-05, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2613796889781952, + "step": 800, + "valid_targets_mean": 5378.3, + "valid_targets_min": 552 + }, + { + "epoch": 1.1890694239290989, + "grad_norm": 0.4485332389236638, + "learning_rate": 3.9412036884972515e-05, + "loss": 0.2151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2017560601234436, + "step": 805, + "valid_targets_mean": 4622.5, + "valid_targets_min": 531 + }, + { + "epoch": 1.1964549483013294, + "grad_norm": 0.47317157878156907, + "learning_rate": 3.939417602023467e-05, + "loss": 0.2169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2027883231639862, + "step": 810, + "valid_targets_mean": 4890.3, + "valid_targets_min": 638 + }, + { + "epoch": 1.2038404726735599, + "grad_norm": 0.5193762458310359, + "learning_rate": 3.937605208476979e-05, + "loss": 0.2488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23114809393882751, + "step": 815, + "valid_targets_mean": 4104.7, + "valid_targets_min": 659 + }, + { + "epoch": 1.2112259970457901, + "grad_norm": 0.5116940935110126, + "learning_rate": 3.93576653244185e-05, + "loss": 0.24, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.3086828291416168, + "step": 820, + "valid_targets_mean": 5347.4, + "valid_targets_min": 692 + }, + { + "epoch": 1.2186115214180206, + "grad_norm": 0.6948645135111495, + "learning_rate": 3.933901598858653e-05, + "loss": 0.2134, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23770800232887268, + "step": 825, + "valid_targets_mean": 4581.4, + "valid_targets_min": 575 + }, + { + "epoch": 1.2259970457902511, + "grad_norm": 0.3666222705529873, + "learning_rate": 3.9320104330241294e-05, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20173095166683197, + "step": 830, + "valid_targets_mean": 7597.5, + "valid_targets_min": 563 + }, + { + "epoch": 1.2333825701624814, + "grad_norm": 0.4553744544104897, + "learning_rate": 3.9300930605908434e-05, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2501322031021118, + "step": 835, + "valid_targets_mean": 5175.3, + "valid_targets_min": 477 + }, + { + "epoch": 1.240768094534712, + "grad_norm": 0.4074174770824338, + "learning_rate": 3.928149507566838e-05, + "loss": 0.2287, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21756581962108612, + "step": 840, + "valid_targets_mean": 5838.4, + "valid_targets_min": 709 + }, + { + "epoch": 1.2481536189069424, + "grad_norm": 0.3903886869594378, + "learning_rate": 3.926179800315281e-05, + "loss": 0.2103, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19205650687217712, + "step": 845, + "valid_targets_mean": 5413.8, + "valid_targets_min": 497 + }, + { + "epoch": 1.2555391432791727, + "grad_norm": 0.48872055489952576, + "learning_rate": 3.924183965554106e-05, + "loss": 0.2295, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2304544299840927, + "step": 850, + "valid_targets_mean": 5223.9, + "valid_targets_min": 362 + }, + { + "epoch": 1.2629246676514032, + "grad_norm": 0.5971024562814735, + "learning_rate": 3.9221620303556515e-05, + "loss": 0.2035, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1931340992450714, + "step": 855, + "valid_targets_mean": 6235.6, + "valid_targets_min": 698 + }, + { + "epoch": 1.2703101920236337, + "grad_norm": 0.441865132454004, + "learning_rate": 3.9201140221462947e-05, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19833971560001373, + "step": 860, + "valid_targets_mean": 4980.0, + "valid_targets_min": 229 + }, + { + "epoch": 1.277695716395864, + "grad_norm": 0.448378137812601, + "learning_rate": 3.918039968706076e-05, + "loss": 0.2303, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2166500836610794, + "step": 865, + "valid_targets_mean": 4924.2, + "valid_targets_min": 586 + }, + { + "epoch": 1.2850812407680945, + "grad_norm": 0.38685678373728877, + "learning_rate": 3.9159398981683245e-05, + "loss": 0.2115, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17539414763450623, + "step": 870, + "valid_targets_mean": 6035.1, + "valid_targets_min": 928 + }, + { + "epoch": 1.292466765140325, + "grad_norm": 0.4163449238539416, + "learning_rate": 3.9138138390192776e-05, + "loss": 0.218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2028162181377411, + "step": 875, + "valid_targets_mean": 5722.0, + "valid_targets_min": 752 + }, + { + "epoch": 1.2998522895125553, + "grad_norm": 0.5685979494164379, + "learning_rate": 3.911661820097691e-05, + "loss": 0.2277, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27309101819992065, + "step": 880, + "valid_targets_mean": 3300.4, + "valid_targets_min": 311 + }, + { + "epoch": 1.3072378138847858, + "grad_norm": 0.4369045779107333, + "learning_rate": 3.909483870594452e-05, + "loss": 0.2162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2015451192855835, + "step": 885, + "valid_targets_mean": 5269.7, + "valid_targets_min": 571 + }, + { + "epoch": 1.3146233382570163, + "grad_norm": 0.4193133405997814, + "learning_rate": 3.9072800200521785e-05, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21048672497272491, + "step": 890, + "valid_targets_mean": 5464.1, + "valid_targets_min": 748 + }, + { + "epoch": 1.3220088626292466, + "grad_norm": 0.388531558281405, + "learning_rate": 3.905050298364824e-05, + "loss": 0.2062, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16509976983070374, + "step": 895, + "valid_targets_mean": 5191.9, + "valid_targets_min": 517 + }, + { + "epoch": 1.329394387001477, + "grad_norm": 0.47820157235033706, + "learning_rate": 3.9027947357772664e-05, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2072378247976303, + "step": 900, + "valid_targets_mean": 4629.8, + "valid_targets_min": 702 + }, + { + "epoch": 1.3367799113737076, + "grad_norm": 0.4354737992607275, + "learning_rate": 3.900513362884901e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20593631267547607, + "step": 905, + "valid_targets_mean": 4609.3, + "valid_targets_min": 531 + }, + { + "epoch": 1.3441654357459378, + "grad_norm": 0.49663576162369527, + "learning_rate": 3.8982062106332264e-05, + "loss": 0.2397, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23853272199630737, + "step": 910, + "valid_targets_mean": 3835.1, + "valid_targets_min": 564 + }, + { + "epoch": 1.3515509601181683, + "grad_norm": 0.46267728260164404, + "learning_rate": 3.895873310317422e-05, + "loss": 0.2136, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20141030848026276, + "step": 915, + "valid_targets_mean": 4257.8, + "valid_targets_min": 807 + }, + { + "epoch": 1.3589364844903988, + "grad_norm": 0.4318042167691161, + "learning_rate": 3.893514693581924e-05, + "loss": 0.2379, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23438894748687744, + "step": 920, + "valid_targets_mean": 5685.5, + "valid_targets_min": 637 + }, + { + "epoch": 1.3663220088626291, + "grad_norm": 0.46703807691935706, + "learning_rate": 3.8911303924199976e-05, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22170604765415192, + "step": 925, + "valid_targets_mean": 4579.6, + "valid_targets_min": 648 + }, + { + "epoch": 1.3737075332348596, + "grad_norm": 0.49253435998552936, + "learning_rate": 3.888720439173304e-05, + "loss": 0.2104, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18895632028579712, + "step": 930, + "valid_targets_mean": 5223.1, + "valid_targets_min": 874 + }, + { + "epoch": 1.3810930576070901, + "grad_norm": 0.37295680843818396, + "learning_rate": 3.886284866531457e-05, + "loss": 0.2459, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23819585144519806, + "step": 935, + "valid_targets_mean": 7344.1, + "valid_targets_min": 516 + }, + { + "epoch": 1.3884785819793206, + "grad_norm": 0.45860088434234825, + "learning_rate": 3.883823707531585e-05, + "loss": 0.2207, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20355328917503357, + "step": 940, + "valid_targets_mean": 4777.0, + "valid_targets_min": 822 + }, + { + "epoch": 1.395864106351551, + "grad_norm": 0.4813637468871037, + "learning_rate": 3.8813369955578795e-05, + "loss": 0.2443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24247971177101135, + "step": 945, + "valid_targets_mean": 4676.9, + "valid_targets_min": 461 + }, + { + "epoch": 1.4032496307237814, + "grad_norm": 0.44965533755939013, + "learning_rate": 3.878824764341143e-05, + "loss": 0.2358, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2948012351989746, + "step": 950, + "valid_targets_mean": 6391.8, + "valid_targets_min": 665 + }, + { + "epoch": 1.410635155096012, + "grad_norm": 0.4957030075557271, + "learning_rate": 3.876287047958331e-05, + "loss": 0.2266, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2564036250114441, + "step": 955, + "valid_targets_mean": 4930.7, + "valid_targets_min": 709 + }, + { + "epoch": 1.4180206794682422, + "grad_norm": 0.4689797114164809, + "learning_rate": 3.8737238808320936e-05, + "loss": 0.2226, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23838871717453003, + "step": 960, + "valid_targets_mean": 4520.2, + "valid_targets_min": 581 + }, + { + "epoch": 1.4254062038404727, + "grad_norm": 0.45226562564371103, + "learning_rate": 3.8711352977303015e-05, + "loss": 0.22, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19582833349704742, + "step": 965, + "valid_targets_mean": 4792.9, + "valid_targets_min": 336 + }, + { + "epoch": 1.4327917282127032, + "grad_norm": 0.4499057955750725, + "learning_rate": 3.868521333765581e-05, + "loss": 0.2247, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21399438381195068, + "step": 970, + "valid_targets_mean": 5207.8, + "valid_targets_min": 535 + }, + { + "epoch": 1.4401772525849335, + "grad_norm": 0.3550283452289212, + "learning_rate": 3.865882024394834e-05, + "loss": 0.2005, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17050230503082275, + "step": 975, + "valid_targets_mean": 6535.8, + "valid_targets_min": 578 + }, + { + "epoch": 1.447562776957164, + "grad_norm": 0.3851306477864997, + "learning_rate": 3.863217405418759e-05, + "loss": 0.216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.205580934882164, + "step": 980, + "valid_targets_mean": 5663.9, + "valid_targets_min": 868 + }, + { + "epoch": 1.4549483013293945, + "grad_norm": 0.5896596645032985, + "learning_rate": 3.8605275129813626e-05, + "loss": 0.246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21005357801914215, + "step": 985, + "valid_targets_mean": 4708.6, + "valid_targets_min": 768 + }, + { + "epoch": 1.4623338257016247, + "grad_norm": 0.48610764474272555, + "learning_rate": 3.857812383569473e-05, + "loss": 0.2101, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22247350215911865, + "step": 990, + "valid_targets_mean": 5245.6, + "valid_targets_min": 643 + }, + { + "epoch": 1.4697193500738552, + "grad_norm": 0.4265714039395906, + "learning_rate": 3.8550720540122436e-05, + "loss": 0.2177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2506152391433716, + "step": 995, + "valid_targets_mean": 5852.4, + "valid_targets_min": 679 + }, + { + "epoch": 1.4771048744460857, + "grad_norm": 0.418243711485139, + "learning_rate": 3.852306561480652e-05, + "loss": 0.2186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.25498437881469727, + "step": 1000, + "valid_targets_mean": 6060.9, + "valid_targets_min": 585 + }, + { + "epoch": 1.4844903988183162, + "grad_norm": 0.5725357827213264, + "learning_rate": 3.8495159434869966e-05, + "loss": 0.231, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.222720205783844, + "step": 1005, + "valid_targets_mean": 5578.1, + "valid_targets_min": 424 + }, + { + "epoch": 1.4918759231905465, + "grad_norm": 0.7420773277261102, + "learning_rate": 3.846700237884389e-05, + "loss": 0.2322, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2858394980430603, + "step": 1010, + "valid_targets_mean": 3759.1, + "valid_targets_min": 407 + }, + { + "epoch": 1.499261447562777, + "grad_norm": 0.4640644425487349, + "learning_rate": 3.84385948286624e-05, + "loss": 0.2085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21131858229637146, + "step": 1015, + "valid_targets_mean": 4703.7, + "valid_targets_min": 460 + }, + { + "epoch": 1.5066469719350075, + "grad_norm": 0.47403611928498096, + "learning_rate": 3.8409937169657393e-05, + "loss": 0.2401, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2791014313697815, + "step": 1020, + "valid_targets_mean": 5770.7, + "valid_targets_min": 572 + }, + { + "epoch": 1.5140324963072378, + "grad_norm": 0.3527301227310117, + "learning_rate": 3.8381029790553376e-05, + "loss": 0.2216, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21376456320285797, + "step": 1025, + "valid_targets_mean": 7688.9, + "valid_targets_min": 1061 + }, + { + "epoch": 1.5214180206794683, + "grad_norm": 0.48244790889065403, + "learning_rate": 3.8351873083462135e-05, + "loss": 0.2246, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22939425706863403, + "step": 1030, + "valid_targets_mean": 3952.4, + "valid_targets_min": 614 + }, + { + "epoch": 1.5288035450516988, + "grad_norm": 0.4441322237358841, + "learning_rate": 3.832246744387746e-05, + "loss": 0.2262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19595670700073242, + "step": 1035, + "valid_targets_mean": 5734.9, + "valid_targets_min": 602 + }, + { + "epoch": 1.536189069423929, + "grad_norm": 0.42687842371627116, + "learning_rate": 3.829281327066977e-05, + "loss": 0.2278, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2039499282836914, + "step": 1040, + "valid_targets_mean": 5575.5, + "valid_targets_min": 567 + }, + { + "epoch": 1.5435745937961596, + "grad_norm": 0.4616898715899763, + "learning_rate": 3.826291096608068e-05, + "loss": 0.2166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24281984567642212, + "step": 1045, + "valid_targets_mean": 4569.6, + "valid_targets_min": 566 + }, + { + "epoch": 1.55096011816839, + "grad_norm": 0.516188071513123, + "learning_rate": 3.823276093571758e-05, + "loss": 0.2119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23861096799373627, + "step": 1050, + "valid_targets_mean": 5593.9, + "valid_targets_min": 756 + }, + { + "epoch": 1.5583456425406204, + "grad_norm": 0.4323810628023057, + "learning_rate": 3.820236358854812e-05, + "loss": 0.1997, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18166664242744446, + "step": 1055, + "valid_targets_mean": 5191.4, + "valid_targets_min": 548 + }, + { + "epoch": 1.5657311669128509, + "grad_norm": 0.41249483128752723, + "learning_rate": 3.817171933689464e-05, + "loss": 0.2096, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21327346563339233, + "step": 1060, + "valid_targets_mean": 6157.4, + "valid_targets_min": 587 + }, + { + "epoch": 1.5731166912850814, + "grad_norm": 0.4280649794546595, + "learning_rate": 3.81408285964286e-05, + "loss": 0.2028, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2045423984527588, + "step": 1065, + "valid_targets_mean": 5231.2, + "valid_targets_min": 583 + }, + { + "epoch": 1.5805022156573116, + "grad_norm": 0.3567235937466317, + "learning_rate": 3.810969178616495e-05, + "loss": 0.2053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1872180700302124, + "step": 1070, + "valid_targets_mean": 6435.2, + "valid_targets_min": 828 + }, + { + "epoch": 1.5878877400295421, + "grad_norm": 0.3967044287073082, + "learning_rate": 3.807830932845643e-05, + "loss": 0.2183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21867504715919495, + "step": 1075, + "valid_targets_mean": 5695.5, + "valid_targets_min": 370 + }, + { + "epoch": 1.5952732644017726, + "grad_norm": 0.4258406051484541, + "learning_rate": 3.8046681648987826e-05, + "loss": 0.2088, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21099427342414856, + "step": 1080, + "valid_targets_mean": 5131.6, + "valid_targets_min": 578 + }, + { + "epoch": 1.602658788774003, + "grad_norm": 0.4734317720239788, + "learning_rate": 3.801480917677025e-05, + "loss": 0.2409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20774179697036743, + "step": 1085, + "valid_targets_mean": 3967.3, + "valid_targets_min": 535 + }, + { + "epoch": 1.6100443131462334, + "grad_norm": 0.44845704175379064, + "learning_rate": 3.798269234413525e-05, + "loss": 0.2214, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22882190346717834, + "step": 1090, + "valid_targets_mean": 4234.2, + "valid_targets_min": 453 + }, + { + "epoch": 1.617429837518464, + "grad_norm": 0.43870094592908176, + "learning_rate": 3.795033158672901e-05, + "loss": 0.2071, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20205071568489075, + "step": 1095, + "valid_targets_mean": 4710.2, + "valid_targets_min": 390 + }, + { + "epoch": 1.6248153618906942, + "grad_norm": 0.46114331054142615, + "learning_rate": 3.7917727343506384e-05, + "loss": 0.2268, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2182081788778305, + "step": 1100, + "valid_targets_mean": 4210.6, + "valid_targets_min": 630 + }, + { + "epoch": 1.6322008862629247, + "grad_norm": 0.46898057951501565, + "learning_rate": 3.788488005672499e-05, + "loss": 0.2218, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18588444590568542, + "step": 1105, + "valid_targets_mean": 5456.4, + "valid_targets_min": 697 + }, + { + "epoch": 1.6395864106351552, + "grad_norm": 0.46882204005729433, + "learning_rate": 3.785179017193918e-05, + "loss": 0.2269, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2911528944969177, + "step": 1110, + "valid_targets_mean": 5871.7, + "valid_targets_min": 546 + }, + { + "epoch": 1.6469719350073855, + "grad_norm": 0.5024285712997323, + "learning_rate": 3.781845813799402e-05, + "loss": 0.2262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2931421101093292, + "step": 1115, + "valid_targets_mean": 4670.3, + "valid_targets_min": 451 + }, + { + "epoch": 1.654357459379616, + "grad_norm": 0.42105185416660457, + "learning_rate": 3.778488440701918e-05, + "loss": 0.2037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22011511027812958, + "step": 1120, + "valid_targets_mean": 4687.1, + "valid_targets_min": 298 + }, + { + "epoch": 1.6617429837518465, + "grad_norm": 0.4534305852538249, + "learning_rate": 3.77510694344228e-05, + "loss": 0.2097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18383678793907166, + "step": 1125, + "valid_targets_mean": 5405.8, + "valid_targets_min": 726 + }, + { + "epoch": 1.6691285081240768, + "grad_norm": 0.3951289835886458, + "learning_rate": 3.771701367888534e-05, + "loss": 0.2383, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17336603999137878, + "step": 1130, + "valid_targets_mean": 5009.2, + "valid_targets_min": 395 + }, + { + "epoch": 1.6765140324963073, + "grad_norm": 0.4886944664495892, + "learning_rate": 3.7682717602353336e-05, + "loss": 0.2271, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2188146710395813, + "step": 1135, + "valid_targets_mean": 3926.0, + "valid_targets_min": 310 + }, + { + "epoch": 1.6838995568685378, + "grad_norm": 0.39480650462231015, + "learning_rate": 3.7648181670033125e-05, + "loss": 0.221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20998279750347137, + "step": 1140, + "valid_targets_mean": 6034.9, + "valid_targets_min": 592 + }, + { + "epoch": 1.691285081240768, + "grad_norm": 0.39214825097931927, + "learning_rate": 3.761340635038456e-05, + "loss": 0.2173, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1979064792394638, + "step": 1145, + "valid_targets_mean": 7045.8, + "valid_targets_min": 525 + }, + { + "epoch": 1.6986706056129985, + "grad_norm": 0.38111774464102766, + "learning_rate": 3.7578392115114655e-05, + "loss": 0.2119, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20325309038162231, + "step": 1150, + "valid_targets_mean": 6202.9, + "valid_targets_min": 630 + }, + { + "epoch": 1.706056129985229, + "grad_norm": 0.404770101827888, + "learning_rate": 3.754313943917115e-05, + "loss": 0.1936, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1922975480556488, + "step": 1155, + "valid_targets_mean": 5763.9, + "valid_targets_min": 528 + }, + { + "epoch": 1.7134416543574593, + "grad_norm": 0.4064959538938459, + "learning_rate": 3.7507648800736116e-05, + "loss": 0.2044, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2195628136396408, + "step": 1160, + "valid_targets_mean": 5726.2, + "valid_targets_min": 795 + }, + { + "epoch": 1.7208271787296898, + "grad_norm": 0.5078776991250893, + "learning_rate": 3.747192068121943e-05, + "loss": 0.2166, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24034610390663147, + "step": 1165, + "valid_targets_mean": 4283.9, + "valid_targets_min": 309 + }, + { + "epoch": 1.7282127031019203, + "grad_norm": 0.5067333046365079, + "learning_rate": 3.743595556525228e-05, + "loss": 0.2192, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2246871292591095, + "step": 1170, + "valid_targets_mean": 3702.9, + "valid_targets_min": 529 + }, + { + "epoch": 1.7355982274741506, + "grad_norm": 0.5019937895256118, + "learning_rate": 3.7399753940680556e-05, + "loss": 0.2053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20047210156917572, + "step": 1175, + "valid_targets_mean": 3960.8, + "valid_targets_min": 472 + }, + { + "epoch": 1.742983751846381, + "grad_norm": 0.41047915116673045, + "learning_rate": 3.736331629855826e-05, + "loss": 0.2262, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2049740105867386, + "step": 1180, + "valid_targets_mean": 5736.0, + "valid_targets_min": 815 + }, + { + "epoch": 1.7503692762186116, + "grad_norm": 0.3945106153005012, + "learning_rate": 3.7326643133140833e-05, + "loss": 0.2165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20625770092010498, + "step": 1185, + "valid_targets_mean": 6195.1, + "valid_targets_min": 250 + }, + { + "epoch": 1.7577548005908419, + "grad_norm": 0.4106742085350737, + "learning_rate": 3.7289734941878455e-05, + "loss": 0.2029, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24206054210662842, + "step": 1190, + "valid_targets_mean": 6082.2, + "valid_targets_min": 568 + }, + { + "epoch": 1.7651403249630724, + "grad_norm": 0.5200799032564574, + "learning_rate": 3.7252592225409285e-05, + "loss": 0.2448, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28392231464385986, + "step": 1195, + "valid_targets_mean": 3605.9, + "valid_targets_min": 523 + }, + { + "epoch": 1.7725258493353029, + "grad_norm": 0.470687041134897, + "learning_rate": 3.721521548755269e-05, + "loss": 0.221, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21265387535095215, + "step": 1200, + "valid_targets_mean": 4151.9, + "valid_targets_min": 541 + }, + { + "epoch": 1.7799113737075332, + "grad_norm": 0.4428156443932358, + "learning_rate": 3.71776052353024e-05, + "loss": 0.2121, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21042832732200623, + "step": 1205, + "valid_targets_mean": 5545.3, + "valid_targets_min": 701 + }, + { + "epoch": 1.7872968980797637, + "grad_norm": 0.45448533560080073, + "learning_rate": 3.7139761978819625e-05, + "loss": 0.2369, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.27311086654663086, + "step": 1210, + "valid_targets_mean": 5298.8, + "valid_targets_min": 628 + }, + { + "epoch": 1.7946824224519942, + "grad_norm": 0.4157213284903144, + "learning_rate": 3.7101686231426145e-05, + "loss": 0.2111, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21213330328464508, + "step": 1215, + "valid_targets_mean": 5659.4, + "valid_targets_min": 830 + }, + { + "epoch": 1.8020679468242244, + "grad_norm": 0.3921888627239872, + "learning_rate": 3.706337850959736e-05, + "loss": 0.2073, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20621275901794434, + "step": 1220, + "valid_targets_mean": 6066.2, + "valid_targets_min": 679 + }, + { + "epoch": 1.809453471196455, + "grad_norm": 0.393703088132589, + "learning_rate": 3.702483933295524e-05, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20170754194259644, + "step": 1225, + "valid_targets_mean": 5131.7, + "valid_targets_min": 594 + }, + { + "epoch": 1.8168389955686854, + "grad_norm": 0.429085603448917, + "learning_rate": 3.698606922426133e-05, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21911361813545227, + "step": 1230, + "valid_targets_mean": 6525.8, + "valid_targets_min": 673 + }, + { + "epoch": 1.8242245199409157, + "grad_norm": 0.41690498546700655, + "learning_rate": 3.694706870940961e-05, + "loss": 0.2107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20087522268295288, + "step": 1235, + "valid_targets_mean": 5197.6, + "valid_targets_min": 656 + }, + { + "epoch": 1.8316100443131462, + "grad_norm": 0.4214260567191511, + "learning_rate": 3.690783831741942e-05, + "loss": 0.1988, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24707287549972534, + "step": 1240, + "valid_targets_mean": 5131.9, + "valid_targets_min": 642 + }, + { + "epoch": 1.8389955686853767, + "grad_norm": 0.42497684417714315, + "learning_rate": 3.686837858042821e-05, + "loss": 0.2155, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2418084293603897, + "step": 1245, + "valid_targets_mean": 6411.4, + "valid_targets_min": 726 + }, + { + "epoch": 1.846381093057607, + "grad_norm": 0.43456506500215386, + "learning_rate": 3.6828690033684394e-05, + "loss": 0.205, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23421086370944977, + "step": 1250, + "valid_targets_mean": 4884.8, + "valid_targets_min": 637 + }, + { + "epoch": 1.8537666174298375, + "grad_norm": 0.3590270619837384, + "learning_rate": 3.678877321554003e-05, + "loss": 0.2317, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19189848005771637, + "step": 1255, + "valid_targets_mean": 6340.3, + "valid_targets_min": 588 + }, + { + "epoch": 1.861152141802068, + "grad_norm": 0.4491792252049705, + "learning_rate": 3.674862866744358e-05, + "loss": 0.2238, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23957936465740204, + "step": 1260, + "valid_targets_mean": 4542.8, + "valid_targets_min": 638 + }, + { + "epoch": 1.8685376661742983, + "grad_norm": 0.41595374872963425, + "learning_rate": 3.6708256933932485e-05, + "loss": 0.1888, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19980810582637787, + "step": 1265, + "valid_targets_mean": 5785.6, + "valid_targets_min": 379 + }, + { + "epoch": 1.8759231905465288, + "grad_norm": 0.47288076543534235, + "learning_rate": 3.6667658562625867e-05, + "loss": 0.2308, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2654092311859131, + "step": 1270, + "valid_targets_mean": 5471.1, + "valid_targets_min": 666 + }, + { + "epoch": 1.8833087149187593, + "grad_norm": 0.38192384234870763, + "learning_rate": 3.662683410421703e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19015946984291077, + "step": 1275, + "valid_targets_mean": 5607.4, + "valid_targets_min": 810 + }, + { + "epoch": 1.8906942392909896, + "grad_norm": 0.4085790502454245, + "learning_rate": 3.6585784112466034e-05, + "loss": 0.2145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23992851376533508, + "step": 1280, + "valid_targets_mean": 5566.8, + "valid_targets_min": 706 + }, + { + "epoch": 1.89807976366322, + "grad_norm": 0.3923685608209056, + "learning_rate": 3.6544509144192153e-05, + "loss": 0.2186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19036059081554413, + "step": 1285, + "valid_targets_mean": 5585.4, + "valid_targets_min": 766 + }, + { + "epoch": 1.9054652880354506, + "grad_norm": 0.460825966848905, + "learning_rate": 3.650300975926635e-05, + "loss": 0.2309, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20123234391212463, + "step": 1290, + "valid_targets_mean": 4738.8, + "valid_targets_min": 476 + }, + { + "epoch": 1.9128508124076808, + "grad_norm": 0.46958242857442417, + "learning_rate": 3.646128652060367e-05, + "loss": 0.2147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20810635387897491, + "step": 1295, + "valid_targets_mean": 4939.5, + "valid_targets_min": 640 + }, + { + "epoch": 1.9202363367799113, + "grad_norm": 0.513107015645425, + "learning_rate": 3.6419339994155596e-05, + "loss": 0.2294, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23249667882919312, + "step": 1300, + "valid_targets_mean": 3900.5, + "valid_targets_min": 512 + }, + { + "epoch": 1.9276218611521418, + "grad_norm": 0.41491988804017377, + "learning_rate": 3.637717074890238e-05, + "loss": 0.1856, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20350153744220734, + "step": 1305, + "valid_targets_mean": 5207.5, + "valid_targets_min": 687 + }, + { + "epoch": 1.9350073855243721, + "grad_norm": 0.3852016695481521, + "learning_rate": 3.633477935684532e-05, + "loss": 0.1985, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16912466287612915, + "step": 1310, + "valid_targets_mean": 5948.2, + "valid_targets_min": 558 + }, + { + "epoch": 1.9423929098966026, + "grad_norm": 0.47994385762704583, + "learning_rate": 3.629216639299903e-05, + "loss": 0.2023, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2138756513595581, + "step": 1315, + "valid_targets_mean": 4660.0, + "valid_targets_min": 546 + }, + { + "epoch": 1.9497784342688331, + "grad_norm": 0.43044739199814863, + "learning_rate": 3.624933243538359e-05, + "loss": 0.2237, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20759987831115723, + "step": 1320, + "valid_targets_mean": 4612.2, + "valid_targets_min": 586 + }, + { + "epoch": 1.9571639586410634, + "grad_norm": 0.44206360398309225, + "learning_rate": 3.6206278065016744e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20062926411628723, + "step": 1325, + "valid_targets_mean": 4257.2, + "valid_targets_min": 474 + }, + { + "epoch": 1.964549483013294, + "grad_norm": 0.832328393558017, + "learning_rate": 3.616300386590601e-05, + "loss": 0.2003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2263094186782837, + "step": 1330, + "valid_targets_mean": 3523.4, + "valid_targets_min": 320 + }, + { + "epoch": 1.9719350073855244, + "grad_norm": 0.5608605363552897, + "learning_rate": 3.6119510425040746e-05, + "loss": 0.2153, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2241973727941513, + "step": 1335, + "valid_targets_mean": 3184.9, + "valid_targets_min": 400 + }, + { + "epoch": 1.9793205317577547, + "grad_norm": 0.42827418469595757, + "learning_rate": 3.6075798332384205e-05, + "loss": 0.2075, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21591341495513916, + "step": 1340, + "valid_targets_mean": 5014.1, + "valid_targets_min": 703 + }, + { + "epoch": 1.9867060561299852, + "grad_norm": 0.4376829994779159, + "learning_rate": 3.603186818086552e-05, + "loss": 0.2229, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24252906441688538, + "step": 1345, + "valid_targets_mean": 5612.9, + "valid_targets_min": 843 + }, + { + "epoch": 1.9940915805022157, + "grad_norm": 0.48296281574524147, + "learning_rate": 3.598772056637166e-05, + "loss": 0.2326, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2911834716796875, + "step": 1350, + "valid_targets_mean": 5862.9, + "valid_targets_min": 542 + }, + { + "epoch": 2.001477104874446, + "grad_norm": 0.4285021994401378, + "learning_rate": 3.594335608773937e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17348650097846985, + "step": 1355, + "valid_targets_mean": 5699.8, + "valid_targets_min": 622 + }, + { + "epoch": 2.0088626292466767, + "grad_norm": 0.5571499078773812, + "learning_rate": 3.589877534674702e-05, + "loss": 0.2235, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21984922885894775, + "step": 1360, + "valid_targets_mean": 4844.9, + "valid_targets_min": 310 + }, + { + "epoch": 2.016248153618907, + "grad_norm": 0.4762775499228357, + "learning_rate": 3.585397894810645e-05, + "loss": 0.2174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2565491497516632, + "step": 1365, + "valid_targets_mean": 5172.8, + "valid_targets_min": 531 + }, + { + "epoch": 2.0236336779911372, + "grad_norm": 0.4890064307141238, + "learning_rate": 3.580896749945478e-05, + "loss": 0.1871, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.198519766330719, + "step": 1370, + "valid_targets_mean": 5031.6, + "valid_targets_min": 709 + }, + { + "epoch": 2.031019202363368, + "grad_norm": 0.42188127103720335, + "learning_rate": 3.576374161134614e-05, + "loss": 0.1924, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17222025990486145, + "step": 1375, + "valid_targets_mean": 5100.7, + "valid_targets_min": 506 + }, + { + "epoch": 2.0384047267355982, + "grad_norm": 0.4032153526008752, + "learning_rate": 3.571830189724344e-05, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22092564404010773, + "step": 1380, + "valid_targets_mean": 6401.4, + "valid_targets_min": 733 + }, + { + "epoch": 2.0457902511078285, + "grad_norm": 0.4090149314677475, + "learning_rate": 3.5672648973509975e-05, + "loss": 0.2037, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19857646524906158, + "step": 1385, + "valid_targets_mean": 5839.8, + "valid_targets_min": 589 + }, + { + "epoch": 2.0531757754800593, + "grad_norm": 0.4666629189772267, + "learning_rate": 3.5626783459401136e-05, + "loss": 0.1969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22908130288124084, + "step": 1390, + "valid_targets_mean": 4732.4, + "valid_targets_min": 481 + }, + { + "epoch": 2.0605612998522895, + "grad_norm": 0.5167728208836535, + "learning_rate": 3.558070597705597e-05, + "loss": 0.2147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.30561476945877075, + "step": 1395, + "valid_targets_mean": 5980.4, + "valid_targets_min": 499 + }, + { + "epoch": 2.06794682422452, + "grad_norm": 0.4957521107813994, + "learning_rate": 3.553441715148874e-05, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24766743183135986, + "step": 1400, + "valid_targets_mean": 5423.2, + "valid_targets_min": 636 + }, + { + "epoch": 2.0753323485967505, + "grad_norm": 0.5065232042372999, + "learning_rate": 3.5487917610580464e-05, + "loss": 0.1913, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19253401458263397, + "step": 1405, + "valid_targets_mean": 4459.0, + "valid_targets_min": 731 + }, + { + "epoch": 2.082717872968981, + "grad_norm": 0.4366761703530371, + "learning_rate": 3.5441207985070405e-05, + "loss": 0.2008, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20609664916992188, + "step": 1410, + "valid_targets_mean": 5449.8, + "valid_targets_min": 973 + }, + { + "epoch": 2.090103397341211, + "grad_norm": 0.3956592099323369, + "learning_rate": 3.5394288908547476e-05, + "loss": 0.2039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1563621610403061, + "step": 1415, + "valid_targets_mean": 5519.1, + "valid_targets_min": 520 + }, + { + "epoch": 2.097488921713442, + "grad_norm": 0.5353026127089262, + "learning_rate": 3.534716101744168e-05, + "loss": 0.1847, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1830843836069107, + "step": 1420, + "valid_targets_mean": 3676.1, + "valid_targets_min": 541 + }, + { + "epoch": 2.104874446085672, + "grad_norm": 0.4240842603893816, + "learning_rate": 3.529982495101549e-05, + "loss": 0.2007, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1899888962507248, + "step": 1425, + "valid_targets_mean": 5484.6, + "valid_targets_min": 573 + }, + { + "epoch": 2.1122599704579024, + "grad_norm": 0.4026373876082126, + "learning_rate": 3.5252281351355124e-05, + "loss": 0.1796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18199166655540466, + "step": 1430, + "valid_targets_mean": 5762.3, + "valid_targets_min": 564 + }, + { + "epoch": 2.119645494830133, + "grad_norm": 0.40632759254314066, + "learning_rate": 3.520453086336188e-05, + "loss": 0.2097, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1754504293203354, + "step": 1435, + "valid_targets_mean": 5398.9, + "valid_targets_min": 642 + }, + { + "epoch": 2.1270310192023634, + "grad_norm": 0.35808057513210484, + "learning_rate": 3.515657413474339e-05, + "loss": 0.1778, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13614241778850555, + "step": 1440, + "valid_targets_mean": 6742.3, + "valid_targets_min": 736 + }, + { + "epoch": 2.1344165435745936, + "grad_norm": 0.4139451850923316, + "learning_rate": 3.5108411816004796e-05, + "loss": 0.1791, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15198799967765808, + "step": 1445, + "valid_targets_mean": 4605.1, + "valid_targets_min": 696 + }, + { + "epoch": 2.1418020679468244, + "grad_norm": 0.4487084052336881, + "learning_rate": 3.506004456043997e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18390825390815735, + "step": 1450, + "valid_targets_mean": 4883.0, + "valid_targets_min": 631 + }, + { + "epoch": 2.1491875923190547, + "grad_norm": 0.4525728518238653, + "learning_rate": 3.501147302412263e-05, + "loss": 0.2171, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22244812548160553, + "step": 1455, + "valid_targets_mean": 4889.0, + "valid_targets_min": 640 + }, + { + "epoch": 2.156573116691285, + "grad_norm": 0.4649134318287312, + "learning_rate": 3.496269786589743e-05, + "loss": 0.2049, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19458895921707153, + "step": 1460, + "valid_targets_mean": 5381.3, + "valid_targets_min": 727 + }, + { + "epoch": 2.1639586410635157, + "grad_norm": 0.3972438491459193, + "learning_rate": 3.491371974737105e-05, + "loss": 0.2172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19966194033622742, + "step": 1465, + "valid_targets_mean": 6120.7, + "valid_targets_min": 289 + }, + { + "epoch": 2.171344165435746, + "grad_norm": 0.4119929448789575, + "learning_rate": 3.486453933290321e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2048000693321228, + "step": 1470, + "valid_targets_mean": 5923.5, + "valid_targets_min": 695 + }, + { + "epoch": 2.178729689807976, + "grad_norm": 0.43265541786608985, + "learning_rate": 3.481515728959764e-05, + "loss": 0.2107, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19329239428043365, + "step": 1475, + "valid_targets_mean": 5209.0, + "valid_targets_min": 559 + }, + { + "epoch": 2.186115214180207, + "grad_norm": 0.726326713865377, + "learning_rate": 3.4765574287293064e-05, + "loss": 0.1876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22369539737701416, + "step": 1480, + "valid_targets_mean": 3706.1, + "valid_targets_min": 348 + }, + { + "epoch": 2.193500738552437, + "grad_norm": 0.4285241209732484, + "learning_rate": 3.47157909985541e-05, + "loss": 0.1874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1844395101070404, + "step": 1485, + "valid_targets_mean": 5193.9, + "valid_targets_min": 655 + }, + { + "epoch": 2.2008862629246675, + "grad_norm": 0.3568827709297404, + "learning_rate": 3.466580809866213e-05, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16118377447128296, + "step": 1490, + "valid_targets_mean": 5637.3, + "valid_targets_min": 616 + }, + { + "epoch": 2.208271787296898, + "grad_norm": 0.40674844742126304, + "learning_rate": 3.461562626560613e-05, + "loss": 0.2318, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1650213599205017, + "step": 1495, + "valid_targets_mean": 5220.6, + "valid_targets_min": 578 + }, + { + "epoch": 2.2156573116691285, + "grad_norm": 0.40044238504347957, + "learning_rate": 3.456524618007352e-05, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18922209739685059, + "step": 1500, + "valid_targets_mean": 5842.4, + "valid_targets_min": 680 + }, + { + "epoch": 2.2230428360413588, + "grad_norm": 0.46819866949961275, + "learning_rate": 3.451466852544087e-05, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22434237599372864, + "step": 1505, + "valid_targets_mean": 4454.3, + "valid_targets_min": 585 + }, + { + "epoch": 2.2304283604135895, + "grad_norm": 0.358887635540945, + "learning_rate": 3.446389398776468e-05, + "loss": 0.1845, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16988348960876465, + "step": 1510, + "valid_targets_mean": 6279.1, + "valid_targets_min": 710 + }, + { + "epoch": 2.2378138847858198, + "grad_norm": 0.40829806216099035, + "learning_rate": 3.441292325577204e-05, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1971379816532135, + "step": 1515, + "valid_targets_mean": 6296.1, + "valid_targets_min": 822 + }, + { + "epoch": 2.24519940915805, + "grad_norm": 0.40160684926373197, + "learning_rate": 3.436175702085132e-05, + "loss": 0.2108, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17605721950531006, + "step": 1520, + "valid_targets_mean": 5582.8, + "valid_targets_min": 486 + }, + { + "epoch": 2.2525849335302808, + "grad_norm": 0.5229993777219565, + "learning_rate": 3.431039597704275e-05, + "loss": 0.2142, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23692980408668518, + "step": 1525, + "valid_targets_mean": 4018.5, + "valid_targets_min": 479 + }, + { + "epoch": 2.259970457902511, + "grad_norm": 0.6849225115875504, + "learning_rate": 3.425884082102904e-05, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.28416967391967773, + "step": 1530, + "valid_targets_mean": 2544.4, + "valid_targets_min": 400 + }, + { + "epoch": 2.2673559822747413, + "grad_norm": 0.4227339555943465, + "learning_rate": 3.4207092252125934e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20092085003852844, + "step": 1535, + "valid_targets_mean": 5492.9, + "valid_targets_min": 654 + }, + { + "epoch": 2.274741506646972, + "grad_norm": 0.4345429425277249, + "learning_rate": 3.415515097227269e-05, + "loss": 0.2065, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21635641157627106, + "step": 1540, + "valid_targets_mean": 5030.1, + "valid_targets_min": 489 + }, + { + "epoch": 2.2821270310192023, + "grad_norm": 0.4693229139379586, + "learning_rate": 3.4103017686022584e-05, + "loss": 0.2053, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2364303320646286, + "step": 1545, + "valid_targets_mean": 4731.6, + "valid_targets_min": 562 + }, + { + "epoch": 2.2895125553914326, + "grad_norm": 0.38590061526432556, + "learning_rate": 3.4050693100533334e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16934344172477722, + "step": 1550, + "valid_targets_mean": 6597.2, + "valid_targets_min": 590 + }, + { + "epoch": 2.2968980797636633, + "grad_norm": 0.3661821378036947, + "learning_rate": 3.399817792555756e-05, + "loss": 0.1917, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15037478506565094, + "step": 1555, + "valid_targets_mean": 5859.4, + "valid_targets_min": 620 + }, + { + "epoch": 2.3042836041358936, + "grad_norm": 0.5749118981748869, + "learning_rate": 3.394547287343307e-05, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21516035497188568, + "step": 1560, + "valid_targets_mean": 3785.1, + "valid_targets_min": 308 + }, + { + "epoch": 2.311669128508124, + "grad_norm": 0.5222981613404089, + "learning_rate": 3.389257865907329e-05, + "loss": 0.2128, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2672545909881592, + "step": 1565, + "valid_targets_mean": 4263.8, + "valid_targets_min": 562 + }, + { + "epoch": 2.3190546528803546, + "grad_norm": 0.39866742666796046, + "learning_rate": 3.383949599995747e-05, + "loss": 0.2183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20826904475688934, + "step": 1570, + "valid_targets_mean": 6104.1, + "valid_targets_min": 490 + }, + { + "epoch": 2.326440177252585, + "grad_norm": 0.5020262347414661, + "learning_rate": 3.378622561612105e-05, + "loss": 0.2017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17644500732421875, + "step": 1575, + "valid_targets_mean": 4395.3, + "valid_targets_min": 535 + }, + { + "epoch": 2.333825701624815, + "grad_norm": 0.48126474522159196, + "learning_rate": 3.3732768230145834e-05, + "loss": 0.1992, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2069077491760254, + "step": 1580, + "valid_targets_mean": 4419.8, + "valid_targets_min": 617 + }, + { + "epoch": 2.341211225997046, + "grad_norm": 0.35388137699572986, + "learning_rate": 3.3679124567150195e-05, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15227368474006653, + "step": 1585, + "valid_targets_mean": 6494.9, + "valid_targets_min": 682 + }, + { + "epoch": 2.348596750369276, + "grad_norm": 0.3840750165781411, + "learning_rate": 3.362529535477923e-05, + "loss": 0.2158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.170127272605896, + "step": 1590, + "valid_targets_mean": 6790.0, + "valid_targets_min": 1006 + }, + { + "epoch": 2.3559822747415065, + "grad_norm": 0.5434507333346537, + "learning_rate": 3.357128132319494e-05, + "loss": 0.2114, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20456504821777344, + "step": 1595, + "valid_targets_mean": 3017.6, + "valid_targets_min": 320 + }, + { + "epoch": 2.363367799113737, + "grad_norm": 0.4069694394082241, + "learning_rate": 3.3517083205066275e-05, + "loss": 0.2013, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19211331009864807, + "step": 1600, + "valid_targets_mean": 5513.6, + "valid_targets_min": 681 + }, + { + "epoch": 2.3707533234859675, + "grad_norm": 0.41784163330661017, + "learning_rate": 3.34627017355592e-05, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18239319324493408, + "step": 1605, + "valid_targets_mean": 4941.2, + "valid_targets_min": 540 + }, + { + "epoch": 2.3781388478581977, + "grad_norm": 0.39238043020867075, + "learning_rate": 3.340813765232675e-05, + "loss": 0.2059, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1841011941432953, + "step": 1610, + "valid_targets_mean": 5700.5, + "valid_targets_min": 517 + }, + { + "epoch": 2.3855243722304285, + "grad_norm": 0.4201145604723071, + "learning_rate": 3.3353391695499e-05, + "loss": 0.2, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20100927352905273, + "step": 1615, + "valid_targets_mean": 5057.5, + "valid_targets_min": 491 + }, + { + "epoch": 2.3929098966026587, + "grad_norm": 0.41846525077178565, + "learning_rate": 3.329846460767305e-05, + "loss": 0.1919, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20187963545322418, + "step": 1620, + "valid_targets_mean": 4845.5, + "valid_targets_min": 523 + }, + { + "epoch": 2.4002954209748895, + "grad_norm": 0.40798367807953856, + "learning_rate": 3.324335713390292e-05, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15751047432422638, + "step": 1625, + "valid_targets_mean": 5196.8, + "valid_targets_min": 314 + }, + { + "epoch": 2.4076809453471197, + "grad_norm": 0.4061768747540575, + "learning_rate": 3.3188070021689464e-05, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17988130450248718, + "step": 1630, + "valid_targets_mean": 5505.8, + "valid_targets_min": 605 + }, + { + "epoch": 2.41506646971935, + "grad_norm": 0.46242647917420265, + "learning_rate": 3.313260402097024e-05, + "loss": 0.2236, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2638862729072571, + "step": 1635, + "valid_targets_mean": 5006.3, + "valid_targets_min": 510 + }, + { + "epoch": 2.4224519940915803, + "grad_norm": 0.49810161190363844, + "learning_rate": 3.307695988410931e-05, + "loss": 0.2147, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2404661476612091, + "step": 1640, + "valid_targets_mean": 5057.5, + "valid_targets_min": 870 + }, + { + "epoch": 2.429837518463811, + "grad_norm": 0.49091951535741857, + "learning_rate": 3.302113836588705e-05, + "loss": 0.1857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21287889778614044, + "step": 1645, + "valid_targets_mean": 3981.2, + "valid_targets_min": 336 + }, + { + "epoch": 2.4372230428360413, + "grad_norm": 0.570596929329595, + "learning_rate": 3.296514022348991e-05, + "loss": 0.2003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22047343850135803, + "step": 1650, + "valid_targets_mean": 5769.9, + "valid_targets_min": 747 + }, + { + "epoch": 2.444608567208272, + "grad_norm": 0.45209355569835663, + "learning_rate": 3.2908966216500164e-05, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17555588483810425, + "step": 1655, + "valid_targets_mean": 5262.3, + "valid_targets_min": 642 + }, + { + "epoch": 2.4519940915805023, + "grad_norm": 0.5750894848392384, + "learning_rate": 3.2852617106885554e-05, + "loss": 0.1904, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24034175276756287, + "step": 1660, + "valid_targets_mean": 2897.2, + "valid_targets_min": 553 + }, + { + "epoch": 2.4593796159527326, + "grad_norm": 0.44533416665746417, + "learning_rate": 3.279609365898903e-05, + "loss": 0.1968, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18521445989608765, + "step": 1665, + "valid_targets_mean": 4074.0, + "valid_targets_min": 559 + }, + { + "epoch": 2.466765140324963, + "grad_norm": 0.3666396515845462, + "learning_rate": 3.273939663951828e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1782068908214569, + "step": 1670, + "valid_targets_mean": 6837.4, + "valid_targets_min": 600 + }, + { + "epoch": 2.4741506646971936, + "grad_norm": 0.3899278710986994, + "learning_rate": 3.268252681753546e-05, + "loss": 0.2211, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2376466989517212, + "step": 1675, + "valid_targets_mean": 6254.3, + "valid_targets_min": 784 + }, + { + "epoch": 2.481536189069424, + "grad_norm": 0.4081436844860877, + "learning_rate": 3.262548496444664e-05, + "loss": 0.1934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19119364023208618, + "step": 1680, + "valid_targets_mean": 5165.9, + "valid_targets_min": 669 + }, + { + "epoch": 2.4889217134416546, + "grad_norm": 0.3782408782114525, + "learning_rate": 3.25682718539914e-05, + "loss": 0.1849, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1731245368719101, + "step": 1685, + "valid_targets_mean": 6192.8, + "valid_targets_min": 844 + }, + { + "epoch": 2.496307237813885, + "grad_norm": 0.5214383122274813, + "learning_rate": 3.251088826223235e-05, + "loss": 0.2174, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2473294734954834, + "step": 1690, + "valid_targets_mean": 3865.7, + "valid_targets_min": 553 + }, + { + "epoch": 2.503692762186115, + "grad_norm": 0.48463478522919884, + "learning_rate": 3.245333496754455e-05, + "loss": 0.2039, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2620091736316681, + "step": 1695, + "valid_targets_mean": 4272.6, + "valid_targets_min": 713 + }, + { + "epoch": 2.5110782865583454, + "grad_norm": 0.48594394962497084, + "learning_rate": 3.239561275060501e-05, + "loss": 0.2043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23510144650936127, + "step": 1700, + "valid_targets_mean": 4500.4, + "valid_targets_min": 516 + }, + { + "epoch": 2.518463810930576, + "grad_norm": 0.36026021621317095, + "learning_rate": 3.233772239438206e-05, + "loss": 0.198, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24801120162010193, + "step": 1705, + "valid_targets_mean": 7072.2, + "valid_targets_min": 775 + }, + { + "epoch": 2.5258493353028064, + "grad_norm": 0.5691361574973622, + "learning_rate": 3.2279664684124724e-05, + "loss": 0.1969, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24925799667835236, + "step": 1710, + "valid_targets_mean": 4504.7, + "valid_targets_min": 377 + }, + { + "epoch": 2.533234859675037, + "grad_norm": 0.399794469998823, + "learning_rate": 3.22214404073521e-05, + "loss": 0.1825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16049984097480774, + "step": 1715, + "valid_targets_mean": 6125.9, + "valid_targets_min": 497 + }, + { + "epoch": 2.5406203840472674, + "grad_norm": 0.41442596267228626, + "learning_rate": 3.216305035384268e-05, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17098268866539001, + "step": 1720, + "valid_targets_mean": 5603.4, + "valid_targets_min": 686 + }, + { + "epoch": 2.5480059084194977, + "grad_norm": 0.4162604595759132, + "learning_rate": 3.210449531562361e-05, + "loss": 0.1942, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16771002113819122, + "step": 1725, + "valid_targets_mean": 4639.4, + "valid_targets_min": 552 + }, + { + "epoch": 2.555391432791728, + "grad_norm": 0.38874831725964026, + "learning_rate": 3.2045776086959945e-05, + "loss": 0.2123, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17640861868858337, + "step": 1730, + "valid_targets_mean": 5082.9, + "valid_targets_min": 125 + }, + { + "epoch": 2.5627769571639587, + "grad_norm": 0.36315151583426797, + "learning_rate": 3.1986893464343913e-05, + "loss": 0.213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1778927743434906, + "step": 1735, + "valid_targets_mean": 5937.7, + "valid_targets_min": 826 + }, + { + "epoch": 2.570162481536189, + "grad_norm": 0.44124180419701514, + "learning_rate": 3.192784824648405e-05, + "loss": 0.2125, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22931697964668274, + "step": 1740, + "valid_targets_mean": 4890.1, + "valid_targets_min": 436 + }, + { + "epoch": 2.5775480059084197, + "grad_norm": 0.4646453073195735, + "learning_rate": 3.186864123429443e-05, + "loss": 0.2027, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1924285590648651, + "step": 1745, + "valid_targets_mean": 4559.1, + "valid_targets_min": 626 + }, + { + "epoch": 2.58493353028065, + "grad_norm": 0.3769877916171565, + "learning_rate": 3.180927323088377e-05, + "loss": 0.2004, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19486212730407715, + "step": 1750, + "valid_targets_mean": 7005.6, + "valid_targets_min": 873 + }, + { + "epoch": 2.5923190546528803, + "grad_norm": 0.4122861646475437, + "learning_rate": 3.1749745041544535e-05, + "loss": 0.202, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1655203104019165, + "step": 1755, + "valid_targets_mean": 5165.2, + "valid_targets_min": 362 + }, + { + "epoch": 2.5997045790251105, + "grad_norm": 0.46795037129236655, + "learning_rate": 3.169005747374202e-05, + "loss": 0.2191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.210823655128479, + "step": 1760, + "valid_targets_mean": 4634.7, + "valid_targets_min": 557 + }, + { + "epoch": 2.6070901033973413, + "grad_norm": 0.40305455364946613, + "learning_rate": 3.163021133710339e-05, + "loss": 0.1804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19001798331737518, + "step": 1765, + "valid_targets_mean": 6316.5, + "valid_targets_min": 515 + }, + { + "epoch": 2.6144756277695715, + "grad_norm": 0.37687379981581354, + "learning_rate": 3.1570207443406715e-05, + "loss": 0.185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.156022310256958, + "step": 1770, + "valid_targets_mean": 5602.8, + "valid_targets_min": 636 + }, + { + "epoch": 2.6218611521418023, + "grad_norm": 0.41470758369539723, + "learning_rate": 3.1510046606569944e-05, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1826784908771515, + "step": 1775, + "valid_targets_mean": 5436.1, + "valid_targets_min": 541 + }, + { + "epoch": 2.6292466765140325, + "grad_norm": 0.3852868541588975, + "learning_rate": 3.1449729642639856e-05, + "loss": 0.1918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17214235663414001, + "step": 1780, + "valid_targets_mean": 5589.0, + "valid_targets_min": 475 + }, + { + "epoch": 2.636632200886263, + "grad_norm": 0.4088176838039698, + "learning_rate": 3.138925736978103e-05, + "loss": 0.2212, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18025021255016327, + "step": 1785, + "valid_targets_mean": 4904.5, + "valid_targets_min": 833 + }, + { + "epoch": 2.644017725258493, + "grad_norm": 0.4491076662753666, + "learning_rate": 3.132863060826469e-05, + "loss": 0.1961, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18313133716583252, + "step": 1790, + "valid_targets_mean": 4372.4, + "valid_targets_min": 504 + }, + { + "epoch": 2.651403249630724, + "grad_norm": 0.4132101343175699, + "learning_rate": 3.126785018045764e-05, + "loss": 0.2117, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18020716309547424, + "step": 1795, + "valid_targets_mean": 5285.4, + "valid_targets_min": 324 + }, + { + "epoch": 2.658788774002954, + "grad_norm": 0.6224525946031296, + "learning_rate": 3.120691691081104e-05, + "loss": 0.1933, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18289199471473694, + "step": 1800, + "valid_targets_mean": 5814.2, + "valid_targets_min": 712 + }, + { + "epoch": 2.666174298375185, + "grad_norm": 0.37344081795259465, + "learning_rate": 3.1145831625849315e-05, + "loss": 0.2139, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18605783581733704, + "step": 1805, + "valid_targets_mean": 6338.0, + "valid_targets_min": 708 + }, + { + "epoch": 2.673559822747415, + "grad_norm": 0.45469603573341405, + "learning_rate": 3.1084595154158814e-05, + "loss": 0.1901, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20985165238380432, + "step": 1810, + "valid_targets_mean": 4686.1, + "valid_targets_min": 694 + }, + { + "epoch": 2.6809453471196454, + "grad_norm": 0.42143425429690434, + "learning_rate": 3.1023208326376716e-05, + "loss": 0.2089, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1601705253124237, + "step": 1815, + "valid_targets_mean": 5887.0, + "valid_targets_min": 629 + }, + { + "epoch": 2.6883308714918757, + "grad_norm": 0.4566218104083272, + "learning_rate": 3.0961671975179674e-05, + "loss": 0.1909, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22524869441986084, + "step": 1820, + "valid_targets_mean": 5187.4, + "valid_targets_min": 425 + }, + { + "epoch": 2.6957163958641064, + "grad_norm": 0.45904018086003157, + "learning_rate": 3.08999869352725e-05, + "loss": 0.1894, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2111627757549286, + "step": 1825, + "valid_targets_mean": 4385.9, + "valid_targets_min": 476 + }, + { + "epoch": 2.7031019202363367, + "grad_norm": 0.3947587132525764, + "learning_rate": 3.0838154043376936e-05, + "loss": 0.1834, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1816597878932953, + "step": 1830, + "valid_targets_mean": 5509.4, + "valid_targets_min": 311 + }, + { + "epoch": 2.7104874446085674, + "grad_norm": 0.44429247184816495, + "learning_rate": 3.077617413822022e-05, + "loss": 0.1815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16769450902938843, + "step": 1835, + "valid_targets_mean": 5723.1, + "valid_targets_min": 650 + }, + { + "epoch": 2.7178729689807977, + "grad_norm": 0.39766345484896237, + "learning_rate": 3.0714048060523745e-05, + "loss": 0.2043, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.183930903673172, + "step": 1840, + "valid_targets_mean": 5270.6, + "valid_targets_min": 926 + }, + { + "epoch": 2.725258493353028, + "grad_norm": 0.36819777572651946, + "learning_rate": 3.065177665299166e-05, + "loss": 0.2085, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18269002437591553, + "step": 1845, + "valid_targets_mean": 6104.4, + "valid_targets_min": 573 + }, + { + "epoch": 2.7326440177252582, + "grad_norm": 0.656642708317576, + "learning_rate": 3.0589360760299416e-05, + "loss": 0.1874, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2284368872642517, + "step": 1850, + "valid_targets_mean": 4357.9, + "valid_targets_min": 457 + }, + { + "epoch": 2.740029542097489, + "grad_norm": 0.40389665086925014, + "learning_rate": 3.052680122908232e-05, + "loss": 0.1944, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1856120228767395, + "step": 1855, + "valid_targets_mean": 4957.6, + "valid_targets_min": 529 + }, + { + "epoch": 2.7474150664697192, + "grad_norm": 0.34197083654429966, + "learning_rate": 3.046409890792406e-05, + "loss": 0.2, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15076898038387299, + "step": 1860, + "valid_targets_mean": 5943.9, + "valid_targets_min": 490 + }, + { + "epoch": 2.75480059084195, + "grad_norm": 0.39694434070650186, + "learning_rate": 3.040125464734519e-05, + "loss": 0.2186, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22727245092391968, + "step": 1865, + "valid_targets_mean": 6380.9, + "valid_targets_min": 611 + }, + { + "epoch": 2.7621861152141802, + "grad_norm": 0.48955312172318344, + "learning_rate": 3.0338269299791573e-05, + "loss": 0.1881, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18965637683868408, + "step": 1870, + "valid_targets_mean": 4740.5, + "valid_targets_min": 595 + }, + { + "epoch": 2.7695716395864105, + "grad_norm": 0.498553979771601, + "learning_rate": 3.0275143719622853e-05, + "loss": 0.196, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18993785977363586, + "step": 1875, + "valid_targets_mean": 6250.0, + "valid_targets_min": 790 + }, + { + "epoch": 2.7769571639586412, + "grad_norm": 0.4680949760689842, + "learning_rate": 3.0211878763100836e-05, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19833850860595703, + "step": 1880, + "valid_targets_mean": 4042.8, + "valid_targets_min": 639 + }, + { + "epoch": 2.7843426883308715, + "grad_norm": 0.6983614556096809, + "learning_rate": 3.0148475288377896e-05, + "loss": 0.1929, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18655887246131897, + "step": 1885, + "valid_targets_mean": 5421.7, + "valid_targets_min": 761 + }, + { + "epoch": 2.791728212703102, + "grad_norm": 0.3993729610123753, + "learning_rate": 3.0084934155485315e-05, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17627528309822083, + "step": 1890, + "valid_targets_mean": 5305.3, + "valid_targets_min": 598 + }, + { + "epoch": 2.7991137370753325, + "grad_norm": 0.3486684335819875, + "learning_rate": 3.0021256226321643e-05, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17181184887886047, + "step": 1895, + "valid_targets_mean": 6981.1, + "valid_targets_min": 461 + }, + { + "epoch": 2.806499261447563, + "grad_norm": 0.3996801724597566, + "learning_rate": 2.995744236464098e-05, + "loss": 0.1824, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19423222541809082, + "step": 1900, + "valid_targets_mean": 5607.2, + "valid_targets_min": 658 + }, + { + "epoch": 2.813884785819793, + "grad_norm": 0.4684471879315951, + "learning_rate": 2.9893493436041274e-05, + "loss": 0.2038, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2360610067844391, + "step": 1905, + "valid_targets_mean": 4200.7, + "valid_targets_min": 726 + }, + { + "epoch": 2.821270310192024, + "grad_norm": 0.5224516946852575, + "learning_rate": 2.9829410307952577e-05, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18097460269927979, + "step": 1910, + "valid_targets_mean": 5226.1, + "valid_targets_min": 453 + }, + { + "epoch": 2.828655834564254, + "grad_norm": 0.3919890597149292, + "learning_rate": 2.9765193849625283e-05, + "loss": 0.1958, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16534635424613953, + "step": 1915, + "valid_targets_mean": 5494.3, + "valid_targets_min": 459 + }, + { + "epoch": 2.8360413589364843, + "grad_norm": 0.3195931232516741, + "learning_rate": 2.9700844932118334e-05, + "loss": 0.1837, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1442551165819168, + "step": 1920, + "valid_targets_mean": 6732.5, + "valid_targets_min": 614 + }, + { + "epoch": 2.843426883308715, + "grad_norm": 0.42131927060359514, + "learning_rate": 2.9636364428287395e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19797298312187195, + "step": 1925, + "valid_targets_mean": 4944.9, + "valid_targets_min": 456 + }, + { + "epoch": 2.8508124076809453, + "grad_norm": 0.3873317542623266, + "learning_rate": 2.9571753212773028e-05, + "loss": 0.2149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17931729555130005, + "step": 1930, + "valid_targets_mean": 5626.6, + "valid_targets_min": 496 + }, + { + "epoch": 2.8581979320531756, + "grad_norm": 0.4030896596505125, + "learning_rate": 2.9507012161988827e-05, + "loss": 0.1995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17033201456069946, + "step": 1935, + "valid_targets_mean": 6326.3, + "valid_targets_min": 768 + }, + { + "epoch": 2.8655834564254064, + "grad_norm": 0.4444109946556492, + "learning_rate": 2.9442142154109522e-05, + "loss": 0.1995, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1885213851928711, + "step": 1940, + "valid_targets_mean": 5517.4, + "valid_targets_min": 802 + }, + { + "epoch": 2.8729689807976366, + "grad_norm": 0.46145234382410805, + "learning_rate": 2.937714406905906e-05, + "loss": 0.1889, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20272204279899597, + "step": 1945, + "valid_targets_mean": 5488.1, + "valid_targets_min": 656 + }, + { + "epoch": 2.880354505169867, + "grad_norm": 0.42923087358401046, + "learning_rate": 2.9312018788498714e-05, + "loss": 0.2022, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19835910201072693, + "step": 1950, + "valid_targets_mean": 4609.9, + "valid_targets_min": 246 + }, + { + "epoch": 2.8877400295420976, + "grad_norm": 0.36880559499022475, + "learning_rate": 2.924676719581506e-05, + "loss": 0.1928, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19959020614624023, + "step": 1955, + "valid_targets_mean": 7206.6, + "valid_targets_min": 337 + }, + { + "epoch": 2.895125553914328, + "grad_norm": 0.3694784724494498, + "learning_rate": 2.9181390176108024e-05, + "loss": 0.1957, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16610810160636902, + "step": 1960, + "valid_targets_mean": 5820.9, + "valid_targets_min": 743 + }, + { + "epoch": 2.902511078286558, + "grad_norm": 0.3748352447951345, + "learning_rate": 2.911588861617889e-05, + "loss": 0.2003, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1821543574333191, + "step": 1965, + "valid_targets_mean": 6034.1, + "valid_targets_min": 257 + }, + { + "epoch": 2.909896602658789, + "grad_norm": 0.46557987222831126, + "learning_rate": 2.9050263404518254e-05, + "loss": 0.2077, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2896730303764343, + "step": 1970, + "valid_targets_mean": 5670.9, + "valid_targets_min": 664 + }, + { + "epoch": 2.917282127031019, + "grad_norm": 0.45582660198656316, + "learning_rate": 2.8984515431293982e-05, + "loss": 0.2213, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21186470985412598, + "step": 1975, + "valid_targets_mean": 5415.5, + "valid_targets_min": 511 + }, + { + "epoch": 2.9246676514032495, + "grad_norm": 0.39058091819087787, + "learning_rate": 2.89186455883391e-05, + "loss": 0.2232, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18955285847187042, + "step": 1980, + "valid_targets_mean": 5434.4, + "valid_targets_min": 675 + }, + { + "epoch": 2.93205317577548, + "grad_norm": 0.38016760545252115, + "learning_rate": 2.8852654769139774e-05, + "loss": 0.2191, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.243728905916214, + "step": 1985, + "valid_targets_mean": 6925.2, + "valid_targets_min": 673 + }, + { + "epoch": 2.9394387001477105, + "grad_norm": 0.546712089772061, + "learning_rate": 2.8786543868823104e-05, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17349129915237427, + "step": 1990, + "valid_targets_mean": 4468.9, + "valid_targets_min": 543 + }, + { + "epoch": 2.9468242245199407, + "grad_norm": 0.4515115845526151, + "learning_rate": 2.8720313784145036e-05, + "loss": 0.1934, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20189234614372253, + "step": 1995, + "valid_targets_mean": 4474.2, + "valid_targets_min": 495 + }, + { + "epoch": 2.9542097488921715, + "grad_norm": 0.3831876618295643, + "learning_rate": 2.865396541347818e-05, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15678755939006805, + "step": 2000, + "valid_targets_mean": 4760.3, + "valid_targets_min": 370 + }, + { + "epoch": 2.9615952732644018, + "grad_norm": 0.40740457187470347, + "learning_rate": 2.8587499656799624e-05, + "loss": 0.1897, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17568612098693848, + "step": 2005, + "valid_targets_mean": 4997.9, + "valid_targets_min": 742 + }, + { + "epoch": 2.9689807976366325, + "grad_norm": 0.3735878296428736, + "learning_rate": 2.852091741567874e-05, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21047380566596985, + "step": 2010, + "valid_targets_mean": 6746.7, + "valid_targets_min": 587 + }, + { + "epoch": 2.9763663220088628, + "grad_norm": 0.4064437208033243, + "learning_rate": 2.8454219593264924e-05, + "loss": 0.1963, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.168643057346344, + "step": 2015, + "valid_targets_mean": 5444.5, + "valid_targets_min": 282 + }, + { + "epoch": 2.983751846381093, + "grad_norm": 0.4221057265749455, + "learning_rate": 2.8387407094275378e-05, + "loss": 0.1799, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20394739508628845, + "step": 2020, + "valid_targets_mean": 5266.9, + "valid_targets_min": 790 + }, + { + "epoch": 2.9911373707533233, + "grad_norm": 0.35734323137197604, + "learning_rate": 2.8320480824982836e-05, + "loss": 0.1776, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15881027281284332, + "step": 2025, + "valid_targets_mean": 5607.5, + "valid_targets_min": 572 + }, + { + "epoch": 2.998522895125554, + "grad_norm": 0.40362443061193043, + "learning_rate": 2.825344169320323e-05, + "loss": 0.1877, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19752827286720276, + "step": 2030, + "valid_targets_mean": 5551.8, + "valid_targets_min": 636 + }, + { + "epoch": 3.0059084194977843, + "grad_norm": 0.3483325395168603, + "learning_rate": 2.818629060828344e-05, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15444116294384003, + "step": 2035, + "valid_targets_mean": 6590.1, + "valid_targets_min": 2768 + }, + { + "epoch": 3.0132939438700146, + "grad_norm": 0.46266772441200416, + "learning_rate": 2.811902848108889e-05, + "loss": 0.1764, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18906280398368835, + "step": 2040, + "valid_targets_mean": 4889.4, + "valid_targets_min": 873 + }, + { + "epoch": 3.0206794682422453, + "grad_norm": 0.41227568999635833, + "learning_rate": 2.8051656223991274e-05, + "loss": 0.1989, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16269494593143463, + "step": 2045, + "valid_targets_mean": 5567.1, + "valid_targets_min": 616 + }, + { + "epoch": 3.0280649926144756, + "grad_norm": 0.46419141803047215, + "learning_rate": 2.798417475085608e-05, + "loss": 0.1797, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1930653154850006, + "step": 2050, + "valid_targets_mean": 5456.0, + "valid_targets_min": 708 + }, + { + "epoch": 3.035450516986706, + "grad_norm": 0.4383446251656798, + "learning_rate": 2.791658497703029e-05, + "loss": 0.1809, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18855616450309753, + "step": 2055, + "valid_targets_mean": 5512.9, + "valid_targets_min": 571 + }, + { + "epoch": 3.0428360413589366, + "grad_norm": 0.37011575013943987, + "learning_rate": 2.7848887819329903e-05, + "loss": 0.1869, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16166454553604126, + "step": 2060, + "valid_targets_mean": 6532.8, + "valid_targets_min": 1512 + }, + { + "epoch": 3.050221565731167, + "grad_norm": 0.40311646790756955, + "learning_rate": 2.7781084196027527e-05, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19137972593307495, + "step": 2065, + "valid_targets_mean": 5962.6, + "valid_targets_min": 564 + }, + { + "epoch": 3.057607090103397, + "grad_norm": 0.4275359756981932, + "learning_rate": 2.7713175026839905e-05, + "loss": 0.1744, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21499311923980713, + "step": 2070, + "valid_targets_mean": 5277.0, + "valid_targets_min": 709 + }, + { + "epoch": 3.064992614475628, + "grad_norm": 0.3752504219955399, + "learning_rate": 2.7645161232915444e-05, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16594503819942474, + "step": 2075, + "valid_targets_mean": 6164.9, + "valid_targets_min": 432 + }, + { + "epoch": 3.072378138847858, + "grad_norm": 0.4882106097565124, + "learning_rate": 2.7577043736821748e-05, + "loss": 0.1793, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21406424045562744, + "step": 2080, + "valid_targets_mean": 4252.2, + "valid_targets_min": 625 + }, + { + "epoch": 3.0797636632200884, + "grad_norm": 0.3525875278030082, + "learning_rate": 2.750882346253305e-05, + "loss": 0.1792, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13793540000915527, + "step": 2085, + "valid_targets_mean": 6917.0, + "valid_targets_min": 778 + }, + { + "epoch": 3.087149187592319, + "grad_norm": 0.4320447754658649, + "learning_rate": 2.7440501335417748e-05, + "loss": 0.1738, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1812410056591034, + "step": 2090, + "valid_targets_mean": 5966.3, + "valid_targets_min": 497 + }, + { + "epoch": 3.0945347119645494, + "grad_norm": 0.5117110061423488, + "learning_rate": 2.7372078282225772e-05, + "loss": 0.1876, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2231147289276123, + "step": 2095, + "valid_targets_mean": 4128.2, + "valid_targets_min": 540 + }, + { + "epoch": 3.1019202363367797, + "grad_norm": 0.4095198035952486, + "learning_rate": 2.730355523107608e-05, + "loss": 0.1735, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16321970522403717, + "step": 2100, + "valid_targets_mean": 5197.5, + "valid_targets_min": 461 + }, + { + "epoch": 3.1093057607090104, + "grad_norm": 0.47545224234959904, + "learning_rate": 2.7234933111444046e-05, + "loss": 0.1716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16089124977588654, + "step": 2105, + "valid_targets_mean": 3588.7, + "valid_targets_min": 322 + }, + { + "epoch": 3.1166912850812407, + "grad_norm": 0.3910891142579986, + "learning_rate": 2.7166212854148838e-05, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2100703865289688, + "step": 2110, + "valid_targets_mean": 6335.6, + "valid_targets_min": 708 + }, + { + "epoch": 3.124076809453471, + "grad_norm": 0.4204297819786424, + "learning_rate": 2.709739539134081e-05, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17549830675125122, + "step": 2115, + "valid_targets_mean": 4854.4, + "valid_targets_min": 803 + }, + { + "epoch": 3.1314623338257017, + "grad_norm": 0.4084245651713014, + "learning_rate": 2.7028481656488856e-05, + "loss": 0.1751, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15884898602962494, + "step": 2120, + "valid_targets_mean": 4327.5, + "valid_targets_min": 546 + }, + { + "epoch": 3.138847858197932, + "grad_norm": 0.5289348550596176, + "learning_rate": 2.6959472584367737e-05, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18101103603839874, + "step": 2125, + "valid_targets_mean": 4410.6, + "valid_targets_min": 696 + }, + { + "epoch": 3.1462333825701623, + "grad_norm": 0.4419683145308729, + "learning_rate": 2.689036911104542e-05, + "loss": 0.1873, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24032284319400787, + "step": 2130, + "valid_targets_mean": 5461.6, + "valid_targets_min": 730 + }, + { + "epoch": 3.153618906942393, + "grad_norm": 0.7124574698928174, + "learning_rate": 2.6821172173870355e-05, + "loss": 0.1795, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15942493081092834, + "step": 2135, + "valid_targets_mean": 5982.3, + "valid_targets_min": 228 + }, + { + "epoch": 3.1610044313146233, + "grad_norm": 0.4691946439142087, + "learning_rate": 2.6751882711458783e-05, + "loss": 0.1754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16036120057106018, + "step": 2140, + "valid_targets_mean": 4745.8, + "valid_targets_min": 652 + }, + { + "epoch": 3.1683899556868536, + "grad_norm": 0.3657559670571617, + "learning_rate": 2.6682501663682e-05, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16519255936145782, + "step": 2145, + "valid_targets_mean": 6309.1, + "valid_targets_min": 684 + }, + { + "epoch": 3.1757754800590843, + "grad_norm": 0.4525601076810765, + "learning_rate": 2.661302997165359e-05, + "loss": 0.1784, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16392679512500763, + "step": 2150, + "valid_targets_mean": 4315.6, + "valid_targets_min": 598 + }, + { + "epoch": 3.1831610044313146, + "grad_norm": 0.4756846684204359, + "learning_rate": 2.6543468577716694e-05, + "loss": 0.2019, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2014717161655426, + "step": 2155, + "valid_targets_mean": 4423.4, + "valid_targets_min": 594 + }, + { + "epoch": 3.1905465288035453, + "grad_norm": 0.4270410035428385, + "learning_rate": 2.6473818425431185e-05, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1542786955833435, + "step": 2160, + "valid_targets_mean": 5105.6, + "valid_targets_min": 348 + }, + { + "epoch": 3.1979320531757756, + "grad_norm": 0.4702062705925336, + "learning_rate": 2.640408045956091e-05, + "loss": 0.1884, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19304950535297394, + "step": 2165, + "valid_targets_mean": 4532.8, + "valid_targets_min": 674 + }, + { + "epoch": 3.205317577548006, + "grad_norm": 0.45151853190869246, + "learning_rate": 2.6334255626060842e-05, + "loss": 0.1733, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19992440938949585, + "step": 2170, + "valid_targets_mean": 4673.7, + "valid_targets_min": 658 + }, + { + "epoch": 3.212703101920236, + "grad_norm": 0.42434196985334355, + "learning_rate": 2.626434487206428e-05, + "loss": 0.1763, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18652237951755524, + "step": 2175, + "valid_targets_mean": 5400.8, + "valid_targets_min": 528 + }, + { + "epoch": 3.220088626292467, + "grad_norm": 0.4063121122695259, + "learning_rate": 2.6194349145869965e-05, + "loss": 0.185, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16594845056533813, + "step": 2180, + "valid_targets_mean": 5230.7, + "valid_targets_min": 369 + }, + { + "epoch": 3.227474150664697, + "grad_norm": 0.447772859466632, + "learning_rate": 2.6124269396929256e-05, + "loss": 0.169, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17529018223285675, + "step": 2185, + "valid_targets_mean": 4556.4, + "valid_targets_min": 562 + }, + { + "epoch": 3.234859675036928, + "grad_norm": 0.40325224428720596, + "learning_rate": 2.6054106575833206e-05, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18165422976016998, + "step": 2190, + "valid_targets_mean": 5751.3, + "valid_targets_min": 644 + }, + { + "epoch": 3.242245199409158, + "grad_norm": 0.4248179861853043, + "learning_rate": 2.5983861634299715e-05, + "loss": 0.1938, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15987026691436768, + "step": 2195, + "valid_targets_mean": 5245.4, + "valid_targets_min": 605 + }, + { + "epoch": 3.2496307237813884, + "grad_norm": 0.43315325209313854, + "learning_rate": 2.5913535525160608e-05, + "loss": 0.2076, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19797955453395844, + "step": 2200, + "valid_targets_mean": 5092.6, + "valid_targets_min": 595 + }, + { + "epoch": 3.2570162481536187, + "grad_norm": 0.45899763861106463, + "learning_rate": 2.5843129202348666e-05, + "loss": 0.1825, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1837209016084671, + "step": 2205, + "valid_targets_mean": 5431.0, + "valid_targets_min": 905 + }, + { + "epoch": 3.2644017725258494, + "grad_norm": 0.34185089958623904, + "learning_rate": 2.577264362088475e-05, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12556633353233337, + "step": 2210, + "valid_targets_mean": 6299.7, + "valid_targets_min": 521 + }, + { + "epoch": 3.2717872968980797, + "grad_norm": 0.41205572297039716, + "learning_rate": 2.5702079736864815e-05, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16507720947265625, + "step": 2215, + "valid_targets_mean": 5128.1, + "valid_targets_min": 821 + }, + { + "epoch": 3.2791728212703104, + "grad_norm": 0.3997869068309829, + "learning_rate": 2.5631438507446936e-05, + "loss": 0.1922, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1773259937763214, + "step": 2220, + "valid_targets_mean": 5669.0, + "valid_targets_min": 665 + }, + { + "epoch": 3.2865583456425407, + "grad_norm": 0.4984598466794601, + "learning_rate": 2.5560720890838332e-05, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2146761119365692, + "step": 2225, + "valid_targets_mean": 4352.7, + "valid_targets_min": 648 + }, + { + "epoch": 3.293943870014771, + "grad_norm": 0.47607510524209484, + "learning_rate": 2.5489927846282386e-05, + "loss": 0.1822, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1854625642299652, + "step": 2230, + "valid_targets_mean": 4233.4, + "valid_targets_min": 504 + }, + { + "epoch": 3.3013293943870012, + "grad_norm": 0.4239465864105787, + "learning_rate": 2.5419060334045606e-05, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17958292365074158, + "step": 2235, + "valid_targets_mean": 5336.4, + "valid_targets_min": 674 + }, + { + "epoch": 3.308714918759232, + "grad_norm": 0.4619161542340498, + "learning_rate": 2.5348119315404604e-05, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17576739192008972, + "step": 2240, + "valid_targets_mean": 5028.8, + "valid_targets_min": 572 + }, + { + "epoch": 3.3161004431314622, + "grad_norm": 0.37782695093104735, + "learning_rate": 2.5277105752633083e-05, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1741185039281845, + "step": 2245, + "valid_targets_mean": 5921.9, + "valid_targets_min": 425 + }, + { + "epoch": 3.323485967503693, + "grad_norm": 0.46105586751392597, + "learning_rate": 2.520602060898874e-05, + "loss": 0.1815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2181580513715744, + "step": 2250, + "valid_targets_mean": 4782.2, + "valid_targets_min": 785 + }, + { + "epoch": 3.3308714918759232, + "grad_norm": 0.44233045621174916, + "learning_rate": 2.5134864848700247e-05, + "loss": 0.1926, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21820692718029022, + "step": 2255, + "valid_targets_mean": 5393.2, + "valid_targets_min": 628 + }, + { + "epoch": 3.3382570162481535, + "grad_norm": 0.4176926479653841, + "learning_rate": 2.5063639436954145e-05, + "loss": 0.1949, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2524767518043518, + "step": 2260, + "valid_targets_mean": 6778.2, + "valid_targets_min": 609 + }, + { + "epoch": 3.345642540620384, + "grad_norm": 0.3785623480657544, + "learning_rate": 2.499234533988174e-05, + "loss": 0.2208, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15883594751358032, + "step": 2265, + "valid_targets_mean": 6011.2, + "valid_targets_min": 541 + }, + { + "epoch": 3.3530280649926145, + "grad_norm": 0.9489103748663574, + "learning_rate": 2.4920983524546035e-05, + "loss": 0.1826, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19622023403644562, + "step": 2270, + "valid_targets_mean": 2849.2, + "valid_targets_min": 336 + }, + { + "epoch": 3.360413589364845, + "grad_norm": 0.4851485979634193, + "learning_rate": 2.4849554958928582e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1570294201374054, + "step": 2275, + "valid_targets_mean": 4828.0, + "valid_targets_min": 533 + }, + { + "epoch": 3.3677991137370755, + "grad_norm": 0.38871049650595657, + "learning_rate": 2.477806061191637e-05, + "loss": 0.1867, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16449853777885437, + "step": 2280, + "valid_targets_mean": 6279.4, + "valid_targets_min": 623 + }, + { + "epoch": 3.375184638109306, + "grad_norm": 0.3846163906395918, + "learning_rate": 2.4706501453288656e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17619986832141876, + "step": 2285, + "valid_targets_mean": 5938.9, + "valid_targets_min": 718 + }, + { + "epoch": 3.382570162481536, + "grad_norm": 0.4036981390266134, + "learning_rate": 2.4634878453703857e-05, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16103024780750275, + "step": 2290, + "valid_targets_mean": 5504.4, + "valid_targets_min": 1317 + }, + { + "epoch": 3.389955686853767, + "grad_norm": 0.6582445011580204, + "learning_rate": 2.4563192584686325e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.192658931016922, + "step": 2295, + "valid_targets_mean": 3992.2, + "valid_targets_min": 602 + }, + { + "epoch": 3.397341211225997, + "grad_norm": 0.37316580719925574, + "learning_rate": 2.4491444818613218e-05, + "loss": 0.1823, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1514277458190918, + "step": 2300, + "valid_targets_mean": 6166.7, + "valid_targets_min": 493 + }, + { + "epoch": 3.4047267355982274, + "grad_norm": 0.36651337529440386, + "learning_rate": 2.441963612870129e-05, + "loss": 0.1829, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17430251836776733, + "step": 2305, + "valid_targets_mean": 6701.6, + "valid_targets_min": 1299 + }, + { + "epoch": 3.412112259970458, + "grad_norm": 0.4562920402638719, + "learning_rate": 2.4347767488993697e-05, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17882443964481354, + "step": 2310, + "valid_targets_mean": 4928.1, + "valid_targets_min": 677 + }, + { + "epoch": 3.4194977843426884, + "grad_norm": 0.40217427857200977, + "learning_rate": 2.4275839874346757e-05, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21625502407550812, + "step": 2315, + "valid_targets_mean": 5914.1, + "valid_targets_min": 642 + }, + { + "epoch": 3.4268833087149186, + "grad_norm": 0.4676729858346907, + "learning_rate": 2.4203854260416764e-05, + "loss": 0.1719, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18612849712371826, + "step": 2320, + "valid_targets_mean": 4202.7, + "valid_targets_min": 513 + }, + { + "epoch": 3.4342688330871494, + "grad_norm": 0.5053850543812328, + "learning_rate": 2.4131811623646745e-05, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20099109411239624, + "step": 2325, + "valid_targets_mean": 5066.5, + "valid_targets_min": 799 + }, + { + "epoch": 3.4416543574593796, + "grad_norm": 0.4177536175359703, + "learning_rate": 2.4059712941253205e-05, + "loss": 0.1731, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17040522396564484, + "step": 2330, + "valid_targets_mean": 6019.4, + "valid_targets_min": 622 + }, + { + "epoch": 3.44903988183161, + "grad_norm": 0.3896994204855158, + "learning_rate": 2.3987559191212855e-05, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17240211367607117, + "step": 2335, + "valid_targets_mean": 5771.4, + "valid_targets_min": 820 + }, + { + "epoch": 3.4564254062038406, + "grad_norm": 0.4519987685160109, + "learning_rate": 2.3915351352249392e-05, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.194617360830307, + "step": 2340, + "valid_targets_mean": 4527.7, + "valid_targets_min": 726 + }, + { + "epoch": 3.463810930576071, + "grad_norm": 0.5253399465778659, + "learning_rate": 2.3843090403820198e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16879601776599884, + "step": 2345, + "valid_targets_mean": 6142.8, + "valid_targets_min": 451 + }, + { + "epoch": 3.471196454948301, + "grad_norm": 0.4277432003720903, + "learning_rate": 2.3770777326103033e-05, + "loss": 0.1891, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18401449918746948, + "step": 2350, + "valid_targets_mean": 5573.9, + "valid_targets_min": 477 + }, + { + "epoch": 3.478581979320532, + "grad_norm": 0.4257500742732944, + "learning_rate": 2.3698413099982772e-05, + "loss": 0.1868, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23105338215827942, + "step": 2355, + "valid_targets_mean": 6022.1, + "valid_targets_min": 577 + }, + { + "epoch": 3.485967503692762, + "grad_norm": 0.35202882188870804, + "learning_rate": 2.3625998707038095e-05, + "loss": 0.1754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15384787321090698, + "step": 2360, + "valid_targets_mean": 6247.2, + "valid_targets_min": 743 + }, + { + "epoch": 3.4933530280649925, + "grad_norm": 0.5362069711243463, + "learning_rate": 2.355353512952816e-05, + "loss": 0.1986, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2501859962940216, + "step": 2365, + "valid_targets_mean": 3637.5, + "valid_targets_min": 557 + }, + { + "epoch": 3.500738552437223, + "grad_norm": 0.627740196350393, + "learning_rate": 2.3481023350379282e-05, + "loss": 0.1854, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1972651183605194, + "step": 2370, + "valid_targets_mean": 3431.4, + "valid_targets_min": 298 + }, + { + "epoch": 3.5081240768094535, + "grad_norm": 0.42738807000739665, + "learning_rate": 2.3408464353171603e-05, + "loss": 0.1675, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17980587482452393, + "step": 2375, + "valid_targets_mean": 4957.2, + "valid_targets_min": 606 + }, + { + "epoch": 3.5155096011816838, + "grad_norm": 0.4123970309337914, + "learning_rate": 2.3335859122125762e-05, + "loss": 0.1848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1853245347738266, + "step": 2380, + "valid_targets_mean": 4892.4, + "valid_targets_min": 658 + }, + { + "epoch": 3.5228951255539145, + "grad_norm": 0.42163919981185066, + "learning_rate": 2.3263208642089517e-05, + "loss": 0.1855, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15585747361183167, + "step": 2385, + "valid_targets_mean": 4967.4, + "valid_targets_min": 606 + }, + { + "epoch": 3.5302806499261448, + "grad_norm": 0.3631774625240425, + "learning_rate": 2.319051389852441e-05, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15152710676193237, + "step": 2390, + "valid_targets_mean": 6531.5, + "valid_targets_min": 526 + }, + { + "epoch": 3.537666174298375, + "grad_norm": 0.4386225168817978, + "learning_rate": 2.311777587749239e-05, + "loss": 0.187, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17836642265319824, + "step": 2395, + "valid_targets_mean": 5189.2, + "valid_targets_min": 400 + }, + { + "epoch": 3.5450516986706058, + "grad_norm": 0.37272488284870037, + "learning_rate": 2.3044995565642453e-05, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15524575114250183, + "step": 2400, + "valid_targets_mean": 5844.1, + "valid_targets_min": 323 + }, + { + "epoch": 3.552437223042836, + "grad_norm": 0.4273954739176906, + "learning_rate": 2.2972173950197223e-05, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19005119800567627, + "step": 2405, + "valid_targets_mean": 5287.6, + "valid_targets_min": 522 + }, + { + "epoch": 3.5598227474150663, + "grad_norm": 0.3809128573118705, + "learning_rate": 2.2899312018939598e-05, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16557791829109192, + "step": 2410, + "valid_targets_mean": 5387.4, + "valid_targets_min": 424 + }, + { + "epoch": 3.567208271787297, + "grad_norm": 0.445270409658121, + "learning_rate": 2.2826410760199327e-05, + "loss": 0.1768, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16389130055904388, + "step": 2415, + "valid_targets_mean": 6058.5, + "valid_targets_min": 699 + }, + { + "epoch": 3.5745937961595273, + "grad_norm": 0.7167965047318506, + "learning_rate": 2.2753471162839637e-05, + "loss": 0.1758, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20100605487823486, + "step": 2420, + "valid_targets_mean": 6238.5, + "valid_targets_min": 567 + }, + { + "epoch": 3.5819793205317576, + "grad_norm": 0.44061592794128485, + "learning_rate": 2.2680494216243762e-05, + "loss": 0.2084, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20798034965991974, + "step": 2425, + "valid_targets_mean": 5419.6, + "valid_targets_min": 628 + }, + { + "epoch": 3.5893648449039883, + "grad_norm": 0.39676272392212464, + "learning_rate": 2.2607480910301564e-05, + "loss": 0.1939, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18191611766815186, + "step": 2430, + "valid_targets_mean": 6013.3, + "valid_targets_min": 704 + }, + { + "epoch": 3.5967503692762186, + "grad_norm": 0.38941782318875595, + "learning_rate": 2.253443223539613e-05, + "loss": 0.17, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17795924842357635, + "step": 2435, + "valid_targets_mean": 5669.7, + "valid_targets_min": 647 + }, + { + "epoch": 3.604135893648449, + "grad_norm": 0.3942812475735809, + "learning_rate": 2.246134918239028e-05, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1445077508687973, + "step": 2440, + "valid_targets_mean": 5918.6, + "valid_targets_min": 641 + }, + { + "epoch": 3.6115214180206796, + "grad_norm": 0.3878199914386221, + "learning_rate": 2.2388232742613146e-05, + "loss": 0.1951, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15950973331928253, + "step": 2445, + "valid_targets_mean": 5599.8, + "valid_targets_min": 462 + }, + { + "epoch": 3.61890694239291, + "grad_norm": 0.40119193945091164, + "learning_rate": 2.231508390784674e-05, + "loss": 0.1777, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18693730235099792, + "step": 2450, + "valid_targets_mean": 5479.7, + "valid_targets_min": 683 + }, + { + "epoch": 3.62629246676514, + "grad_norm": 0.41270041132362517, + "learning_rate": 2.2241903670312516e-05, + "loss": 0.1769, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1751289963722229, + "step": 2455, + "valid_targets_mean": 6304.4, + "valid_targets_min": 729 + }, + { + "epoch": 3.633677991137371, + "grad_norm": 0.37919593321209355, + "learning_rate": 2.216869302265785e-05, + "loss": 0.1683, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.180336594581604, + "step": 2460, + "valid_targets_mean": 6089.2, + "valid_targets_min": 683 + }, + { + "epoch": 3.641063515509601, + "grad_norm": 0.41713272138692914, + "learning_rate": 2.2095452957942643e-05, + "loss": 0.1783, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17298147082328796, + "step": 2465, + "valid_targets_mean": 5199.1, + "valid_targets_min": 547 + }, + { + "epoch": 3.6484490398818314, + "grad_norm": 0.5197567957007619, + "learning_rate": 2.2022184469625797e-05, + "loss": 0.1859, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1868414729833603, + "step": 2470, + "valid_targets_mean": 4258.9, + "valid_targets_min": 620 + }, + { + "epoch": 3.655834564254062, + "grad_norm": 0.39404420347224917, + "learning_rate": 2.1948888551551795e-05, + "loss": 0.175, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16973114013671875, + "step": 2475, + "valid_targets_mean": 5702.5, + "valid_targets_min": 528 + }, + { + "epoch": 3.6632200886262924, + "grad_norm": 0.49672107594538334, + "learning_rate": 2.1875566197937154e-05, + "loss": 0.1907, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22505685687065125, + "step": 2480, + "valid_targets_mean": 4229.1, + "valid_targets_min": 614 + }, + { + "epoch": 3.670605612998523, + "grad_norm": 0.5386862158554978, + "learning_rate": 2.1802218403357003e-05, + "loss": 0.1815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18474605679512024, + "step": 2485, + "valid_targets_mean": 3811.4, + "valid_targets_min": 608 + }, + { + "epoch": 3.6779911373707534, + "grad_norm": 0.4199309274764877, + "learning_rate": 2.1728846162731547e-05, + "loss": 0.1781, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16229534149169922, + "step": 2490, + "valid_targets_mean": 4422.1, + "valid_targets_min": 602 + }, + { + "epoch": 3.6853766617429837, + "grad_norm": 0.44213788636825774, + "learning_rate": 2.1655450471312587e-05, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16909149289131165, + "step": 2495, + "valid_targets_mean": 4642.9, + "valid_targets_min": 670 + }, + { + "epoch": 3.692762186115214, + "grad_norm": 0.47756920378888834, + "learning_rate": 2.1582032324670024e-05, + "loss": 0.199, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21273420751094818, + "step": 2500, + "valid_targets_mean": 4483.9, + "valid_targets_min": 573 + }, + { + "epoch": 3.7001477104874447, + "grad_norm": 1.7776796913692625, + "learning_rate": 2.150859271867836e-05, + "loss": 0.1863, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20686309039592743, + "step": 2505, + "valid_targets_mean": 3814.2, + "valid_targets_min": 282 + }, + { + "epoch": 3.707533234859675, + "grad_norm": 0.4541222702170866, + "learning_rate": 2.143513264950316e-05, + "loss": 0.1779, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16933034360408783, + "step": 2510, + "valid_targets_mean": 4283.7, + "valid_targets_min": 360 + }, + { + "epoch": 3.7149187592319057, + "grad_norm": 0.512488878559065, + "learning_rate": 2.1361653113587583e-05, + "loss": 0.1981, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23736466467380524, + "step": 2515, + "valid_targets_mean": 4091.1, + "valid_targets_min": 553 + }, + { + "epoch": 3.722304283604136, + "grad_norm": 0.5026024047876461, + "learning_rate": 2.128815510763882e-05, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19661220908164978, + "step": 2520, + "valid_targets_mean": 3568.8, + "valid_targets_min": 479 + }, + { + "epoch": 3.7296898079763663, + "grad_norm": 0.4756178800621908, + "learning_rate": 2.1214639628614618e-05, + "loss": 0.1717, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2067551612854004, + "step": 2525, + "valid_targets_mean": 4420.6, + "valid_targets_min": 535 + }, + { + "epoch": 3.7370753323485966, + "grad_norm": 0.4481991423508541, + "learning_rate": 2.1141107673709727e-05, + "loss": 0.1831, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2029070109128952, + "step": 2530, + "valid_targets_mean": 4889.3, + "valid_targets_min": 671 + }, + { + "epoch": 3.7444608567208273, + "grad_norm": 0.4486218822810619, + "learning_rate": 2.1067560240342382e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19874705374240875, + "step": 2535, + "valid_targets_mean": 4725.5, + "valid_targets_min": 309 + }, + { + "epoch": 3.7518463810930576, + "grad_norm": 0.42580796837346163, + "learning_rate": 2.0993998326140774e-05, + "loss": 0.1857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17727911472320557, + "step": 2540, + "valid_targets_mean": 4696.8, + "valid_targets_min": 629 + }, + { + "epoch": 3.7592319054652883, + "grad_norm": 0.46276463063827256, + "learning_rate": 2.0920422928929514e-05, + "loss": 0.165, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21701931953430176, + "step": 2545, + "valid_targets_mean": 5121.4, + "valid_targets_min": 620 + }, + { + "epoch": 3.7666174298375186, + "grad_norm": 0.39520678275058635, + "learning_rate": 2.0846835046716124e-05, + "loss": 0.1798, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2055937796831131, + "step": 2550, + "valid_targets_mean": 6235.9, + "valid_targets_min": 743 + }, + { + "epoch": 3.774002954209749, + "grad_norm": 0.3749059071380504, + "learning_rate": 2.0773235677677453e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1439509242773056, + "step": 2555, + "valid_targets_mean": 5177.5, + "valid_targets_min": 601 + }, + { + "epoch": 3.781388478581979, + "grad_norm": 0.4376582038796636, + "learning_rate": 2.0699625820146166e-05, + "loss": 0.1804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19691312313079834, + "step": 2560, + "valid_targets_mean": 4715.4, + "valid_targets_min": 388 + }, + { + "epoch": 3.78877400295421, + "grad_norm": 0.4555515812161259, + "learning_rate": 2.0626006472597203e-05, + "loss": 0.1937, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2008768618106842, + "step": 2565, + "valid_targets_mean": 4623.0, + "valid_targets_min": 731 + }, + { + "epoch": 3.79615952732644, + "grad_norm": 0.459171004943161, + "learning_rate": 2.055237863363424e-05, + "loss": 0.1948, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21966755390167236, + "step": 2570, + "valid_targets_mean": 4648.9, + "valid_targets_min": 615 + }, + { + "epoch": 3.803545051698671, + "grad_norm": 0.4284579195415132, + "learning_rate": 2.047874330197612e-05, + "loss": 0.1984, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17750242352485657, + "step": 2575, + "valid_targets_mean": 5786.1, + "valid_targets_min": 697 + }, + { + "epoch": 3.810930576070901, + "grad_norm": 0.5052628017425496, + "learning_rate": 2.040510147644332e-05, + "loss": 0.1835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1922721564769745, + "step": 2580, + "valid_targets_mean": 4639.9, + "valid_targets_min": 664 + }, + { + "epoch": 3.8183161004431314, + "grad_norm": 0.5209698856989999, + "learning_rate": 2.033145415594441e-05, + "loss": 0.1807, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1749410480260849, + "step": 2585, + "valid_targets_mean": 4522.6, + "valid_targets_min": 314 + }, + { + "epoch": 3.8257016248153617, + "grad_norm": 0.5289497505816443, + "learning_rate": 2.0257802339462497e-05, + "loss": 0.2188, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2323957085609436, + "step": 2590, + "valid_targets_mean": 3958.1, + "valid_targets_min": 257 + }, + { + "epoch": 3.8330871491875924, + "grad_norm": 0.4429473903371753, + "learning_rate": 2.018414702604167e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17129692435264587, + "step": 2595, + "valid_targets_mean": 4951.2, + "valid_targets_min": 535 + }, + { + "epoch": 3.8404726735598227, + "grad_norm": 0.5664110286490077, + "learning_rate": 2.011048921477345e-05, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19464537501335144, + "step": 2600, + "valid_targets_mean": 4107.1, + "valid_targets_min": 743 + }, + { + "epoch": 3.8478581979320534, + "grad_norm": 0.48372759494178025, + "learning_rate": 2.0036829904783234e-05, + "loss": 0.1714, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15631017088890076, + "step": 2605, + "valid_targets_mean": 5911.1, + "valid_targets_min": 535 + }, + { + "epoch": 3.8552437223042837, + "grad_norm": 0.5771058795231779, + "learning_rate": 1.996317009521677e-05, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16237816214561462, + "step": 2610, + "valid_targets_mean": 4347.8, + "valid_targets_min": 565 + }, + { + "epoch": 3.862629246676514, + "grad_norm": 0.3451442793961211, + "learning_rate": 1.9889510785226556e-05, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16310572624206543, + "step": 2615, + "valid_targets_mean": 6855.8, + "valid_targets_min": 537 + }, + { + "epoch": 3.8700147710487443, + "grad_norm": 0.47160578182778307, + "learning_rate": 1.981585297395833e-05, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18903210759162903, + "step": 2620, + "valid_targets_mean": 4925.4, + "valid_targets_min": 490 + }, + { + "epoch": 3.877400295420975, + "grad_norm": 0.4112730275680861, + "learning_rate": 1.9742197660537503e-05, + "loss": 0.1801, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1774231493473053, + "step": 2625, + "valid_targets_mean": 4865.4, + "valid_targets_min": 566 + }, + { + "epoch": 3.8847858197932053, + "grad_norm": 0.3852153308078606, + "learning_rate": 1.96685458440556e-05, + "loss": 0.1918, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21487516164779663, + "step": 2630, + "valid_targets_mean": 6536.1, + "valid_targets_min": 666 + }, + { + "epoch": 3.892171344165436, + "grad_norm": 0.4266578300389501, + "learning_rate": 1.9594898523556688e-05, + "loss": 0.2017, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1812269687652588, + "step": 2635, + "valid_targets_mean": 4853.8, + "valid_targets_min": 509 + }, + { + "epoch": 3.8995568685376663, + "grad_norm": 0.47478356489339524, + "learning_rate": 1.952125669802389e-05, + "loss": 0.1634, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1804153323173523, + "step": 2640, + "valid_targets_mean": 4120.0, + "valid_targets_min": 229 + }, + { + "epoch": 3.9069423929098965, + "grad_norm": 0.3984291783021106, + "learning_rate": 1.944762136636577e-05, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14626066386699677, + "step": 2645, + "valid_targets_mean": 5470.4, + "valid_targets_min": 482 + }, + { + "epoch": 3.914327917282127, + "grad_norm": 0.4145840145456443, + "learning_rate": 1.9373993527402803e-05, + "loss": 0.1842, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17797842621803284, + "step": 2650, + "valid_targets_mean": 5413.2, + "valid_targets_min": 524 + }, + { + "epoch": 3.9217134416543575, + "grad_norm": 0.43768905570389377, + "learning_rate": 1.930037417985384e-05, + "loss": 0.1846, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1793164610862732, + "step": 2655, + "valid_targets_mean": 4557.5, + "valid_targets_min": 563 + }, + { + "epoch": 3.929098966026588, + "grad_norm": 0.4471133890380236, + "learning_rate": 1.9226764322322554e-05, + "loss": 0.183, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21864616870880127, + "step": 2660, + "valid_targets_mean": 5254.9, + "valid_targets_min": 788 + }, + { + "epoch": 3.9364844903988185, + "grad_norm": 0.4345555815624583, + "learning_rate": 1.915316495328388e-05, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18304719030857086, + "step": 2665, + "valid_targets_mean": 4897.6, + "valid_targets_min": 530 + }, + { + "epoch": 3.943870014771049, + "grad_norm": 0.46340151961537496, + "learning_rate": 1.9079577071070486e-05, + "loss": 0.1697, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1812700629234314, + "step": 2670, + "valid_targets_mean": 4532.0, + "valid_targets_min": 570 + }, + { + "epoch": 3.951255539143279, + "grad_norm": 0.46722063462251806, + "learning_rate": 1.9006001673859236e-05, + "loss": 0.1755, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18102481961250305, + "step": 2675, + "valid_targets_mean": 4135.3, + "valid_targets_min": 531 + }, + { + "epoch": 3.9586410635155094, + "grad_norm": 0.46809132826813665, + "learning_rate": 1.8932439759657628e-05, + "loss": 0.179, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2036694884300232, + "step": 2680, + "valid_targets_mean": 4903.4, + "valid_targets_min": 455 + }, + { + "epoch": 3.96602658788774, + "grad_norm": 0.4224456008680522, + "learning_rate": 1.8858892326290283e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20503783226013184, + "step": 2685, + "valid_targets_mean": 4960.2, + "valid_targets_min": 563 + }, + { + "epoch": 3.9734121122599704, + "grad_norm": 0.3976755577199375, + "learning_rate": 1.878536037138539e-05, + "loss": 0.1796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16833657026290894, + "step": 2690, + "valid_targets_mean": 5052.5, + "valid_targets_min": 660 + }, + { + "epoch": 3.980797636632201, + "grad_norm": 0.4015922991970415, + "learning_rate": 1.8711844892361187e-05, + "loss": 0.1908, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17497824132442474, + "step": 2695, + "valid_targets_mean": 6211.0, + "valid_targets_min": 703 + }, + { + "epoch": 3.9881831610044314, + "grad_norm": 0.46100259598039117, + "learning_rate": 1.8638346886412427e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17598775029182434, + "step": 2700, + "valid_targets_mean": 4511.3, + "valid_targets_min": 677 + }, + { + "epoch": 3.9955686853766617, + "grad_norm": 0.5263845759468841, + "learning_rate": 1.8564867350496845e-05, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1905372142791748, + "step": 2705, + "valid_targets_mean": 3814.4, + "valid_targets_min": 561 + }, + { + "epoch": 4.002954209748892, + "grad_norm": 0.34980228182591305, + "learning_rate": 1.8491407281321647e-05, + "loss": 0.172, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14900560677051544, + "step": 2710, + "valid_targets_mean": 6249.2, + "valid_targets_min": 320 + }, + { + "epoch": 4.010339734121122, + "grad_norm": 0.5143265300926454, + "learning_rate": 1.8417967675329975e-05, + "loss": 0.1616, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17539595067501068, + "step": 2715, + "valid_targets_mean": 3539.0, + "valid_targets_min": 433 + }, + { + "epoch": 4.017725258493353, + "grad_norm": 0.3935516918907612, + "learning_rate": 1.8344549528687423e-05, + "loss": 0.1737, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18661487102508545, + "step": 2720, + "valid_targets_mean": 6973.4, + "valid_targets_min": 441 + }, + { + "epoch": 4.025110782865584, + "grad_norm": 0.44755179204475903, + "learning_rate": 1.8271153837268464e-05, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15159180760383606, + "step": 2725, + "valid_targets_mean": 4417.1, + "valid_targets_min": 673 + }, + { + "epoch": 4.032496307237814, + "grad_norm": 0.42283273517778147, + "learning_rate": 1.8197781596643e-05, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18647818267345428, + "step": 2730, + "valid_targets_mean": 5626.2, + "valid_targets_min": 648 + }, + { + "epoch": 4.039881831610044, + "grad_norm": 0.39804774495793865, + "learning_rate": 1.812443380206285e-05, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13898664712905884, + "step": 2735, + "valid_targets_mean": 5607.3, + "valid_targets_min": 827 + }, + { + "epoch": 4.0472673559822745, + "grad_norm": 0.4952592545309753, + "learning_rate": 1.805111144844821e-05, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17651236057281494, + "step": 2740, + "valid_targets_mean": 4273.1, + "valid_targets_min": 564 + }, + { + "epoch": 4.054652880354505, + "grad_norm": 0.5267789359486101, + "learning_rate": 1.7977815530374207e-05, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16626033186912537, + "step": 2745, + "valid_targets_mean": 3623.0, + "valid_targets_min": 125 + }, + { + "epoch": 4.062038404726736, + "grad_norm": 0.5196745978105738, + "learning_rate": 1.7904547042057364e-05, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16587507724761963, + "step": 2750, + "valid_targets_mean": 4396.9, + "valid_targets_min": 466 + }, + { + "epoch": 4.069423929098966, + "grad_norm": 0.4220475584040045, + "learning_rate": 1.7831306977342152e-05, + "loss": 0.1498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14500692486763, + "step": 2755, + "valid_targets_mean": 4850.2, + "valid_targets_min": 524 + }, + { + "epoch": 4.0768094534711965, + "grad_norm": 0.42814857370303006, + "learning_rate": 1.7758096329687488e-05, + "loss": 0.1562, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13999520242214203, + "step": 2760, + "valid_targets_mean": 5521.1, + "valid_targets_min": 606 + }, + { + "epoch": 4.084194977843427, + "grad_norm": 0.3859057452591373, + "learning_rate": 1.7684916092153262e-05, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14261415600776672, + "step": 2765, + "valid_targets_mean": 5565.9, + "valid_targets_min": 672 + }, + { + "epoch": 4.091580502215657, + "grad_norm": 0.42241050834024985, + "learning_rate": 1.7611767257386864e-05, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1470344364643097, + "step": 2770, + "valid_targets_mean": 5220.9, + "valid_targets_min": 674 + }, + { + "epoch": 4.098966026587887, + "grad_norm": 0.416789874748107, + "learning_rate": 1.753865081760973e-05, + "loss": 0.1645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17493118345737457, + "step": 2775, + "valid_targets_mean": 5951.9, + "valid_targets_min": 606 + }, + { + "epoch": 4.1063515509601185, + "grad_norm": 0.46960220255251917, + "learning_rate": 1.7465567764603876e-05, + "loss": 0.1671, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17590011656284332, + "step": 2780, + "valid_targets_mean": 4606.5, + "valid_targets_min": 605 + }, + { + "epoch": 4.113737075332349, + "grad_norm": 0.4907659502885251, + "learning_rate": 1.739251908969844e-05, + "loss": 0.1695, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20186254382133484, + "step": 2785, + "valid_targets_mean": 4470.1, + "valid_targets_min": 718 + }, + { + "epoch": 4.121122599704579, + "grad_norm": 0.3933868369919279, + "learning_rate": 1.7319505783756248e-05, + "loss": 0.1409, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12017552554607391, + "step": 2790, + "valid_targets_mean": 6450.1, + "valid_targets_min": 476 + }, + { + "epoch": 4.128508124076809, + "grad_norm": 0.4949323916381371, + "learning_rate": 1.724652883716037e-05, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15612033009529114, + "step": 2795, + "valid_targets_mean": 4150.4, + "valid_targets_min": 518 + }, + { + "epoch": 4.13589364844904, + "grad_norm": 0.40509378331922025, + "learning_rate": 1.7173589239800673e-05, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22377564013004303, + "step": 2800, + "valid_targets_mean": 6398.9, + "valid_targets_min": 838 + }, + { + "epoch": 4.14327917282127, + "grad_norm": 0.4108689853913775, + "learning_rate": 1.7100687981060405e-05, + "loss": 0.1789, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1769275963306427, + "step": 2805, + "valid_targets_mean": 6090.0, + "valid_targets_min": 1026 + }, + { + "epoch": 4.150664697193501, + "grad_norm": 0.5162692493811, + "learning_rate": 1.7027826049802787e-05, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1626986861228943, + "step": 2810, + "valid_targets_mean": 3471.9, + "valid_targets_min": 454 + }, + { + "epoch": 4.158050221565731, + "grad_norm": 0.5312767885071268, + "learning_rate": 1.6955004434357557e-05, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20601385831832886, + "step": 2815, + "valid_targets_mean": 4762.1, + "valid_targets_min": 355 + }, + { + "epoch": 4.165435745937962, + "grad_norm": 0.4235145894913193, + "learning_rate": 1.6882224122507617e-05, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1396898776292801, + "step": 2820, + "valid_targets_mean": 5606.3, + "valid_targets_min": 456 + }, + { + "epoch": 4.172821270310192, + "grad_norm": 0.416756640748203, + "learning_rate": 1.6809486101475595e-05, + "loss": 0.1631, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21778830885887146, + "step": 2825, + "valid_targets_mean": 7377.4, + "valid_targets_min": 657 + }, + { + "epoch": 4.180206794682422, + "grad_norm": 0.5252822718202776, + "learning_rate": 1.673679135791049e-05, + "loss": 0.177, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19100016355514526, + "step": 2830, + "valid_targets_mean": 4078.6, + "valid_targets_min": 584 + }, + { + "epoch": 4.1875923190546525, + "grad_norm": 0.529831098741841, + "learning_rate": 1.6664140877874245e-05, + "loss": 0.1645, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18276384472846985, + "step": 2835, + "valid_targets_mean": 3844.6, + "valid_targets_min": 532 + }, + { + "epoch": 4.194977843426884, + "grad_norm": 0.3492533131434477, + "learning_rate": 1.65915356468284e-05, + "loss": 0.1474, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11477085202932358, + "step": 2840, + "valid_targets_mean": 7473.1, + "valid_targets_min": 829 + }, + { + "epoch": 4.202363367799114, + "grad_norm": 0.46188579373201377, + "learning_rate": 1.651897664962072e-05, + "loss": 0.1623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15958110988140106, + "step": 2845, + "valid_targets_mean": 4629.6, + "valid_targets_min": 477 + }, + { + "epoch": 4.209748892171344, + "grad_norm": 0.36990404396979687, + "learning_rate": 1.644646487047184e-05, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1521548181772232, + "step": 2850, + "valid_targets_mean": 7015.2, + "valid_targets_min": 861 + }, + { + "epoch": 4.2171344165435745, + "grad_norm": 0.4062689203699344, + "learning_rate": 1.6374001292961915e-05, + "loss": 0.1548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1495155543088913, + "step": 2855, + "valid_targets_mean": 5666.6, + "valid_targets_min": 760 + }, + { + "epoch": 4.224519940915805, + "grad_norm": 0.4252533883734268, + "learning_rate": 1.630158690001723e-05, + "loss": 0.1794, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17348308861255646, + "step": 2860, + "valid_targets_mean": 5389.8, + "valid_targets_min": 461 + }, + { + "epoch": 4.231905465288035, + "grad_norm": 0.44977416984855956, + "learning_rate": 1.6229222673896977e-05, + "loss": 0.1767, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19169771671295166, + "step": 2865, + "valid_targets_mean": 6240.2, + "valid_targets_min": 648 + }, + { + "epoch": 4.239290989660266, + "grad_norm": 0.517268002202166, + "learning_rate": 1.615690959617981e-05, + "loss": 0.1882, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1724705845117569, + "step": 2870, + "valid_targets_mean": 4480.9, + "valid_targets_min": 546 + }, + { + "epoch": 4.2466765140324965, + "grad_norm": 0.5034227859483678, + "learning_rate": 1.608464864775061e-05, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15206247568130493, + "step": 2875, + "valid_targets_mean": 5756.9, + "valid_targets_min": 883 + }, + { + "epoch": 4.254062038404727, + "grad_norm": 0.3934998046892, + "learning_rate": 1.601244080878715e-05, + "loss": 0.1657, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1561555415391922, + "step": 2880, + "valid_targets_mean": 7151.8, + "valid_targets_min": 637 + }, + { + "epoch": 4.261447562776957, + "grad_norm": 0.4021841720607587, + "learning_rate": 1.5940287058746805e-05, + "loss": 0.1617, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14158207178115845, + "step": 2885, + "valid_targets_mean": 4988.3, + "valid_targets_min": 691 + }, + { + "epoch": 4.268833087149187, + "grad_norm": 0.6182741776921272, + "learning_rate": 1.5868188376353258e-05, + "loss": 0.1666, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1992129683494568, + "step": 2890, + "valid_targets_mean": 5678.7, + "valid_targets_min": 778 + }, + { + "epoch": 4.2762186115214185, + "grad_norm": 0.4614176577748903, + "learning_rate": 1.579614573958324e-05, + "loss": 0.161, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17441459000110626, + "step": 2895, + "valid_targets_mean": 5331.5, + "valid_targets_min": 324 + }, + { + "epoch": 4.283604135893649, + "grad_norm": 0.5039839970397774, + "learning_rate": 1.5724160125653253e-05, + "loss": 0.1745, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18613968789577484, + "step": 2900, + "valid_targets_mean": 4360.8, + "valid_targets_min": 575 + }, + { + "epoch": 4.290989660265879, + "grad_norm": 0.56591190309574, + "learning_rate": 1.5652232511006313e-05, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21027769148349762, + "step": 2905, + "valid_targets_mean": 3856.9, + "valid_targets_min": 362 + }, + { + "epoch": 4.298375184638109, + "grad_norm": 0.5045039977125617, + "learning_rate": 1.5580363871298713e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15514475107192993, + "step": 2910, + "valid_targets_mean": 4200.1, + "valid_targets_min": 425 + }, + { + "epoch": 4.30576070901034, + "grad_norm": 0.5630509270539151, + "learning_rate": 1.550855518138679e-05, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2181616574525833, + "step": 2915, + "valid_targets_mean": 3684.9, + "valid_targets_min": 668 + }, + { + "epoch": 4.31314623338257, + "grad_norm": 0.50018909523244, + "learning_rate": 1.543680741531368e-05, + "loss": 0.1833, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1719134896993637, + "step": 2920, + "valid_targets_mean": 4710.3, + "valid_targets_min": 575 + }, + { + "epoch": 4.3205317577548, + "grad_norm": 0.446233266875378, + "learning_rate": 1.536512154629615e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15354609489440918, + "step": 2925, + "valid_targets_mean": 4676.2, + "valid_targets_min": 538 + }, + { + "epoch": 4.327917282127031, + "grad_norm": 0.4418361621253541, + "learning_rate": 1.5293498546711347e-05, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21994024515151978, + "step": 2930, + "valid_targets_mean": 5910.9, + "valid_targets_min": 541 + }, + { + "epoch": 4.335302806499262, + "grad_norm": 0.43246616750276806, + "learning_rate": 1.5221939388083638e-05, + "loss": 0.1754, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14813780784606934, + "step": 2935, + "valid_targets_mean": 5155.4, + "valid_targets_min": 608 + }, + { + "epoch": 4.342688330871492, + "grad_norm": 0.5184902584890491, + "learning_rate": 1.5150445041071418e-05, + "loss": 0.18, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18272896111011505, + "step": 2940, + "valid_targets_mean": 3901.2, + "valid_targets_min": 708 + }, + { + "epoch": 4.350073855243722, + "grad_norm": 0.46142756709679333, + "learning_rate": 1.5079016475453974e-05, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15337729454040527, + "step": 2945, + "valid_targets_mean": 4455.8, + "valid_targets_min": 477 + }, + { + "epoch": 4.357459379615952, + "grad_norm": 0.4202013962683297, + "learning_rate": 1.500765466011827e-05, + "loss": 0.1425, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14253225922584534, + "step": 2950, + "valid_targets_mean": 5328.2, + "valid_targets_min": 747 + }, + { + "epoch": 4.364844903988184, + "grad_norm": 0.4223795000780752, + "learning_rate": 1.4936360563045863e-05, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1546299159526825, + "step": 2955, + "valid_targets_mean": 4801.2, + "valid_targets_min": 228 + }, + { + "epoch": 4.372230428360414, + "grad_norm": 0.5947413428907822, + "learning_rate": 1.4865135151299756e-05, + "loss": 0.1765, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18521949648857117, + "step": 2960, + "valid_targets_mean": 3601.5, + "valid_targets_min": 481 + }, + { + "epoch": 4.379615952732644, + "grad_norm": 0.4824214614001069, + "learning_rate": 1.4793979391011264e-05, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1658276468515396, + "step": 2965, + "valid_targets_mean": 4537.8, + "valid_targets_min": 596 + }, + { + "epoch": 4.387001477104874, + "grad_norm": 0.4535168608231596, + "learning_rate": 1.4722894247366925e-05, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17292077839374542, + "step": 2970, + "valid_targets_mean": 4852.9, + "valid_targets_min": 215 + }, + { + "epoch": 4.394387001477105, + "grad_norm": 0.43921335228854064, + "learning_rate": 1.4651880684595394e-05, + "loss": 0.1775, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18284352123737335, + "step": 2975, + "valid_targets_mean": 5821.1, + "valid_targets_min": 552 + }, + { + "epoch": 4.401772525849335, + "grad_norm": 0.47059015487422473, + "learning_rate": 1.4580939665954396e-05, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1612253487110138, + "step": 2980, + "valid_targets_mean": 4167.0, + "valid_targets_min": 686 + }, + { + "epoch": 4.409158050221565, + "grad_norm": 0.6361271510225736, + "learning_rate": 1.4510072153717613e-05, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17631939053535461, + "step": 2985, + "valid_targets_mean": 3730.2, + "valid_targets_min": 626 + }, + { + "epoch": 4.416543574593796, + "grad_norm": 0.4847929947491163, + "learning_rate": 1.4439279109161673e-05, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16035814583301544, + "step": 2990, + "valid_targets_mean": 4219.9, + "valid_targets_min": 652 + }, + { + "epoch": 4.423929098966027, + "grad_norm": 0.4246728217669771, + "learning_rate": 1.4368561492553074e-05, + "loss": 0.1704, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19435647130012512, + "step": 2995, + "valid_targets_mean": 5936.1, + "valid_targets_min": 615 + }, + { + "epoch": 4.431314623338257, + "grad_norm": 0.5841115607984504, + "learning_rate": 1.4297920263135194e-05, + "loss": 0.1726, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18300238251686096, + "step": 3000, + "valid_targets_mean": 3174.0, + "valid_targets_min": 355 + }, + { + "epoch": 4.438700147710487, + "grad_norm": 0.44672860156890554, + "learning_rate": 1.4227356379115256e-05, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18793809413909912, + "step": 3005, + "valid_targets_mean": 5597.8, + "valid_targets_min": 879 + }, + { + "epoch": 4.4460856720827175, + "grad_norm": 0.38476310411513204, + "learning_rate": 1.4156870797651343e-05, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1833447515964508, + "step": 3010, + "valid_targets_mean": 6966.1, + "valid_targets_min": 506 + }, + { + "epoch": 4.453471196454949, + "grad_norm": 0.42279572173644614, + "learning_rate": 1.4086464474839399e-05, + "loss": 0.1927, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1899091899394989, + "step": 3015, + "valid_targets_mean": 5839.3, + "valid_targets_min": 816 + }, + { + "epoch": 4.460856720827179, + "grad_norm": 0.44031418366204805, + "learning_rate": 1.4016138365700283e-05, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17797401547431946, + "step": 3020, + "valid_targets_mean": 6834.4, + "valid_targets_min": 628 + }, + { + "epoch": 4.468242245199409, + "grad_norm": 0.48920064870797775, + "learning_rate": 1.3945893424166799e-05, + "loss": 0.1684, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18216978013515472, + "step": 3025, + "valid_targets_mean": 4377.6, + "valid_targets_min": 573 + }, + { + "epoch": 4.4756277695716395, + "grad_norm": 0.4860440678191081, + "learning_rate": 1.3875730603070755e-05, + "loss": 0.1687, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17842218279838562, + "step": 3030, + "valid_targets_mean": 4298.3, + "valid_targets_min": 519 + }, + { + "epoch": 4.48301329394387, + "grad_norm": 0.5122645397551936, + "learning_rate": 1.3805650854130042e-05, + "loss": 0.1679, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16408318281173706, + "step": 3035, + "valid_targets_mean": 5356.2, + "valid_targets_min": 612 + }, + { + "epoch": 4.4903988183161, + "grad_norm": 0.5777970599819502, + "learning_rate": 1.3735655127935727e-05, + "loss": 0.1673, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18035376071929932, + "step": 3040, + "valid_targets_mean": 3534.7, + "valid_targets_min": 510 + }, + { + "epoch": 4.497784342688331, + "grad_norm": 0.40634258250665284, + "learning_rate": 1.3665744373939165e-05, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1392553746700287, + "step": 3045, + "valid_targets_mean": 6123.2, + "valid_targets_min": 768 + }, + { + "epoch": 4.5051698670605616, + "grad_norm": 0.4222640132338195, + "learning_rate": 1.3595919540439098e-05, + "loss": 0.1582, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15259933471679688, + "step": 3050, + "valid_targets_mean": 4889.3, + "valid_targets_min": 475 + }, + { + "epoch": 4.512555391432792, + "grad_norm": 0.42710032158994776, + "learning_rate": 1.352618157456882e-05, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1755707710981369, + "step": 3055, + "valid_targets_mean": 5323.7, + "valid_targets_min": 573 + }, + { + "epoch": 4.519940915805022, + "grad_norm": 0.4546554151450125, + "learning_rate": 1.3456531422283314e-05, + "loss": 0.1814, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16564080119132996, + "step": 3060, + "valid_targets_mean": 5748.6, + "valid_targets_min": 635 + }, + { + "epoch": 4.527326440177252, + "grad_norm": 0.3920659302495498, + "learning_rate": 1.3386970028346414e-05, + "loss": 0.158, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14143362641334534, + "step": 3065, + "valid_targets_mean": 6217.1, + "valid_targets_min": 637 + }, + { + "epoch": 4.534711964549483, + "grad_norm": 0.6631096404799323, + "learning_rate": 1.3317498336318001e-05, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14858531951904297, + "step": 3070, + "valid_targets_mean": 5357.5, + "valid_targets_min": 802 + }, + { + "epoch": 4.542097488921714, + "grad_norm": 0.36924744404698256, + "learning_rate": 1.3248117288541224e-05, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15762075781822205, + "step": 3075, + "valid_targets_mean": 6748.7, + "valid_targets_min": 1209 + }, + { + "epoch": 4.549483013293944, + "grad_norm": 0.4132566623757397, + "learning_rate": 1.3178827826129653e-05, + "loss": 0.1528, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15297694504261017, + "step": 3080, + "valid_targets_mean": 5218.4, + "valid_targets_min": 629 + }, + { + "epoch": 4.556868537666174, + "grad_norm": 0.6105683360071116, + "learning_rate": 1.3109630888954586e-05, + "loss": 0.1953, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18115362524986267, + "step": 3085, + "valid_targets_mean": 2799.2, + "valid_targets_min": 518 + }, + { + "epoch": 4.564254062038405, + "grad_norm": 0.4312209357104268, + "learning_rate": 1.3040527415632264e-05, + "loss": 0.1705, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13422748446464539, + "step": 3090, + "valid_targets_mean": 5207.3, + "valid_targets_min": 750 + }, + { + "epoch": 4.571639586410635, + "grad_norm": 0.5345112264129266, + "learning_rate": 1.297151834351115e-05, + "loss": 0.1813, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2037266194820404, + "step": 3095, + "valid_targets_mean": 4426.9, + "valid_targets_min": 584 + }, + { + "epoch": 4.579025110782865, + "grad_norm": 0.48644514959603063, + "learning_rate": 1.2902604608659195e-05, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19521738588809967, + "step": 3100, + "valid_targets_mean": 4656.5, + "valid_targets_min": 536 + }, + { + "epoch": 4.586410635155096, + "grad_norm": 0.4686584623782236, + "learning_rate": 1.2833787145851164e-05, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16689223051071167, + "step": 3105, + "valid_targets_mean": 4490.6, + "valid_targets_min": 675 + }, + { + "epoch": 4.593796159527327, + "grad_norm": 0.4343439566599933, + "learning_rate": 1.2765066888555957e-05, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19107720255851746, + "step": 3110, + "valid_targets_mean": 5998.3, + "valid_targets_min": 735 + }, + { + "epoch": 4.601181683899557, + "grad_norm": 0.48092468519713316, + "learning_rate": 1.2696444768923919e-05, + "loss": 0.1721, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18640181422233582, + "step": 3115, + "valid_targets_mean": 4409.0, + "valid_targets_min": 492 + }, + { + "epoch": 4.608567208271787, + "grad_norm": 0.3949210579717376, + "learning_rate": 1.2627921717774236e-05, + "loss": 0.189, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16668841242790222, + "step": 3120, + "valid_targets_mean": 6662.4, + "valid_targets_min": 375 + }, + { + "epoch": 4.6159527326440175, + "grad_norm": 0.3653101251021582, + "learning_rate": 1.2559498664582259e-05, + "loss": 0.1707, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17200833559036255, + "step": 3125, + "valid_targets_mean": 7024.5, + "valid_targets_min": 691 + }, + { + "epoch": 4.623338257016248, + "grad_norm": 0.5353189616488072, + "learning_rate": 1.2491176537466951e-05, + "loss": 0.1665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18706795573234558, + "step": 3130, + "valid_targets_mean": 3664.9, + "valid_targets_min": 553 + }, + { + "epoch": 4.630723781388479, + "grad_norm": 0.48483146785334685, + "learning_rate": 1.242295626317826e-05, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18977820873260498, + "step": 3135, + "valid_targets_mean": 4671.8, + "valid_targets_min": 541 + }, + { + "epoch": 4.638109305760709, + "grad_norm": 0.4576012336580824, + "learning_rate": 1.2354838767084563e-05, + "loss": 0.1898, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1621273159980774, + "step": 3140, + "valid_targets_mean": 4912.6, + "valid_targets_min": 547 + }, + { + "epoch": 4.6454948301329395, + "grad_norm": 0.47408165212071834, + "learning_rate": 1.2286824973160103e-05, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14935819804668427, + "step": 3145, + "valid_targets_mean": 4476.2, + "valid_targets_min": 654 + }, + { + "epoch": 4.65288035450517, + "grad_norm": 0.35823980053826326, + "learning_rate": 1.2218915803972478e-05, + "loss": 0.1848, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15744267404079437, + "step": 3150, + "valid_targets_mean": 6967.7, + "valid_targets_min": 639 + }, + { + "epoch": 4.6602658788774, + "grad_norm": 0.3938649437885737, + "learning_rate": 1.2151112180670097e-05, + "loss": 0.1696, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15055982768535614, + "step": 3155, + "valid_targets_mean": 6357.4, + "valid_targets_min": 2129 + }, + { + "epoch": 4.66765140324963, + "grad_norm": 0.4159919225396087, + "learning_rate": 1.2083415022969712e-05, + "loss": 0.1539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1428435891866684, + "step": 3160, + "valid_targets_mean": 5190.1, + "valid_targets_min": 589 + }, + { + "epoch": 4.6750369276218615, + "grad_norm": 0.321718436567835, + "learning_rate": 1.2015825249143926e-05, + "loss": 0.1475, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11123082041740417, + "step": 3165, + "valid_targets_mean": 6443.4, + "valid_targets_min": 550 + }, + { + "epoch": 4.682422451994092, + "grad_norm": 0.5181376093279889, + "learning_rate": 1.1948343776008738e-05, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18044036626815796, + "step": 3170, + "valid_targets_mean": 5152.0, + "valid_targets_min": 476 + }, + { + "epoch": 4.689807976366322, + "grad_norm": 0.3835319842919287, + "learning_rate": 1.1880971518911112e-05, + "loss": 0.1832, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1430322527885437, + "step": 3175, + "valid_targets_mean": 5503.8, + "valid_targets_min": 599 + }, + { + "epoch": 4.697193500738552, + "grad_norm": 0.38392579284693557, + "learning_rate": 1.1813709391716568e-05, + "loss": 0.162, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14627256989479065, + "step": 3180, + "valid_targets_mean": 6006.3, + "valid_targets_min": 649 + }, + { + "epoch": 4.704579025110783, + "grad_norm": 0.4621220677999244, + "learning_rate": 1.1746558306796776e-05, + "loss": 0.1804, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17970584332942963, + "step": 3185, + "valid_targets_mean": 4955.9, + "valid_targets_min": 676 + }, + { + "epoch": 4.711964549483013, + "grad_norm": 0.4500367333603124, + "learning_rate": 1.1679519175017174e-05, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14992380142211914, + "step": 3190, + "valid_targets_mean": 4324.4, + "valid_targets_min": 615 + }, + { + "epoch": 4.719350073855244, + "grad_norm": 0.4306455292259303, + "learning_rate": 1.161259290572462e-05, + "loss": 0.1578, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1760706901550293, + "step": 3195, + "valid_targets_mean": 5846.6, + "valid_targets_min": 714 + }, + { + "epoch": 4.726735598227474, + "grad_norm": 0.5361598584911511, + "learning_rate": 1.1545780406735081e-05, + "loss": 0.1835, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16949506103992462, + "step": 3200, + "valid_targets_mean": 3808.7, + "valid_targets_min": 608 + }, + { + "epoch": 4.734121122599705, + "grad_norm": 0.4278388705938128, + "learning_rate": 1.1479082584321265e-05, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16112419962882996, + "step": 3205, + "valid_targets_mean": 5402.7, + "valid_targets_min": 614 + }, + { + "epoch": 4.741506646971935, + "grad_norm": 0.4272816557388593, + "learning_rate": 1.1412500343200384e-05, + "loss": 0.1457, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13227397203445435, + "step": 3210, + "valid_targets_mean": 5582.8, + "valid_targets_min": 229 + }, + { + "epoch": 4.748892171344165, + "grad_norm": 0.47634077338014624, + "learning_rate": 1.1346034586521828e-05, + "loss": 0.1659, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17866167426109314, + "step": 3215, + "valid_targets_mean": 4475.9, + "valid_targets_min": 595 + }, + { + "epoch": 4.7562776957163955, + "grad_norm": 0.4344869468282722, + "learning_rate": 1.127968621585497e-05, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15781113505363464, + "step": 3220, + "valid_targets_mean": 5149.8, + "valid_targets_min": 702 + }, + { + "epoch": 4.763663220088627, + "grad_norm": 0.4467123123517859, + "learning_rate": 1.1213456131176894e-05, + "loss": 0.1815, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13696205615997314, + "step": 3225, + "valid_targets_mean": 4635.4, + "valid_targets_min": 355 + }, + { + "epoch": 4.771048744460857, + "grad_norm": 0.6578561115499146, + "learning_rate": 1.1147345230860225e-05, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1457718312740326, + "step": 3230, + "valid_targets_mean": 5075.1, + "valid_targets_min": 380 + }, + { + "epoch": 4.778434268833087, + "grad_norm": 0.47187291081005306, + "learning_rate": 1.1081354411660899e-05, + "loss": 0.1941, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22649861872196198, + "step": 3235, + "valid_targets_mean": 5602.9, + "valid_targets_min": 385 + }, + { + "epoch": 4.7858197932053175, + "grad_norm": 0.4585294146600305, + "learning_rate": 1.1015484568706025e-05, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15446168184280396, + "step": 3240, + "valid_targets_mean": 4190.6, + "valid_targets_min": 401 + }, + { + "epoch": 4.793205317577548, + "grad_norm": 0.4721836321102142, + "learning_rate": 1.0949736595481748e-05, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16026848554611206, + "step": 3245, + "valid_targets_mean": 4162.0, + "valid_targets_min": 211 + }, + { + "epoch": 4.800590841949779, + "grad_norm": 0.5097924145730403, + "learning_rate": 1.0884111383821115e-05, + "loss": 0.1841, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1608010232448578, + "step": 3250, + "valid_targets_mean": 4100.3, + "valid_targets_min": 559 + }, + { + "epoch": 4.807976366322009, + "grad_norm": 0.5121344596400246, + "learning_rate": 1.0818609823891988e-05, + "loss": 0.1852, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18942126631736755, + "step": 3255, + "valid_targets_mean": 4584.4, + "valid_targets_min": 529 + }, + { + "epoch": 4.8153618906942395, + "grad_norm": 0.39125128571958845, + "learning_rate": 1.0753232804184944e-05, + "loss": 0.1711, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1385767161846161, + "step": 3260, + "valid_targets_mean": 5840.9, + "valid_targets_min": 696 + }, + { + "epoch": 4.82274741506647, + "grad_norm": 0.519147963569654, + "learning_rate": 1.0687981211501286e-05, + "loss": 0.1902, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13257870078086853, + "step": 3265, + "valid_targets_mean": 5252.2, + "valid_targets_min": 832 + }, + { + "epoch": 4.8301329394387, + "grad_norm": 0.3882601827065478, + "learning_rate": 1.0622855930940935e-05, + "loss": 0.1788, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16074441373348236, + "step": 3270, + "valid_targets_mean": 5536.7, + "valid_targets_min": 717 + }, + { + "epoch": 4.83751846381093, + "grad_norm": 0.5424919781622314, + "learning_rate": 1.0557857845890486e-05, + "loss": 0.1676, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19361868500709534, + "step": 3275, + "valid_targets_mean": 4543.6, + "valid_targets_min": 246 + }, + { + "epoch": 4.844903988183161, + "grad_norm": 0.5134917623818339, + "learning_rate": 1.049298783801118e-05, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1247122585773468, + "step": 3280, + "valid_targets_mean": 4817.4, + "valid_targets_min": 457 + }, + { + "epoch": 4.852289512555392, + "grad_norm": 0.4500612814479838, + "learning_rate": 1.0428246787226979e-05, + "loss": 0.1748, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16459113359451294, + "step": 3285, + "valid_targets_mean": 4635.2, + "valid_targets_min": 531 + }, + { + "epoch": 4.859675036927622, + "grad_norm": 0.4535219744538525, + "learning_rate": 1.0363635571712614e-05, + "loss": 0.1689, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16566744446754456, + "step": 3290, + "valid_targets_mean": 5096.8, + "valid_targets_min": 592 + }, + { + "epoch": 4.867060561299852, + "grad_norm": 0.46962696061070613, + "learning_rate": 1.0299155067881673e-05, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19553884863853455, + "step": 3295, + "valid_targets_mean": 5114.3, + "valid_targets_min": 562 + }, + { + "epoch": 4.874446085672083, + "grad_norm": 0.39252568546578387, + "learning_rate": 1.023480615037472e-05, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1575905829668045, + "step": 3300, + "valid_targets_mean": 5711.9, + "valid_targets_min": 568 + }, + { + "epoch": 4.881831610044313, + "grad_norm": 4.133528284855488, + "learning_rate": 1.0170589692047428e-05, + "loss": 0.1493, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13656756281852722, + "step": 3305, + "valid_targets_mean": 4539.4, + "valid_targets_min": 494 + }, + { + "epoch": 4.889217134416544, + "grad_norm": 0.5686494863470835, + "learning_rate": 1.0106506563958733e-05, + "loss": 0.1665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19371378421783447, + "step": 3310, + "valid_targets_mean": 3573.2, + "valid_targets_min": 430 + }, + { + "epoch": 4.896602658788774, + "grad_norm": 0.4402520543361477, + "learning_rate": 1.0042557635359028e-05, + "loss": 0.1663, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1665995717048645, + "step": 3315, + "valid_targets_mean": 5250.2, + "valid_targets_min": 495 + }, + { + "epoch": 4.903988183161005, + "grad_norm": 0.41299530585836114, + "learning_rate": 9.978743773678362e-06, + "loss": 0.1636, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.173518568277359, + "step": 3320, + "valid_targets_mean": 5918.4, + "valid_targets_min": 583 + }, + { + "epoch": 4.911373707533235, + "grad_norm": 0.4208459436047827, + "learning_rate": 9.915065844514688e-06, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12593166530132294, + "step": 3325, + "valid_targets_mean": 5046.3, + "valid_targets_min": 298 + }, + { + "epoch": 4.918759231905465, + "grad_norm": 0.5089228253284505, + "learning_rate": 9.851524711622111e-06, + "loss": 0.1642, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19019880890846252, + "step": 3330, + "valid_targets_mean": 4365.8, + "valid_targets_min": 683 + }, + { + "epoch": 4.926144756277695, + "grad_norm": 0.3548181182634598, + "learning_rate": 9.78812123689917e-06, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16198115050792694, + "step": 3335, + "valid_targets_mean": 7325.5, + "valid_targets_min": 469 + }, + { + "epoch": 4.933530280649926, + "grad_norm": 0.4847190504144033, + "learning_rate": 9.724856280377155e-06, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19115082919597626, + "step": 3340, + "valid_targets_mean": 4280.8, + "valid_targets_min": 655 + }, + { + "epoch": 4.940915805022157, + "grad_norm": 0.45392216834170873, + "learning_rate": 9.661730700208434e-06, + "loss": 0.1774, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21955522894859314, + "step": 3345, + "valid_targets_mean": 4720.2, + "valid_targets_min": 383 + }, + { + "epoch": 4.948301329394387, + "grad_norm": 0.38054470770168214, + "learning_rate": 9.598745352654819e-06, + "loss": 0.1718, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16042864322662354, + "step": 3350, + "valid_targets_mean": 5992.3, + "valid_targets_min": 598 + }, + { + "epoch": 4.955686853766617, + "grad_norm": 0.4603546661973167, + "learning_rate": 9.535901092075945e-06, + "loss": 0.1739, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17585790157318115, + "step": 3355, + "valid_targets_mean": 4433.1, + "valid_targets_min": 538 + }, + { + "epoch": 4.963072378138848, + "grad_norm": 0.4507007240993226, + "learning_rate": 9.473198770917686e-06, + "loss": 0.1651, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13589590787887573, + "step": 3360, + "valid_targets_mean": 5814.6, + "valid_targets_min": 649 + }, + { + "epoch": 4.970457902511078, + "grad_norm": 0.42542002909001325, + "learning_rate": 9.41063923970059e-06, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16971269249916077, + "step": 3365, + "valid_targets_mean": 5632.9, + "valid_targets_min": 584 + }, + { + "epoch": 4.977843426883309, + "grad_norm": 0.6339824403474643, + "learning_rate": 9.348223347008342e-06, + "loss": 0.1862, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1671033501625061, + "step": 3370, + "valid_targets_mean": 6262.1, + "valid_targets_min": 831 + }, + { + "epoch": 4.985228951255539, + "grad_norm": 0.4263518906101623, + "learning_rate": 9.285951939476256e-06, + "loss": 0.1488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1599881947040558, + "step": 3375, + "valid_targets_mean": 4836.1, + "valid_targets_min": 521 + }, + { + "epoch": 4.99261447562777, + "grad_norm": 0.4552157775554924, + "learning_rate": 9.22382586177978e-06, + "loss": 0.1549, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17859028279781342, + "step": 3380, + "valid_targets_mean": 5123.1, + "valid_targets_min": 664 + }, + { + "epoch": 5.0, + "grad_norm": 0.42182388484726885, + "learning_rate": 9.161845956623072e-06, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.125313863158226, + "step": 3385, + "valid_targets_mean": 5043.0, + "valid_targets_min": 661 + }, + { + "epoch": 5.00738552437223, + "grad_norm": 0.4371353465408426, + "learning_rate": 9.100013064727508e-06, + "loss": 0.1752, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19350577890872955, + "step": 3390, + "valid_targets_mean": 6081.2, + "valid_targets_min": 591 + }, + { + "epoch": 5.014771048744461, + "grad_norm": 0.444555503330858, + "learning_rate": 9.038328024820342e-06, + "loss": 0.1691, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2033313512802124, + "step": 3395, + "valid_targets_mean": 5863.6, + "valid_targets_min": 499 + }, + { + "epoch": 5.022156573116692, + "grad_norm": 0.42165983760825365, + "learning_rate": 8.976791673623289e-06, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15845409035682678, + "step": 3400, + "valid_targets_mean": 5640.8, + "valid_targets_min": 546 + }, + { + "epoch": 5.029542097488922, + "grad_norm": 0.38297498375540073, + "learning_rate": 8.915404845841194e-06, + "loss": 0.1623, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11503148823976517, + "step": 3405, + "valid_targets_mean": 7132.4, + "valid_targets_min": 592 + }, + { + "epoch": 5.036927621861152, + "grad_norm": 0.4953749429257958, + "learning_rate": 8.8541683741507e-06, + "loss": 0.1471, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14602802693843842, + "step": 3410, + "valid_targets_mean": 4715.0, + "valid_targets_min": 539 + }, + { + "epoch": 5.044313146233383, + "grad_norm": 0.5095609371925762, + "learning_rate": 8.793083089188953e-06, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1521594226360321, + "step": 3415, + "valid_targets_mean": 4684.5, + "valid_targets_min": 555 + }, + { + "epoch": 5.051698670605613, + "grad_norm": 0.4523371860610417, + "learning_rate": 8.732149819542362e-06, + "loss": 0.1603, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15225934982299805, + "step": 3420, + "valid_targets_mean": 5030.1, + "valid_targets_min": 577 + }, + { + "epoch": 5.059084194977843, + "grad_norm": 0.5280342375496218, + "learning_rate": 8.671369391735304e-06, + "loss": 0.1385, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14403800666332245, + "step": 3425, + "valid_targets_mean": 5231.7, + "valid_targets_min": 566 + }, + { + "epoch": 5.066469719350074, + "grad_norm": 0.40798873184330564, + "learning_rate": 8.610742630218978e-06, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13136714696884155, + "step": 3430, + "valid_targets_mean": 6256.9, + "valid_targets_min": 613 + }, + { + "epoch": 5.073855243722305, + "grad_norm": 0.3861935517496378, + "learning_rate": 8.550270357360149e-06, + "loss": 0.1548, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12466324865818024, + "step": 3435, + "valid_targets_mean": 5849.4, + "valid_targets_min": 642 + }, + { + "epoch": 5.081240768094535, + "grad_norm": 0.7401400100882837, + "learning_rate": 8.489953393430066e-06, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1705418825149536, + "step": 3440, + "valid_targets_mean": 3559.2, + "valid_targets_min": 490 + }, + { + "epoch": 5.088626292466765, + "grad_norm": 0.5193202825498705, + "learning_rate": 8.429792556593293e-06, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.22426575422286987, + "step": 3445, + "valid_targets_mean": 5109.2, + "valid_targets_min": 496 + }, + { + "epoch": 5.096011816838995, + "grad_norm": 0.43168022491540325, + "learning_rate": 8.36978866289661e-06, + "loss": 0.1526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17360247671604156, + "step": 3450, + "valid_targets_mean": 5924.7, + "valid_targets_min": 690 + }, + { + "epoch": 5.103397341211226, + "grad_norm": 0.5111277913511018, + "learning_rate": 8.309942526257982e-06, + "loss": 0.1451, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14671558141708374, + "step": 3455, + "valid_targets_mean": 4076.2, + "valid_targets_min": 552 + }, + { + "epoch": 5.110782865583457, + "grad_norm": 0.3745220484810527, + "learning_rate": 8.250254958455466e-06, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13649728894233704, + "step": 3460, + "valid_targets_mean": 6392.8, + "valid_targets_min": 953 + }, + { + "epoch": 5.118168389955687, + "grad_norm": 0.41895740102958157, + "learning_rate": 8.19072676911623e-06, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16213387250900269, + "step": 3465, + "valid_targets_mean": 5501.8, + "valid_targets_min": 563 + }, + { + "epoch": 5.125553914327917, + "grad_norm": 0.4621969274320497, + "learning_rate": 8.13135876570557e-06, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1634249985218048, + "step": 3470, + "valid_targets_mean": 5318.6, + "valid_targets_min": 739 + }, + { + "epoch": 5.132939438700148, + "grad_norm": 0.5101384366926183, + "learning_rate": 8.072151753515963e-06, + "loss": 0.1665, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18241387605667114, + "step": 3475, + "valid_targets_mean": 4392.2, + "valid_targets_min": 589 + }, + { + "epoch": 5.140324963072378, + "grad_norm": 0.435494385133351, + "learning_rate": 8.013106535656098e-06, + "loss": 0.1415, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15624777972698212, + "step": 3480, + "valid_targets_mean": 5854.9, + "valid_targets_min": 671 + }, + { + "epoch": 5.147710487444608, + "grad_norm": 0.45540841610405663, + "learning_rate": 7.954223913040052e-06, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16797244548797607, + "step": 3485, + "valid_targets_mean": 6307.3, + "valid_targets_min": 650 + }, + { + "epoch": 5.155096011816839, + "grad_norm": 0.435797126087404, + "learning_rate": 7.895504684376387e-06, + "loss": 0.1686, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20473451912403107, + "step": 3490, + "valid_targets_mean": 6468.6, + "valid_targets_min": 688 + }, + { + "epoch": 5.16248153618907, + "grad_norm": 0.4658949563928541, + "learning_rate": 7.836949646157317e-06, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23565639555454254, + "step": 3495, + "valid_targets_mean": 5807.4, + "valid_targets_min": 471 + }, + { + "epoch": 5.1698670605613, + "grad_norm": 0.6532185606945251, + "learning_rate": 7.778559592647903e-06, + "loss": 0.1461, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15222051739692688, + "step": 3500, + "valid_targets_mean": 5615.7, + "valid_targets_min": 583 + }, + { + "epoch": 5.17725258493353, + "grad_norm": 0.37994849707269074, + "learning_rate": 7.720335315875286e-06, + "loss": 0.1522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11698979139328003, + "step": 3505, + "valid_targets_mean": 6916.1, + "valid_targets_min": 303 + }, + { + "epoch": 5.1846381093057605, + "grad_norm": 0.4867862414276969, + "learning_rate": 7.662277605617952e-06, + "loss": 0.1566, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15551450848579407, + "step": 3510, + "valid_targets_mean": 5208.1, + "valid_targets_min": 692 + }, + { + "epoch": 5.192023633677991, + "grad_norm": 0.5261180668274233, + "learning_rate": 7.604387249394991e-06, + "loss": 0.1883, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17861658334732056, + "step": 3515, + "valid_targets_mean": 4572.2, + "valid_targets_min": 643 + }, + { + "epoch": 5.199409158050222, + "grad_norm": 0.5013761956045906, + "learning_rate": 7.546665032455458e-06, + "loss": 0.1742, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24076738953590393, + "step": 3520, + "valid_targets_mean": 5969.2, + "valid_targets_min": 952 + }, + { + "epoch": 5.206794682422452, + "grad_norm": 0.44388940849901665, + "learning_rate": 7.4891117377676555e-06, + "loss": 0.1746, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1808503419160843, + "step": 3525, + "valid_targets_mean": 5442.9, + "valid_targets_min": 831 + }, + { + "epoch": 5.2141802067946825, + "grad_norm": 0.3368141280700644, + "learning_rate": 7.431728146008603e-06, + "loss": 0.1428, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12330228835344315, + "step": 3530, + "valid_targets_mean": 7554.9, + "valid_targets_min": 804 + }, + { + "epoch": 5.221565731166913, + "grad_norm": 0.47194572305530047, + "learning_rate": 7.3745150355533665e-06, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1410825550556183, + "step": 3535, + "valid_targets_mean": 4599.2, + "valid_targets_min": 521 + }, + { + "epoch": 5.228951255539143, + "grad_norm": 0.4589841119409957, + "learning_rate": 7.317473182464543e-06, + "loss": 0.1592, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14487279951572418, + "step": 3540, + "valid_targets_mean": 5436.2, + "valid_targets_min": 472 + }, + { + "epoch": 5.236336779911373, + "grad_norm": 0.5079352080568156, + "learning_rate": 7.260603360481719e-06, + "loss": 0.1661, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.177564337849617, + "step": 3545, + "valid_targets_mean": 4225.0, + "valid_targets_min": 441 + }, + { + "epoch": 5.2437223042836045, + "grad_norm": 0.4124981604846651, + "learning_rate": 7.20390634101098e-06, + "loss": 0.1628, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20939132571220398, + "step": 3550, + "valid_targets_mean": 7137.3, + "valid_targets_min": 425 + }, + { + "epoch": 5.251107828655835, + "grad_norm": 0.4748050711224649, + "learning_rate": 7.147382893114447e-06, + "loss": 0.1561, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15160918235778809, + "step": 3555, + "valid_targets_mean": 5582.1, + "valid_targets_min": 508 + }, + { + "epoch": 5.258493353028065, + "grad_norm": 0.49570642663047704, + "learning_rate": 7.091033783499843e-06, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1662381887435913, + "step": 3560, + "valid_targets_mean": 4688.8, + "valid_targets_min": 412 + }, + { + "epoch": 5.265878877400295, + "grad_norm": 0.6444535354021959, + "learning_rate": 7.034859776510094e-06, + "loss": 0.164, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18472233414649963, + "step": 3565, + "valid_targets_mean": 2869.7, + "valid_targets_min": 358 + }, + { + "epoch": 5.273264401772526, + "grad_norm": 0.36003282906413414, + "learning_rate": 6.978861634112959e-06, + "loss": 0.1626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11764170229434967, + "step": 3570, + "valid_targets_mean": 7215.8, + "valid_targets_min": 714 + }, + { + "epoch": 5.280649926144756, + "grad_norm": 0.4363348321964383, + "learning_rate": 6.923040115890698e-06, + "loss": 0.1646, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14421112835407257, + "step": 3575, + "valid_targets_mean": 5132.3, + "valid_targets_min": 731 + }, + { + "epoch": 5.288035450516987, + "grad_norm": 0.4270407605139215, + "learning_rate": 6.867395979029763e-06, + "loss": 0.1531, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15610837936401367, + "step": 3580, + "valid_targets_mean": 5726.0, + "valid_targets_min": 524 + }, + { + "epoch": 5.295420974889217, + "grad_norm": 0.4372448860288484, + "learning_rate": 6.8119299783105384e-06, + "loss": 0.1516, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1485387682914734, + "step": 3585, + "valid_targets_mean": 5501.2, + "valid_targets_min": 798 + }, + { + "epoch": 5.302806499261448, + "grad_norm": 0.49670165229019625, + "learning_rate": 6.756642866097087e-06, + "loss": 0.1727, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21197667717933655, + "step": 3590, + "valid_targets_mean": 4981.1, + "valid_targets_min": 593 + }, + { + "epoch": 5.310192023633678, + "grad_norm": 0.4873247420128096, + "learning_rate": 6.701535392326959e-06, + "loss": 0.1511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14547309279441833, + "step": 3595, + "valid_targets_mean": 4563.1, + "valid_targets_min": 619 + }, + { + "epoch": 5.317577548005908, + "grad_norm": 0.4104843544339754, + "learning_rate": 6.646608304501008e-06, + "loss": 0.149, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16900783777236938, + "step": 3600, + "valid_targets_mean": 6581.2, + "valid_targets_min": 764 + }, + { + "epoch": 5.3249630723781385, + "grad_norm": 0.47195907463014314, + "learning_rate": 6.591862347673257e-06, + "loss": 0.1699, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1981767863035202, + "step": 3605, + "valid_targets_mean": 4831.9, + "valid_targets_min": 710 + }, + { + "epoch": 5.33234859675037, + "grad_norm": 0.542224951252824, + "learning_rate": 6.537298264440813e-06, + "loss": 0.1658, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19629326462745667, + "step": 3610, + "valid_targets_mean": 3798.4, + "valid_targets_min": 432 + }, + { + "epoch": 5.3397341211226, + "grad_norm": 0.35804884095064315, + "learning_rate": 6.482916794933736e-06, + "loss": 0.141, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12061704695224762, + "step": 3615, + "valid_targets_mean": 6460.8, + "valid_targets_min": 638 + }, + { + "epoch": 5.34711964549483, + "grad_norm": 0.49330966651853386, + "learning_rate": 6.428718676805065e-06, + "loss": 0.1817, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21051400899887085, + "step": 3620, + "valid_targets_mean": 4860.2, + "valid_targets_min": 433 + }, + { + "epoch": 5.3545051698670605, + "grad_norm": 0.5321457303177618, + "learning_rate": 6.374704645220775e-06, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1549859642982483, + "step": 3625, + "valid_targets_mean": 3886.5, + "valid_targets_min": 466 + }, + { + "epoch": 5.361890694239291, + "grad_norm": 0.39489790904559613, + "learning_rate": 6.320875432849816e-06, + "loss": 0.1481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1310429573059082, + "step": 3630, + "valid_targets_mean": 7088.1, + "valid_targets_min": 584 + }, + { + "epoch": 5.369276218611521, + "grad_norm": 0.45105096964297986, + "learning_rate": 6.2672317698541695e-06, + "loss": 0.1533, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1540766954421997, + "step": 3635, + "valid_targets_mean": 4639.0, + "valid_targets_min": 560 + }, + { + "epoch": 5.376661742983752, + "grad_norm": 0.4753836438289837, + "learning_rate": 6.213774383878945e-06, + "loss": 0.1542, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12997017800807953, + "step": 3640, + "valid_targets_mean": 5125.3, + "valid_targets_min": 526 + }, + { + "epoch": 5.3840472673559825, + "grad_norm": 0.5042444574219167, + "learning_rate": 6.1605040000425285e-06, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13099181652069092, + "step": 3645, + "valid_targets_mean": 5819.9, + "valid_targets_min": 490 + }, + { + "epoch": 5.391432791728213, + "grad_norm": 0.3863263579045339, + "learning_rate": 6.107421340926716e-06, + "loss": 0.1466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16232043504714966, + "step": 3650, + "valid_targets_mean": 6967.1, + "valid_targets_min": 901 + }, + { + "epoch": 5.398818316100443, + "grad_norm": 0.5220422627235417, + "learning_rate": 6.054527126566933e-06, + "loss": 0.1519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16426241397857666, + "step": 3655, + "valid_targets_mean": 4679.2, + "valid_targets_min": 716 + }, + { + "epoch": 5.406203840472673, + "grad_norm": 0.4319185540338788, + "learning_rate": 6.0018220744424495e-06, + "loss": 0.1524, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1663556545972824, + "step": 3660, + "valid_targets_mean": 6097.4, + "valid_targets_min": 602 + }, + { + "epoch": 5.413589364844904, + "grad_norm": 0.3956255035616646, + "learning_rate": 5.949306899466672e-06, + "loss": 0.1511, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14825277030467987, + "step": 3665, + "valid_targets_mean": 6583.9, + "valid_targets_min": 691 + }, + { + "epoch": 5.420974889217135, + "grad_norm": 0.484584083668749, + "learning_rate": 5.8969823139774304e-06, + "loss": 0.1629, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19931569695472717, + "step": 3670, + "valid_targets_mean": 4773.4, + "valid_targets_min": 597 + }, + { + "epoch": 5.428360413589365, + "grad_norm": 0.45434367771112616, + "learning_rate": 5.844849027727313e-06, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19150590896606445, + "step": 3675, + "valid_targets_mean": 5992.9, + "valid_targets_min": 505 + }, + { + "epoch": 5.435745937961595, + "grad_norm": 0.6026865956309536, + "learning_rate": 5.792907747874068e-06, + "loss": 0.156, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16173362731933594, + "step": 3680, + "valid_targets_mean": 4554.5, + "valid_targets_min": 573 + }, + { + "epoch": 5.443131462333826, + "grad_norm": 0.40334097093396654, + "learning_rate": 5.741159178970958e-06, + "loss": 0.1551, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15286067128181458, + "step": 3685, + "valid_targets_mean": 6434.6, + "valid_targets_min": 894 + }, + { + "epoch": 5.450516986706056, + "grad_norm": 0.42746203298644087, + "learning_rate": 5.689604022957256e-06, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1851692497730255, + "step": 3690, + "valid_targets_mean": 6208.4, + "valid_targets_min": 584 + }, + { + "epoch": 5.457902511078286, + "grad_norm": 0.48488271400781807, + "learning_rate": 5.638242979148685e-06, + "loss": 0.1782, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17530173063278198, + "step": 3695, + "valid_targets_mean": 4681.8, + "valid_targets_min": 369 + }, + { + "epoch": 5.465288035450517, + "grad_norm": 0.49213245126776284, + "learning_rate": 5.587076744227966e-06, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1436440646648407, + "step": 3700, + "valid_targets_mean": 4852.8, + "valid_targets_min": 349 + }, + { + "epoch": 5.472673559822748, + "grad_norm": 0.5385593413242621, + "learning_rate": 5.536106012235329e-06, + "loss": 0.1796, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23084211349487305, + "step": 3705, + "valid_targets_mean": 5293.5, + "valid_targets_min": 733 + }, + { + "epoch": 5.480059084194978, + "grad_norm": 0.4064012565579344, + "learning_rate": 5.485331474559133e-06, + "loss": 0.1558, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13796594738960266, + "step": 3710, + "valid_targets_mean": 6042.2, + "valid_targets_min": 491 + }, + { + "epoch": 5.487444608567208, + "grad_norm": 0.4466176384591676, + "learning_rate": 5.434753819926484e-06, + "loss": 0.1624, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16316545009613037, + "step": 3715, + "valid_targets_mean": 4737.5, + "valid_targets_min": 662 + }, + { + "epoch": 5.4948301329394384, + "grad_norm": 0.48204920306109306, + "learning_rate": 5.384373734393871e-06, + "loss": 0.1522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12412101030349731, + "step": 3720, + "valid_targets_mean": 4247.8, + "valid_targets_min": 430 + }, + { + "epoch": 5.50221565731167, + "grad_norm": 0.4423331415810954, + "learning_rate": 5.334191901337875e-06, + "loss": 0.1708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16865645349025726, + "step": 3725, + "valid_targets_mean": 5476.6, + "valid_targets_min": 635 + }, + { + "epoch": 5.5096011816839, + "grad_norm": 0.4261034310873824, + "learning_rate": 5.284209001445901e-06, + "loss": 0.1641, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1629231870174408, + "step": 3730, + "valid_targets_mean": 5663.4, + "valid_targets_min": 613 + }, + { + "epoch": 5.51698670605613, + "grad_norm": 0.5270843670535649, + "learning_rate": 5.234425712706938e-06, + "loss": 0.1706, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1648382544517517, + "step": 3735, + "valid_targets_mean": 4095.7, + "valid_targets_min": 587 + }, + { + "epoch": 5.5243722304283605, + "grad_norm": 0.4594394151163437, + "learning_rate": 5.184842710402367e-06, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14536496996879578, + "step": 3740, + "valid_targets_mean": 5437.5, + "valid_targets_min": 734 + }, + { + "epoch": 5.531757754800591, + "grad_norm": 0.54902978481994, + "learning_rate": 5.135460667096806e-06, + "loss": 0.1639, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20517590641975403, + "step": 3745, + "valid_targets_mean": 4786.9, + "valid_targets_min": 500 + }, + { + "epoch": 5.539143279172821, + "grad_norm": 0.45086635799096986, + "learning_rate": 5.086280252628955e-06, + "loss": 0.1589, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15122875571250916, + "step": 3750, + "valid_targets_mean": 4694.8, + "valid_targets_min": 309 + }, + { + "epoch": 5.546528803545051, + "grad_norm": 0.4417456541588059, + "learning_rate": 5.037302134102575e-06, + "loss": 0.1656, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16173198819160461, + "step": 3755, + "valid_targets_mean": 5160.4, + "valid_targets_min": 677 + }, + { + "epoch": 5.5539143279172825, + "grad_norm": 0.4768321706536616, + "learning_rate": 4.988526975877374e-06, + "loss": 0.1491, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14764034748077393, + "step": 3760, + "valid_targets_mean": 4331.2, + "valid_targets_min": 612 + }, + { + "epoch": 5.561299852289513, + "grad_norm": 0.41225383767497426, + "learning_rate": 4.939955439560031e-06, + "loss": 0.1403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12844733893871307, + "step": 3765, + "valid_targets_mean": 6010.1, + "valid_targets_min": 515 + }, + { + "epoch": 5.568685376661743, + "grad_norm": 0.46955403794769657, + "learning_rate": 4.891588183995206e-06, + "loss": 0.1674, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18649445474147797, + "step": 3770, + "valid_targets_mean": 4942.9, + "valid_targets_min": 639 + }, + { + "epoch": 5.576070901033973, + "grad_norm": 0.3936059528169128, + "learning_rate": 4.8434258652566165e-06, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1388961225748062, + "step": 3775, + "valid_targets_mean": 5572.7, + "valid_targets_min": 635 + }, + { + "epoch": 5.583456425406204, + "grad_norm": 0.4122457123845285, + "learning_rate": 4.795469136638125e-06, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14227622747421265, + "step": 3780, + "valid_targets_mean": 6029.8, + "valid_targets_min": 535 + }, + { + "epoch": 5.590841949778435, + "grad_norm": 0.4412352401816102, + "learning_rate": 4.747718648644884e-06, + "loss": 0.1757, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17034125328063965, + "step": 3785, + "valid_targets_mean": 5354.1, + "valid_targets_min": 652 + }, + { + "epoch": 5.598227474150665, + "grad_norm": 0.5432056759362457, + "learning_rate": 4.700175048984517e-06, + "loss": 0.137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11455181241035461, + "step": 3790, + "valid_targets_mean": 4452.2, + "valid_targets_min": 513 + }, + { + "epoch": 5.605612998522895, + "grad_norm": 0.48873939155313356, + "learning_rate": 4.652838982558321e-06, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17513684928417206, + "step": 3795, + "valid_targets_mean": 4769.3, + "valid_targets_min": 688 + }, + { + "epoch": 5.612998522895126, + "grad_norm": 0.4245229968901909, + "learning_rate": 4.605711091452529e-06, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1345527321100235, + "step": 3800, + "valid_targets_mean": 4822.4, + "valid_targets_min": 320 + }, + { + "epoch": 5.620384047267356, + "grad_norm": 0.4483079582956929, + "learning_rate": 4.5587920149296e-06, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19811424612998962, + "step": 3805, + "valid_targets_mean": 5786.8, + "valid_targets_min": 641 + }, + { + "epoch": 5.627769571639586, + "grad_norm": 0.3685763809564081, + "learning_rate": 4.512082389419536e-06, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11352822184562683, + "step": 3810, + "valid_targets_mean": 6317.6, + "valid_targets_min": 610 + }, + { + "epoch": 5.635155096011816, + "grad_norm": 0.45195450036915635, + "learning_rate": 4.465582848511265e-06, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15802201628684998, + "step": 3815, + "valid_targets_mean": 4964.2, + "valid_targets_min": 637 + }, + { + "epoch": 5.642540620384048, + "grad_norm": 0.4019080742593427, + "learning_rate": 4.419294022944036e-06, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1530536711215973, + "step": 3820, + "valid_targets_mean": 6357.9, + "valid_targets_min": 669 + }, + { + "epoch": 5.649926144756278, + "grad_norm": 0.6283549194708017, + "learning_rate": 4.373216540598868e-06, + "loss": 0.1396, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17285491526126862, + "step": 3825, + "valid_targets_mean": 3964.8, + "valid_targets_min": 375 + }, + { + "epoch": 5.657311669128508, + "grad_norm": 0.43312904879288805, + "learning_rate": 4.32735102649003e-06, + "loss": 0.1403, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14635983109474182, + "step": 3830, + "valid_targets_mean": 6200.1, + "valid_targets_min": 656 + }, + { + "epoch": 5.664697193500738, + "grad_norm": 0.43595340706555863, + "learning_rate": 4.281698102756568e-06, + "loss": 0.1443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1400405317544937, + "step": 3835, + "valid_targets_mean": 5565.6, + "valid_targets_min": 360 + }, + { + "epoch": 5.672082717872969, + "grad_norm": 0.48135722402982084, + "learning_rate": 4.236258388653862e-06, + "loss": 0.1756, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20159319043159485, + "step": 3840, + "valid_targets_mean": 5819.4, + "valid_targets_min": 835 + }, + { + "epoch": 5.6794682422452, + "grad_norm": 0.3965676517017414, + "learning_rate": 4.1910325005452266e-06, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1373254656791687, + "step": 3845, + "valid_targets_mean": 6392.2, + "valid_targets_min": 668 + }, + { + "epoch": 5.68685376661743, + "grad_norm": 0.42777121231754817, + "learning_rate": 4.146021051893554e-06, + "loss": 0.1506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1447691023349762, + "step": 3850, + "valid_targets_mean": 5918.9, + "valid_targets_min": 606 + }, + { + "epoch": 5.69423929098966, + "grad_norm": 0.5369213979278165, + "learning_rate": 4.101224653252984e-06, + "loss": 0.1605, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14309504628181458, + "step": 3855, + "valid_targets_mean": 4915.6, + "valid_targets_min": 710 + }, + { + "epoch": 5.701624815361891, + "grad_norm": 0.46685754924383494, + "learning_rate": 4.056643912260634e-06, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14844262599945068, + "step": 3860, + "valid_targets_mean": 4834.6, + "valid_targets_min": 535 + }, + { + "epoch": 5.709010339734121, + "grad_norm": 0.5069631731010494, + "learning_rate": 4.012279433628339e-06, + "loss": 0.1887, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17662671208381653, + "step": 3865, + "valid_targets_mean": 4272.2, + "valid_targets_min": 427 + }, + { + "epoch": 5.716395864106351, + "grad_norm": 0.39028852212572046, + "learning_rate": 3.968131819134484e-06, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14806535840034485, + "step": 3870, + "valid_targets_mean": 7409.1, + "valid_targets_min": 1271 + }, + { + "epoch": 5.7237813884785815, + "grad_norm": 0.42390110263128356, + "learning_rate": 3.924201667615804e-06, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1607496738433838, + "step": 3875, + "valid_targets_mean": 6367.4, + "valid_targets_min": 773 + }, + { + "epoch": 5.731166912850813, + "grad_norm": 0.48659182537833057, + "learning_rate": 3.880489574959261e-06, + "loss": 0.1514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17565742135047913, + "step": 3880, + "valid_targets_mean": 4721.1, + "valid_targets_min": 679 + }, + { + "epoch": 5.738552437223043, + "grad_norm": 0.4685263238659188, + "learning_rate": 3.836996134093997e-06, + "loss": 0.1535, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20819538831710815, + "step": 3885, + "valid_targets_mean": 5287.9, + "valid_targets_min": 524 + }, + { + "epoch": 5.745937961595273, + "grad_norm": 0.5369846316753283, + "learning_rate": 3.7937219349832633e-06, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16460327804088593, + "step": 3890, + "valid_targets_mean": 3801.6, + "valid_targets_min": 635 + }, + { + "epoch": 5.7533234859675035, + "grad_norm": 0.41623622826233153, + "learning_rate": 3.750667564616417e-06, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19304777681827545, + "step": 3895, + "valid_targets_mean": 6364.7, + "valid_targets_min": 471 + }, + { + "epoch": 5.760709010339734, + "grad_norm": 0.43621830231445924, + "learning_rate": 3.7078336070009723e-06, + "loss": 0.1662, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14152905344963074, + "step": 3900, + "valid_targets_mean": 6270.2, + "valid_targets_min": 298 + }, + { + "epoch": 5.768094534711965, + "grad_norm": 1.253010108913432, + "learning_rate": 3.665220643154681e-06, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16495048999786377, + "step": 3905, + "valid_targets_mean": 4590.6, + "valid_targets_min": 606 + }, + { + "epoch": 5.775480059084195, + "grad_norm": 0.46508774999134267, + "learning_rate": 3.622829251097626e-06, + "loss": 0.1555, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14090654253959656, + "step": 3910, + "valid_targets_mean": 4980.5, + "valid_targets_min": 495 + }, + { + "epoch": 5.7828655834564255, + "grad_norm": 0.44297068805386, + "learning_rate": 3.5806600058444075e-06, + "loss": 0.1387, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15334025025367737, + "step": 3915, + "valid_targets_mean": 5535.4, + "valid_targets_min": 573 + }, + { + "epoch": 5.790251107828656, + "grad_norm": 0.4887477567047723, + "learning_rate": 3.538713479396334e-06, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15382501482963562, + "step": 3920, + "valid_targets_mean": 4043.9, + "valid_targets_min": 553 + }, + { + "epoch": 5.797636632200886, + "grad_norm": 0.5918387677426872, + "learning_rate": 3.4969902407336532e-06, + "loss": 0.1857, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1690470576286316, + "step": 3925, + "valid_targets_mean": 3391.9, + "valid_targets_min": 606 + }, + { + "epoch": 5.805022156573116, + "grad_norm": 0.45735044033920097, + "learning_rate": 3.455490855807855e-06, + "loss": 0.1633, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19501203298568726, + "step": 3930, + "valid_targets_mean": 5170.4, + "valid_targets_min": 686 + }, + { + "epoch": 5.812407680945347, + "grad_norm": 0.42972354738525703, + "learning_rate": 3.4142158875339717e-06, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16080421209335327, + "step": 3935, + "valid_targets_mean": 6471.1, + "valid_targets_min": 858 + }, + { + "epoch": 5.819793205317578, + "grad_norm": 0.4913472734202364, + "learning_rate": 3.373165895782973e-06, + "loss": 0.1505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15726235508918762, + "step": 3940, + "valid_targets_mean": 4487.4, + "valid_targets_min": 536 + }, + { + "epoch": 5.827178729689808, + "grad_norm": 0.3864919394835266, + "learning_rate": 3.332341437374138e-06, + "loss": 0.1463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1257772147655487, + "step": 3945, + "valid_targets_mean": 6180.5, + "valid_targets_min": 606 + }, + { + "epoch": 5.834564254062038, + "grad_norm": 0.4576260726948439, + "learning_rate": 3.2917430660675165e-06, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13794982433319092, + "step": 3950, + "valid_targets_mean": 5125.7, + "valid_targets_min": 562 + }, + { + "epoch": 5.841949778434269, + "grad_norm": 0.6723128146394246, + "learning_rate": 3.251371332556428e-06, + "loss": 0.1506, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14654061198234558, + "step": 3955, + "valid_targets_mean": 5167.3, + "valid_targets_min": 379 + }, + { + "epoch": 5.849335302806499, + "grad_norm": 0.5108031470733075, + "learning_rate": 3.2112267844599686e-06, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15340697765350342, + "step": 3960, + "valid_targets_mean": 4387.8, + "valid_targets_min": 654 + }, + { + "epoch": 5.85672082717873, + "grad_norm": 0.557035301382823, + "learning_rate": 3.171309966315614e-06, + "loss": 0.151, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16433045268058777, + "step": 3965, + "valid_targets_mean": 4127.7, + "valid_targets_min": 490 + }, + { + "epoch": 5.86410635155096, + "grad_norm": 0.4165972177795746, + "learning_rate": 3.131621419571795e-06, + "loss": 0.163, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15628871321678162, + "step": 3970, + "valid_targets_mean": 5658.2, + "valid_targets_min": 563 + }, + { + "epoch": 5.871491875923191, + "grad_norm": 0.4450131689820263, + "learning_rate": 3.0921616825805834e-06, + "loss": 0.1498, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15231899917125702, + "step": 3975, + "valid_targets_mean": 5490.5, + "valid_targets_min": 655 + }, + { + "epoch": 5.878877400295421, + "grad_norm": 0.46785664172207136, + "learning_rate": 3.0529312905903886e-06, + "loss": 0.1595, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15238377451896667, + "step": 3980, + "valid_targets_mean": 4522.8, + "valid_targets_min": 504 + }, + { + "epoch": 5.886262924667651, + "grad_norm": 0.49684861734721625, + "learning_rate": 3.013930775738676e-06, + "loss": 0.148, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1534469872713089, + "step": 3985, + "valid_targets_mean": 4430.8, + "valid_targets_min": 557 + }, + { + "epoch": 5.8936484490398815, + "grad_norm": 0.6814697551595089, + "learning_rate": 2.975160667044763e-06, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16581161320209503, + "step": 3990, + "valid_targets_mean": 4855.6, + "valid_targets_min": 570 + }, + { + "epoch": 5.901033973412112, + "grad_norm": 0.42548375911231223, + "learning_rate": 2.9366214904026445e-06, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1445370763540268, + "step": 3995, + "valid_targets_mean": 5361.6, + "valid_targets_min": 679 + }, + { + "epoch": 5.908419497784343, + "grad_norm": 0.4413005273888539, + "learning_rate": 2.898313768573857e-06, + "loss": 0.1613, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14729730784893036, + "step": 4000, + "valid_targets_mean": 5133.8, + "valid_targets_min": 491 + }, + { + "epoch": 5.915805022156573, + "grad_norm": 0.4985779399517165, + "learning_rate": 2.8602380211803815e-06, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15672720968723297, + "step": 4005, + "valid_targets_mean": 4657.6, + "valid_targets_min": 789 + }, + { + "epoch": 5.9231905465288035, + "grad_norm": 0.5264213368938573, + "learning_rate": 2.822394764697607e-06, + "loss": 0.1653, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14332908391952515, + "step": 4010, + "valid_targets_mean": 4761.6, + "valid_targets_min": 289 + }, + { + "epoch": 5.930576070901034, + "grad_norm": 0.4744288530268933, + "learning_rate": 2.784784512447314e-06, + "loss": 0.1728, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1922370046377182, + "step": 4015, + "valid_targets_mean": 5348.4, + "valid_targets_min": 853 + }, + { + "epoch": 5.937961595273264, + "grad_norm": 0.4438269690328926, + "learning_rate": 2.747407774590718e-06, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1440523862838745, + "step": 4020, + "valid_targets_mean": 4801.4, + "valid_targets_min": 700 + }, + { + "epoch": 5.945347119645495, + "grad_norm": 0.3990012691696163, + "learning_rate": 2.7102650581215486e-06, + "loss": 0.1591, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13657835125923157, + "step": 4025, + "valid_targets_mean": 6708.6, + "valid_targets_min": 648 + }, + { + "epoch": 5.9527326440177255, + "grad_norm": 0.41646211715971676, + "learning_rate": 2.673356866859167e-06, + "loss": 0.1655, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15765783190727234, + "step": 4030, + "valid_targets_mean": 5822.4, + "valid_targets_min": 872 + }, + { + "epoch": 5.960118168389956, + "grad_norm": 0.4931126540910065, + "learning_rate": 2.6366837014417422e-06, + "loss": 0.1599, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1812196969985962, + "step": 4035, + "valid_targets_mean": 5131.9, + "valid_targets_min": 714 + }, + { + "epoch": 5.967503692762186, + "grad_norm": 0.4482245693010152, + "learning_rate": 2.6002460593194466e-06, + "loss": 0.1715, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14398686587810516, + "step": 4040, + "valid_targets_mean": 5426.2, + "valid_targets_min": 384 + }, + { + "epoch": 5.974889217134416, + "grad_norm": 0.4086296843327063, + "learning_rate": 2.5640444347477255e-06, + "loss": 0.145, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1505671888589859, + "step": 4045, + "valid_targets_mean": 6444.2, + "valid_targets_min": 477 + }, + { + "epoch": 5.982274741506647, + "grad_norm": 0.5290771022870968, + "learning_rate": 2.528079318780574e-06, + "loss": 0.1773, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17048917710781097, + "step": 4050, + "valid_targets_mean": 3984.4, + "valid_targets_min": 495 + }, + { + "epoch": 5.989660265878878, + "grad_norm": 0.4256479196291018, + "learning_rate": 2.4923511992638895e-06, + "loss": 0.1601, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14762143790721893, + "step": 4055, + "valid_targets_mean": 5162.6, + "valid_targets_min": 703 + }, + { + "epoch": 5.997045790251108, + "grad_norm": 0.46850279871296086, + "learning_rate": 2.4568605608288533e-06, + "loss": 0.1622, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14865565299987793, + "step": 4060, + "valid_targets_mean": 4318.1, + "valid_targets_min": 486 + }, + { + "epoch": 6.004431314623338, + "grad_norm": 0.5249959190204746, + "learning_rate": 2.4216078848853506e-06, + "loss": 0.1432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17384469509124756, + "step": 4065, + "valid_targets_mean": 3995.9, + "valid_targets_min": 477 + }, + { + "epoch": 6.011816838995569, + "grad_norm": 0.5988657789473296, + "learning_rate": 2.386593649615441e-06, + "loss": 0.1573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.151736781001091, + "step": 4070, + "valid_targets_mean": 2800.8, + "valid_targets_min": 451 + }, + { + "epoch": 6.019202363367799, + "grad_norm": 0.39840181178979595, + "learning_rate": 2.3518183299668796e-06, + "loss": 0.138, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13792383670806885, + "step": 4075, + "valid_targets_mean": 5876.8, + "valid_targets_min": 645 + }, + { + "epoch": 6.026587887740029, + "grad_norm": 0.44045943750338273, + "learning_rate": 2.3172823976466697e-06, + "loss": 0.1406, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15484324097633362, + "step": 4080, + "valid_targets_mean": 5097.8, + "valid_targets_min": 771 + }, + { + "epoch": 6.03397341211226, + "grad_norm": 0.4629364970805179, + "learning_rate": 2.282986321114662e-06, + "loss": 0.1416, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14461535215377808, + "step": 4085, + "valid_targets_mean": 5088.2, + "valid_targets_min": 655 + }, + { + "epoch": 6.041358936484491, + "grad_norm": 0.40286656062179965, + "learning_rate": 2.2489305655771987e-06, + "loss": 0.1534, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16793042421340942, + "step": 4090, + "valid_targets_mean": 6758.6, + "valid_targets_min": 832 + }, + { + "epoch": 6.048744460856721, + "grad_norm": 0.4182839452913791, + "learning_rate": 2.215115592980821e-06, + "loss": 0.1484, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14046970009803772, + "step": 4095, + "valid_targets_mean": 5969.9, + "valid_targets_min": 640 + }, + { + "epoch": 6.056129985228951, + "grad_norm": 0.43593965359653836, + "learning_rate": 2.1815418620059824e-06, + "loss": 0.1388, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1332874596118927, + "step": 4100, + "valid_targets_mean": 5555.6, + "valid_targets_min": 476 + }, + { + "epoch": 6.0635155096011815, + "grad_norm": 0.46039610869991315, + "learning_rate": 2.1482098280608234e-06, + "loss": 0.1619, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.2060922533273697, + "step": 4105, + "valid_targets_mean": 5705.4, + "valid_targets_min": 614 + }, + { + "epoch": 6.070901033973412, + "grad_norm": 0.37373910107719843, + "learning_rate": 2.1151199432750192e-06, + "loss": 0.1477, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1127336174249649, + "step": 4110, + "valid_targets_mean": 6756.9, + "valid_targets_min": 831 + }, + { + "epoch": 6.078286558345643, + "grad_norm": 0.44535195342707495, + "learning_rate": 2.082272656493625e-06, + "loss": 0.146, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15034779906272888, + "step": 4115, + "valid_targets_mean": 5689.1, + "valid_targets_min": 349 + }, + { + "epoch": 6.085672082717873, + "grad_norm": 0.5195916162622733, + "learning_rate": 2.049668413271e-06, + "loss": 0.1573, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19452224671840668, + "step": 4120, + "valid_targets_mean": 4656.3, + "valid_targets_min": 636 + }, + { + "epoch": 6.0930576070901035, + "grad_norm": 0.5560703022649638, + "learning_rate": 2.0173076558647487e-06, + "loss": 0.1443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14960438013076782, + "step": 4125, + "valid_targets_mean": 4023.4, + "valid_targets_min": 692 + }, + { + "epoch": 6.100443131462334, + "grad_norm": 0.4749923373792734, + "learning_rate": 1.9851908232297525e-06, + "loss": 0.1501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13118794560432434, + "step": 4130, + "valid_targets_mean": 4465.4, + "valid_targets_min": 326 + }, + { + "epoch": 6.107828655834564, + "grad_norm": 0.4829535285337888, + "learning_rate": 1.9533183510121725e-06, + "loss": 0.1508, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15433762967586517, + "step": 4135, + "valid_targets_mean": 5477.0, + "valid_targets_min": 621 + }, + { + "epoch": 6.115214180206794, + "grad_norm": 0.45301666809919827, + "learning_rate": 1.9216906715435744e-06, + "loss": 0.1414, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1339162439107895, + "step": 4140, + "valid_targets_mean": 4647.9, + "valid_targets_min": 761 + }, + { + "epoch": 6.1225997045790255, + "grad_norm": 0.39338422495107067, + "learning_rate": 1.8903082138350548e-06, + "loss": 0.1489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1282079815864563, + "step": 4145, + "valid_targets_mean": 5771.3, + "valid_targets_min": 425 + }, + { + "epoch": 6.129985228951256, + "grad_norm": 0.44371532394815066, + "learning_rate": 1.8591714035714049e-06, + "loss": 0.1465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16418243944644928, + "step": 4150, + "valid_targets_mean": 5660.0, + "valid_targets_min": 479 + }, + { + "epoch": 6.137370753323486, + "grad_norm": 0.48563406133620735, + "learning_rate": 1.8282806631053662e-06, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16766785085201263, + "step": 4155, + "valid_targets_mean": 4655.8, + "valid_targets_min": 433 + }, + { + "epoch": 6.144756277695716, + "grad_norm": 0.6072416236863333, + "learning_rate": 1.7976364114518796e-06, + "loss": 0.1463, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1494389772415161, + "step": 4160, + "valid_targets_mean": 3736.5, + "valid_targets_min": 491 + }, + { + "epoch": 6.152141802067947, + "grad_norm": 0.50481142735507, + "learning_rate": 1.7672390642824156e-06, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17777088284492493, + "step": 4165, + "valid_targets_mean": 4276.0, + "valid_targets_min": 666 + }, + { + "epoch": 6.159527326440177, + "grad_norm": 0.42071660129307986, + "learning_rate": 1.7370890339193191e-06, + "loss": 0.1647, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1517738252878189, + "step": 4170, + "valid_targets_mean": 6217.3, + "valid_targets_min": 747 + }, + { + "epoch": 6.166912850812408, + "grad_norm": 0.4290485201318066, + "learning_rate": 1.7071867293302325e-06, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1593089997768402, + "step": 4175, + "valid_targets_mean": 5545.1, + "valid_targets_min": 765 + }, + { + "epoch": 6.174298375184638, + "grad_norm": 0.43744895390706484, + "learning_rate": 1.6775325561225409e-06, + "loss": 0.15, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13858440518379211, + "step": 4180, + "valid_targets_mean": 5377.1, + "valid_targets_min": 466 + }, + { + "epoch": 6.181683899556869, + "grad_norm": 0.4262626698684947, + "learning_rate": 1.648126916537871e-06, + "loss": 0.1494, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14648282527923584, + "step": 4185, + "valid_targets_mean": 5709.4, + "valid_targets_min": 654 + }, + { + "epoch": 6.189069423929099, + "grad_norm": 0.47051468258097345, + "learning_rate": 1.6189702094466309e-06, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1473892331123352, + "step": 4190, + "valid_targets_mean": 4704.9, + "valid_targets_min": 755 + }, + { + "epoch": 6.196454948301329, + "grad_norm": 0.49070628134476585, + "learning_rate": 1.5900628303426114e-06, + "loss": 0.1571, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15285709500312805, + "step": 4195, + "valid_targets_mean": 4618.8, + "valid_targets_min": 530 + }, + { + "epoch": 6.203840472673559, + "grad_norm": 0.4253518994986448, + "learning_rate": 1.5614051713376066e-06, + "loss": 0.1421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15116259455680847, + "step": 4200, + "valid_targets_mean": 5736.6, + "valid_targets_min": 412 + }, + { + "epoch": 6.211225997045791, + "grad_norm": 0.4641185127939372, + "learning_rate": 1.5329976211561138e-06, + "loss": 0.1685, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.20113864541053772, + "step": 4205, + "valid_targets_mean": 5099.3, + "valid_targets_min": 529 + }, + { + "epoch": 6.218611521418021, + "grad_norm": 0.39647752417172666, + "learning_rate": 1.504840565130039e-06, + "loss": 0.1464, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13573205471038818, + "step": 4210, + "valid_targets_mean": 6350.6, + "valid_targets_min": 711 + }, + { + "epoch": 6.225997045790251, + "grad_norm": 0.48103069316244723, + "learning_rate": 1.4769343851934847e-06, + "loss": 0.1638, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1464741975069046, + "step": 4215, + "valid_targets_mean": 4600.5, + "valid_targets_min": 514 + }, + { + "epoch": 6.233382570162481, + "grad_norm": 0.40232624403233436, + "learning_rate": 1.4492794598775661e-06, + "loss": 0.1431, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13256195187568665, + "step": 4220, + "valid_targets_mean": 6487.2, + "valid_targets_min": 679 + }, + { + "epoch": 6.240768094534712, + "grad_norm": 0.5055273590949532, + "learning_rate": 1.4218761643052692e-06, + "loss": 0.1538, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15635859966278076, + "step": 4225, + "valid_targets_mean": 4193.4, + "valid_targets_min": 721 + }, + { + "epoch": 6.248153618906942, + "grad_norm": 0.43866996029377286, + "learning_rate": 1.3947248701863813e-06, + "loss": 0.1469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16044774651527405, + "step": 4230, + "valid_targets_mean": 5434.1, + "valid_targets_min": 541 + }, + { + "epoch": 6.255539143279173, + "grad_norm": 0.49231511408163975, + "learning_rate": 1.3678259458124177e-06, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13283999264240265, + "step": 4235, + "valid_targets_mean": 4179.0, + "valid_targets_min": 800 + }, + { + "epoch": 6.262924667651403, + "grad_norm": 0.4230623819145653, + "learning_rate": 1.3411797560516627e-06, + "loss": 0.1344, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1318785846233368, + "step": 4240, + "valid_targets_mean": 6249.6, + "valid_targets_min": 703 + }, + { + "epoch": 6.270310192023634, + "grad_norm": 0.46935895443155884, + "learning_rate": 1.314786662344194e-06, + "loss": 0.1497, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17924916744232178, + "step": 4245, + "valid_targets_mean": 5661.8, + "valid_targets_min": 570 + }, + { + "epoch": 6.277695716395864, + "grad_norm": 0.3888317431003321, + "learning_rate": 1.2886470226969895e-06, + "loss": 0.1519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13889117538928986, + "step": 4250, + "valid_targets_mean": 6869.9, + "valid_targets_min": 990 + }, + { + "epoch": 6.285081240768094, + "grad_norm": 0.4352449485035959, + "learning_rate": 1.2627611916790715e-06, + "loss": 0.1473, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14910173416137695, + "step": 4255, + "valid_targets_mean": 5603.9, + "valid_targets_min": 491 + }, + { + "epoch": 6.2924667651403245, + "grad_norm": 0.40749937475334613, + "learning_rate": 1.237129520416691e-06, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15692628920078278, + "step": 4260, + "valid_targets_mean": 6081.8, + "valid_targets_min": 831 + }, + { + "epoch": 6.299852289512556, + "grad_norm": 0.4037894828650866, + "learning_rate": 1.211752356588578e-06, + "loss": 0.1421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13675357401371002, + "step": 4265, + "valid_targets_mean": 6421.4, + "valid_targets_min": 457 + }, + { + "epoch": 6.307237813884786, + "grad_norm": 0.5205689683666549, + "learning_rate": 1.1866300444212108e-06, + "loss": 0.1517, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15477876365184784, + "step": 4270, + "valid_targets_mean": 3994.2, + "valid_targets_min": 546 + }, + { + "epoch": 6.314623338257016, + "grad_norm": 0.5661967143739712, + "learning_rate": 1.161762924684151e-06, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15488114953041077, + "step": 4275, + "valid_targets_mean": 4311.4, + "valid_targets_min": 541 + }, + { + "epoch": 6.3220088626292466, + "grad_norm": 0.4837147960209401, + "learning_rate": 1.1371513346854313e-06, + "loss": 0.1432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16949772834777832, + "step": 4280, + "valid_targets_mean": 4836.6, + "valid_targets_min": 493 + }, + { + "epoch": 6.329394387001477, + "grad_norm": 0.5432762673776387, + "learning_rate": 1.1127956082669633e-06, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18372945487499237, + "step": 4285, + "valid_targets_mean": 4226.1, + "valid_targets_min": 707 + }, + { + "epoch": 6.336779911373707, + "grad_norm": 0.5144469478911506, + "learning_rate": 1.0886960758000265e-06, + "loss": 0.1635, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16694745421409607, + "step": 4290, + "valid_targets_mean": 3961.9, + "valid_targets_min": 400 + }, + { + "epoch": 6.344165435745938, + "grad_norm": 0.43923637649600045, + "learning_rate": 1.0648530641807686e-06, + "loss": 0.1501, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1794458031654358, + "step": 4295, + "valid_targets_mean": 5821.2, + "valid_targets_min": 526 + }, + { + "epoch": 6.3515509601181686, + "grad_norm": 0.5123527090839185, + "learning_rate": 1.0412668968257877e-06, + "loss": 0.1445, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14006029069423676, + "step": 4300, + "valid_targets_mean": 4426.3, + "valid_targets_min": 781 + }, + { + "epoch": 6.358936484490399, + "grad_norm": 0.4115851655709371, + "learning_rate": 1.0179378936677397e-06, + "loss": 0.1417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1542370468378067, + "step": 4305, + "valid_targets_mean": 5933.6, + "valid_targets_min": 666 + }, + { + "epoch": 6.366322008862629, + "grad_norm": 0.5258862879779717, + "learning_rate": 9.948663711509954e-07, + "loss": 0.1575, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15517973899841309, + "step": 4310, + "valid_targets_mean": 4407.2, + "valid_targets_min": 586 + }, + { + "epoch": 6.373707533234859, + "grad_norm": 0.44159616792495127, + "learning_rate": 9.720526422273412e-07, + "loss": 0.1512, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1318591982126236, + "step": 4315, + "valid_targets_mean": 5543.4, + "valid_targets_min": 841 + }, + { + "epoch": 6.381093057607091, + "grad_norm": 0.49990677130913785, + "learning_rate": 9.494970163517681e-07, + "loss": 0.1565, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15071862936019897, + "step": 4320, + "valid_targets_mean": 4528.5, + "valid_targets_min": 573 + }, + { + "epoch": 6.388478581979321, + "grad_norm": 0.4164585567607767, + "learning_rate": 9.2719979947822e-07, + "loss": 0.1611, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15653051435947418, + "step": 4325, + "valid_targets_mean": 6323.8, + "valid_targets_min": 490 + }, + { + "epoch": 6.395864106351551, + "grad_norm": 0.5160481333349497, + "learning_rate": 9.051612940554899e-07, + "loss": 0.1568, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1293860673904419, + "step": 4330, + "valid_targets_mean": 4956.7, + "valid_targets_min": 796 + }, + { + "epoch": 6.403249630723781, + "grad_norm": 0.5499032197777276, + "learning_rate": 8.833817990230953e-07, + "loss": 0.168, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15469960868358612, + "step": 4335, + "valid_targets_mean": 4249.5, + "valid_targets_min": 324 + }, + { + "epoch": 6.410635155096012, + "grad_norm": 0.40152843519987197, + "learning_rate": 8.61861609807233e-07, + "loss": 0.1432, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11483586579561234, + "step": 4340, + "valid_targets_mean": 6111.3, + "valid_targets_min": 712 + }, + { + "epoch": 6.418020679468242, + "grad_norm": 0.42446425379167474, + "learning_rate": 8.406010183167601e-07, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1487945318222046, + "step": 4345, + "valid_targets_mean": 7079.7, + "valid_targets_min": 685 + }, + { + "epoch": 6.425406203840472, + "grad_norm": 0.7413738678926328, + "learning_rate": 8.196003129392438e-07, + "loss": 0.1732, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16445940732955933, + "step": 4350, + "valid_targets_mean": 3112.4, + "valid_targets_min": 548 + }, + { + "epoch": 6.432791728212703, + "grad_norm": 0.44560225337100395, + "learning_rate": 7.988597785370533e-07, + "loss": 0.1442, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12470986694097519, + "step": 4355, + "valid_targets_mean": 5328.5, + "valid_targets_min": 257 + }, + { + "epoch": 6.440177252584934, + "grad_norm": 0.4745632975601192, + "learning_rate": 7.783796964434831e-07, + "loss": 0.1434, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16859686374664307, + "step": 4360, + "valid_targets_mean": 5014.4, + "valid_targets_min": 726 + }, + { + "epoch": 6.447562776957164, + "grad_norm": 0.4642679768852893, + "learning_rate": 7.581603444589447e-07, + "loss": 0.1519, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13736671209335327, + "step": 4365, + "valid_targets_mean": 4825.1, + "valid_targets_min": 528 + }, + { + "epoch": 6.454948301329394, + "grad_norm": 0.4434842107811936, + "learning_rate": 7.382019968471943e-07, + "loss": 0.1753, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14103060960769653, + "step": 4370, + "valid_targets_mean": 5577.3, + "valid_targets_min": 552 + }, + { + "epoch": 6.4623338257016245, + "grad_norm": 0.503057563707869, + "learning_rate": 7.185049243316222e-07, + "loss": 0.152, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14642839133739471, + "step": 4375, + "valid_targets_mean": 4761.9, + "valid_targets_min": 652 + }, + { + "epoch": 6.469719350073856, + "grad_norm": 0.6276381062918326, + "learning_rate": 6.990693940915694e-07, + "loss": 0.1716, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.24949908256530762, + "step": 4380, + "valid_targets_mean": 3993.2, + "valid_targets_min": 535 + }, + { + "epoch": 6.477104874446086, + "grad_norm": 0.4926903015244238, + "learning_rate": 6.798956697587079e-07, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1511232703924179, + "step": 4385, + "valid_targets_mean": 4615.3, + "valid_targets_min": 595 + }, + { + "epoch": 6.484490398818316, + "grad_norm": 0.49283910568013917, + "learning_rate": 6.60984011413468e-07, + "loss": 0.1505, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16701708734035492, + "step": 4390, + "valid_targets_mean": 4338.4, + "valid_targets_min": 645 + }, + { + "epoch": 6.4918759231905465, + "grad_norm": 0.5002990883414868, + "learning_rate": 6.423346755815019e-07, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13407056033611298, + "step": 4395, + "valid_targets_mean": 4023.8, + "valid_targets_min": 658 + }, + { + "epoch": 6.499261447562777, + "grad_norm": 0.5338641891206741, + "learning_rate": 6.239479152302164e-07, + "loss": 0.1465, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17522169649600983, + "step": 4400, + "valid_targets_mean": 4392.1, + "valid_targets_min": 670 + }, + { + "epoch": 6.506646971935007, + "grad_norm": 0.5484669473808206, + "learning_rate": 6.058239797653276e-07, + "loss": 0.1678, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1687406748533249, + "step": 4405, + "valid_targets_mean": 4462.4, + "valid_targets_min": 482 + }, + { + "epoch": 6.514032496307237, + "grad_norm": 0.5639609310377635, + "learning_rate": 5.879631150274901e-07, + "loss": 0.1489, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15761615335941315, + "step": 4410, + "valid_targets_mean": 4938.3, + "valid_targets_min": 271 + }, + { + "epoch": 6.5214180206794685, + "grad_norm": 0.4302419740392286, + "learning_rate": 5.703655632889505e-07, + "loss": 0.1456, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12143665552139282, + "step": 4415, + "valid_targets_mean": 6462.4, + "valid_targets_min": 537 + }, + { + "epoch": 6.528803545051699, + "grad_norm": 0.4018145246408569, + "learning_rate": 5.530315632502725e-07, + "loss": 0.1537, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15424439311027527, + "step": 4420, + "valid_targets_mean": 7003.6, + "valid_targets_min": 650 + }, + { + "epoch": 6.536189069423929, + "grad_norm": 0.4280985946691859, + "learning_rate": 5.359613500370931e-07, + "loss": 0.1626, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13937228918075562, + "step": 4425, + "valid_targets_mean": 5996.9, + "valid_targets_min": 928 + }, + { + "epoch": 6.543574593796159, + "grad_norm": 0.512095968806376, + "learning_rate": 5.191551551969376e-07, + "loss": 0.1462, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1761317402124405, + "step": 4430, + "valid_targets_mean": 5061.4, + "valid_targets_min": 639 + }, + { + "epoch": 6.55096011816839, + "grad_norm": 0.42658213407812473, + "learning_rate": 5.026132066960699e-07, + "loss": 0.1372, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12839290499687195, + "step": 4435, + "valid_targets_mean": 6122.6, + "valid_targets_min": 989 + }, + { + "epoch": 6.558345642540621, + "grad_norm": 0.43584162901665235, + "learning_rate": 4.863357289164094e-07, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13353732228279114, + "step": 4440, + "valid_targets_mean": 5793.8, + "valid_targets_min": 774 + }, + { + "epoch": 6.565731166912851, + "grad_norm": 0.5121854270322571, + "learning_rate": 4.703229426524836e-07, + "loss": 0.1417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.140175461769104, + "step": 4445, + "valid_targets_mean": 6360.8, + "valid_targets_min": 825 + }, + { + "epoch": 6.573116691285081, + "grad_norm": 0.4891587376138227, + "learning_rate": 4.545750651084291e-07, + "loss": 0.1466, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1598307192325592, + "step": 4450, + "valid_targets_mean": 5076.1, + "valid_targets_min": 641 + }, + { + "epoch": 6.580502215657312, + "grad_norm": 0.48474954710037227, + "learning_rate": 4.3909230989505724e-07, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15411385893821716, + "step": 4455, + "valid_targets_mean": 4396.3, + "valid_targets_min": 641 + }, + { + "epoch": 6.587887740029542, + "grad_norm": 0.48095262700952346, + "learning_rate": 4.2387488702693823e-07, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15224063396453857, + "step": 4460, + "valid_targets_mean": 4503.4, + "valid_targets_min": 612 + }, + { + "epoch": 6.595273264401772, + "grad_norm": 0.42397246226943786, + "learning_rate": 4.0892300291957674e-07, + "loss": 0.1786, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14690491557121277, + "step": 4465, + "valid_targets_mean": 5915.6, + "valid_targets_min": 495 + }, + { + "epoch": 6.6026587887740025, + "grad_norm": 0.43317383963947165, + "learning_rate": 3.942368603865898e-07, + "loss": 0.1522, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13303223252296448, + "step": 4470, + "valid_targets_mean": 5465.1, + "valid_targets_min": 1317 + }, + { + "epoch": 6.610044313146234, + "grad_norm": 0.4006775023374142, + "learning_rate": 3.798166586369645e-07, + "loss": 0.1374, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15547743439674377, + "step": 4475, + "valid_targets_mean": 7271.6, + "valid_targets_min": 636 + }, + { + "epoch": 6.617429837518464, + "grad_norm": 0.4117824406720461, + "learning_rate": 3.656625932723579e-07, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13841594755649567, + "step": 4480, + "valid_targets_mean": 6281.6, + "valid_targets_min": 685 + }, + { + "epoch": 6.624815361890694, + "grad_norm": 0.5336042505216406, + "learning_rate": 3.5177485628444363e-07, + "loss": 0.1579, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1793767213821411, + "step": 4485, + "valid_targets_mean": 4352.6, + "valid_targets_min": 603 + }, + { + "epoch": 6.6322008862629245, + "grad_norm": 0.44302761320669076, + "learning_rate": 3.381536360523008e-07, + "loss": 0.1375, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14193832874298096, + "step": 4490, + "valid_targets_mean": 5695.8, + "valid_targets_min": 734 + }, + { + "epoch": 6.639586410635155, + "grad_norm": 0.4383730918433756, + "learning_rate": 3.247991173398668e-07, + "loss": 0.1321, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1333121955394745, + "step": 4495, + "valid_targets_mean": 5675.1, + "valid_targets_min": 622 + }, + { + "epoch": 6.646971935007386, + "grad_norm": 0.5170425717885939, + "learning_rate": 3.117114812934263e-07, + "loss": 0.1411, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1605900526046753, + "step": 4500, + "valid_targets_mean": 4572.1, + "valid_targets_min": 575 + }, + { + "epoch": 6.654357459379616, + "grad_norm": 0.48806033544384714, + "learning_rate": 2.988909054391598e-07, + "loss": 0.1545, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1332065463066101, + "step": 4505, + "valid_targets_mean": 4321.8, + "valid_targets_min": 573 + }, + { + "epoch": 6.6617429837518465, + "grad_norm": 0.37245789606924895, + "learning_rate": 2.8633756368072307e-07, + "loss": 0.1632, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11881101131439209, + "step": 4510, + "valid_targets_mean": 6310.8, + "valid_targets_min": 803 + }, + { + "epoch": 6.669128508124077, + "grad_norm": 0.4549884322992929, + "learning_rate": 2.740516262969051e-07, + "loss": 0.1481, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1540367156267166, + "step": 4515, + "valid_targets_mean": 5073.8, + "valid_targets_min": 637 + }, + { + "epoch": 6.676514032496307, + "grad_norm": 0.4944141719547407, + "learning_rate": 2.6203325993930717e-07, + "loss": 0.1594, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11980420351028442, + "step": 4520, + "valid_targets_mean": 4708.9, + "valid_targets_min": 499 + }, + { + "epoch": 6.683899556868537, + "grad_norm": 0.5049668775794947, + "learning_rate": 2.5028262763008514e-07, + "loss": 0.1478, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17368857562541962, + "step": 4525, + "valid_targets_mean": 4323.2, + "valid_targets_min": 529 + }, + { + "epoch": 6.691285081240768, + "grad_norm": 0.39940995378060945, + "learning_rate": 2.3879988875973314e-07, + "loss": 0.1536, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12155850231647491, + "step": 4530, + "valid_targets_mean": 6206.7, + "valid_targets_min": 552 + }, + { + "epoch": 6.698670605612999, + "grad_norm": 0.4013521013669753, + "learning_rate": 2.2758519908493648e-07, + "loss": 0.1525, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15022462606430054, + "step": 4535, + "valid_targets_mean": 6336.5, + "valid_targets_min": 630 + }, + { + "epoch": 6.706056129985229, + "grad_norm": 0.5620177209255185, + "learning_rate": 2.1663871072643784e-07, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16755668818950653, + "step": 4540, + "valid_targets_mean": 3727.6, + "valid_targets_min": 390 + }, + { + "epoch": 6.713441654357459, + "grad_norm": 0.5258801995748262, + "learning_rate": 2.059605721669966e-07, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14239409565925598, + "step": 4545, + "valid_targets_mean": 4034.1, + "valid_targets_min": 540 + }, + { + "epoch": 6.72082717872969, + "grad_norm": 0.4560448915232561, + "learning_rate": 1.9555092824935062e-07, + "loss": 0.1496, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1599361002445221, + "step": 4550, + "valid_targets_mean": 5137.9, + "valid_targets_min": 310 + }, + { + "epoch": 6.72821270310192, + "grad_norm": 0.46957544677234575, + "learning_rate": 1.854099201742754e-07, + "loss": 0.1469, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13492749631404877, + "step": 4555, + "valid_targets_mean": 5321.8, + "valid_targets_min": 486 + }, + { + "epoch": 6.735598227474151, + "grad_norm": 0.4893832374792917, + "learning_rate": 1.7553768549864791e-07, + "loss": 0.1526, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1458035260438919, + "step": 4560, + "valid_targets_mean": 4632.6, + "valid_targets_min": 723 + }, + { + "epoch": 6.742983751846381, + "grad_norm": 0.49924643293505594, + "learning_rate": 1.6593435813359704e-07, + "loss": 0.1421, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1429969072341919, + "step": 4565, + "valid_targets_mean": 5114.1, + "valid_targets_min": 573 + }, + { + "epoch": 6.750369276218612, + "grad_norm": 0.5588546086896948, + "learning_rate": 1.5660006834267826e-07, + "loss": 0.1577, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.17076215147972107, + "step": 4570, + "valid_targets_mean": 3644.6, + "valid_targets_min": 462 + }, + { + "epoch": 6.757754800590842, + "grad_norm": 0.44934370220938186, + "learning_rate": 1.4753494274010404e-07, + "loss": 0.1557, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12686952948570251, + "step": 4575, + "valid_targets_mean": 4405.2, + "valid_targets_min": 532 + }, + { + "epoch": 6.765140324963072, + "grad_norm": 0.40287819658097046, + "learning_rate": 1.387391042890407e-07, + "loss": 0.1563, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1626736968755722, + "step": 4580, + "valid_targets_mean": 6230.9, + "valid_targets_min": 471 + }, + { + "epoch": 6.772525849335302, + "grad_norm": 0.39016310022420664, + "learning_rate": 1.3021267229992307e-07, + "loss": 0.1541, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13419151306152344, + "step": 4585, + "valid_targets_mean": 5865.1, + "valid_targets_min": 552 + }, + { + "epoch": 6.779911373707534, + "grad_norm": 0.48929349503037056, + "learning_rate": 1.2195576242884256e-07, + "loss": 0.1492, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16033774614334106, + "step": 4590, + "valid_targets_mean": 4465.0, + "valid_targets_min": 462 + }, + { + "epoch": 6.787296898079764, + "grad_norm": 0.4099040200611785, + "learning_rate": 1.1396848667598826e-07, + "loss": 0.1514, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13809999823570251, + "step": 4595, + "valid_targets_mean": 5768.4, + "valid_targets_min": 562 + }, + { + "epoch": 6.794682422451994, + "grad_norm": 0.47774449623791543, + "learning_rate": 1.0625095338411051e-07, + "loss": 0.1507, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1699640303850174, + "step": 4600, + "valid_targets_mean": 4953.6, + "valid_targets_min": 553 + }, + { + "epoch": 6.802067946824224, + "grad_norm": 0.5076200380883104, + "learning_rate": 9.880326723706646e-08, + "loss": 0.1479, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1531982421875, + "step": 4605, + "valid_targets_mean": 4243.5, + "valid_targets_min": 575 + }, + { + "epoch": 6.809453471196455, + "grad_norm": 0.5365924598906461, + "learning_rate": 9.162552925838564e-08, + "loss": 0.1694, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.19307509064674377, + "step": 4610, + "valid_targets_mean": 4674.6, + "valid_targets_min": 577 + }, + { + "epoch": 6.816838995568685, + "grad_norm": 0.4114855170974802, + "learning_rate": 8.471783680991774e-08, + "loss": 0.1649, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11866377294063568, + "step": 4615, + "valid_targets_mean": 5945.6, + "valid_targets_min": 500 + }, + { + "epoch": 6.824224519940916, + "grad_norm": 0.4311591899524311, + "learning_rate": 7.80802835904959e-08, + "loss": 0.137, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13947513699531555, + "step": 4620, + "valid_targets_mean": 5683.1, + "valid_targets_min": 634 + }, + { + "epoch": 6.8316100443131464, + "grad_norm": 0.5619454499028758, + "learning_rate": 7.171295963466884e-08, + "loss": 0.1597, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1453944444656372, + "step": 4625, + "valid_targets_mean": 4229.2, + "valid_targets_min": 517 + }, + { + "epoch": 6.838995568685377, + "grad_norm": 0.43973530083348866, + "learning_rate": 6.561595131149068e-08, + "loss": 0.1509, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15559229254722595, + "step": 4630, + "valid_targets_mean": 5828.9, + "valid_targets_min": 731 + }, + { + "epoch": 6.846381093057607, + "grad_norm": 0.46646297249339624, + "learning_rate": 5.97893413233308e-08, + "loss": 0.1654, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13109324872493744, + "step": 4635, + "valid_targets_mean": 4558.2, + "valid_targets_min": 721 + }, + { + "epoch": 6.853766617429837, + "grad_norm": 0.5158968571743305, + "learning_rate": 5.4233208704770336e-08, + "loss": 0.1574, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18643151223659515, + "step": 4640, + "valid_targets_mean": 4622.7, + "valid_targets_min": 529 + }, + { + "epoch": 6.861152141802068, + "grad_norm": 0.42120731834961433, + "learning_rate": 4.894762882151849e-08, + "loss": 0.1488, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12933212518692017, + "step": 4645, + "valid_targets_mean": 5539.8, + "valid_targets_min": 516 + }, + { + "epoch": 6.868537666174299, + "grad_norm": 0.4701920136133854, + "learning_rate": 4.393267336939566e-08, + "loss": 0.1547, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16911543905735016, + "step": 4650, + "valid_targets_mean": 5174.4, + "valid_targets_min": 494 + }, + { + "epoch": 6.875923190546529, + "grad_norm": 0.4483722090071213, + "learning_rate": 3.9188410373358635e-08, + "loss": 0.1803, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.23113879561424255, + "step": 4655, + "valid_targets_mean": 6327.1, + "valid_targets_min": 597 + }, + { + "epoch": 6.883308714918759, + "grad_norm": 0.4700078471031414, + "learning_rate": 3.471490418657686e-08, + "loss": 0.1427, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15961627662181854, + "step": 4660, + "valid_targets_mean": 5725.3, + "valid_targets_min": 648 + }, + { + "epoch": 6.89069423929099, + "grad_norm": 0.4655597595051389, + "learning_rate": 3.051221548956207e-08, + "loss": 0.1443, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12833762168884277, + "step": 4665, + "valid_targets_mean": 4711.9, + "valid_targets_min": 725 + }, + { + "epoch": 6.89807976366322, + "grad_norm": 0.6380093111909683, + "learning_rate": 2.6580401289344472e-08, + "loss": 0.1468, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1340949535369873, + "step": 4670, + "valid_targets_mean": 3719.6, + "valid_targets_min": 521 + }, + { + "epoch": 6.905465288035451, + "grad_norm": 0.42232440957412026, + "learning_rate": 2.2919514918700038e-08, + "loss": 0.1584, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13673587143421173, + "step": 4675, + "valid_targets_mean": 6005.1, + "valid_targets_min": 567 + }, + { + "epoch": 6.912850812407681, + "grad_norm": 0.4656904084131196, + "learning_rate": 1.95296060354222e-08, + "loss": 0.1552, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1485702097415924, + "step": 4680, + "valid_targets_mean": 5101.6, + "valid_targets_min": 579 + }, + { + "epoch": 6.920236336779912, + "grad_norm": 0.4808238353600059, + "learning_rate": 1.6410720621655718e-08, + "loss": 0.1363, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1313798427581787, + "step": 4685, + "valid_targets_mean": 6804.3, + "valid_targets_min": 819 + }, + { + "epoch": 6.927621861152142, + "grad_norm": 0.4233348673449903, + "learning_rate": 1.356290098327051e-08, + "loss": 0.1553, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15477396547794342, + "step": 4690, + "valid_targets_mean": 6435.8, + "valid_targets_min": 699 + }, + { + "epoch": 6.935007385524372, + "grad_norm": 0.43007196837079203, + "learning_rate": 1.0986185749282118e-08, + "loss": 0.1486, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.11044066399335861, + "step": 4695, + "valid_targets_mean": 5944.6, + "valid_targets_min": 229 + }, + { + "epoch": 6.942392909896602, + "grad_norm": 0.5232298527072103, + "learning_rate": 8.680609871338786e-09, + "loss": 0.1708, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.18600329756736755, + "step": 4700, + "valid_targets_mean": 4885.4, + "valid_targets_min": 799 + }, + { + "epoch": 6.949778434268833, + "grad_norm": 0.6339786906635847, + "learning_rate": 6.646204623232954e-09, + "loss": 0.181, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.21870045363903046, + "step": 4705, + "valid_targets_mean": 3441.2, + "valid_targets_min": 526 + }, + { + "epoch": 6.957163958641064, + "grad_norm": 0.3930755994701203, + "learning_rate": 4.882997600494932e-09, + "loss": 0.1544, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.12523368000984192, + "step": 4710, + "valid_targets_mean": 6379.2, + "valid_targets_min": 563 + }, + { + "epoch": 6.964549483013294, + "grad_norm": 0.46455499149966306, + "learning_rate": 3.3910127199998643e-09, + "loss": 0.1625, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.1395748257637024, + "step": 4715, + "valid_targets_mean": 5036.2, + "valid_targets_min": 476 + }, + { + "epoch": 6.971935007385524, + "grad_norm": 0.5569847284562671, + "learning_rate": 2.170270219654658e-09, + "loss": 0.1417, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.16902390122413635, + "step": 4720, + "valid_targets_mean": 4324.9, + "valid_targets_min": 542 + }, + { + "epoch": 6.979320531757755, + "grad_norm": 0.4994292568305981, + "learning_rate": 1.2207866581248617e-09, + "loss": 0.1724, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.15097226202487946, + "step": 4725, + "valid_targets_mean": 4825.1, + "valid_targets_min": 314 + }, + { + "epoch": 6.986706056129985, + "grad_norm": 0.4317365485031485, + "learning_rate": 5.425749146015235e-10, + "loss": 0.1838, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.14707861840724945, + "step": 4730, + "valid_targets_mean": 6297.3, + "valid_targets_min": 696 + }, + { + "epoch": 6.994091580502216, + "grad_norm": 0.43567574617565746, + "learning_rate": 1.3564418863465378e-10, + "loss": 0.1539, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13918901979923248, + "step": 4735, + "valid_targets_mean": 5156.8, + "valid_targets_min": 633 + }, + { + "epoch": 7.0, + "loss_nan_ranks": 0, + "loss_rank_avg": 0.13066400587558746, + "step": 4739, + "total_flos": 1781000233680896.0, + "train_loss": 0.19506101114987875, + "train_runtime": 33204.2929, + "train_samples_per_second": 2.281, + "train_steps_per_second": 0.143, + "valid_targets_mean": 5264.3, + "valid_targets_min": 320 + } + ], + "logging_steps": 5, + "max_steps": 4739, + "num_input_tokens_seen": 0, + "num_train_epochs": 7, + "save_steps": 1500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1781000233680896.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}