| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 785, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.032, |
| "grad_norm": 5.969919192266041, |
| "learning_rate": 2.0253164556962026e-06, |
| "loss": 0.8031, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10316399484872818, |
| "step": 5, |
| "valid_targets_mean": 5898.1, |
| "valid_targets_min": 4028 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 3.6429299410711122, |
| "learning_rate": 4.556962025316456e-06, |
| "loss": 0.7889, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09859731048345566, |
| "step": 10, |
| "valid_targets_mean": 6049.2, |
| "valid_targets_min": 4883 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 2.0680976313131443, |
| "learning_rate": 7.08860759493671e-06, |
| "loss": 0.7529, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09387196600437164, |
| "step": 15, |
| "valid_targets_mean": 5630.9, |
| "valid_targets_min": 1497 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.9234228784288777, |
| "learning_rate": 9.620253164556963e-06, |
| "loss": 0.703, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09998244792222977, |
| "step": 20, |
| "valid_targets_mean": 5124.2, |
| "valid_targets_min": 2773 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.7533730035653565, |
| "learning_rate": 1.2151898734177216e-05, |
| "loss": 0.6706, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08619378507137299, |
| "step": 25, |
| "valid_targets_mean": 5973.1, |
| "valid_targets_min": 4495 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.5229622968543792, |
| "learning_rate": 1.468354430379747e-05, |
| "loss": 0.6533, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10587994754314423, |
| "step": 30, |
| "valid_targets_mean": 8102.1, |
| "valid_targets_min": 2814 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.4173979219736347, |
| "learning_rate": 1.7215189873417723e-05, |
| "loss": 0.6234, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07410383224487305, |
| "step": 35, |
| "valid_targets_mean": 6899.6, |
| "valid_targets_min": 3760 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.3441076042940234, |
| "learning_rate": 1.974683544303798e-05, |
| "loss": 0.602, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06940320879220963, |
| "step": 40, |
| "valid_targets_mean": 5900.6, |
| "valid_targets_min": 2399 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.3368414311757288, |
| "learning_rate": 2.2278481012658228e-05, |
| "loss": 0.5868, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07438945025205612, |
| "step": 45, |
| "valid_targets_mean": 6208.6, |
| "valid_targets_min": 1995 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.32375816181192346, |
| "learning_rate": 2.481012658227848e-05, |
| "loss": 0.5717, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07481700927019119, |
| "step": 50, |
| "valid_targets_mean": 5466.9, |
| "valid_targets_min": 2774 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.27104936425643794, |
| "learning_rate": 2.7341772151898737e-05, |
| "loss": 0.5616, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06090511381626129, |
| "step": 55, |
| "valid_targets_mean": 5490.6, |
| "valid_targets_min": 3690 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.2518240381816461, |
| "learning_rate": 2.987341772151899e-05, |
| "loss": 0.5445, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07113075256347656, |
| "step": 60, |
| "valid_targets_mean": 5494.9, |
| "valid_targets_min": 1995 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.2597289703106665, |
| "learning_rate": 3.240506329113924e-05, |
| "loss": 0.5355, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06590637564659119, |
| "step": 65, |
| "valid_targets_mean": 6131.6, |
| "valid_targets_min": 2753 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.2414076585796264, |
| "learning_rate": 3.49367088607595e-05, |
| "loss": 0.5283, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06234627589583397, |
| "step": 70, |
| "valid_targets_mean": 4963.4, |
| "valid_targets_min": 1162 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.25686184708281795, |
| "learning_rate": 3.746835443037975e-05, |
| "loss": 0.5215, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07690601050853729, |
| "step": 75, |
| "valid_targets_mean": 6496.1, |
| "valid_targets_min": 3258 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.23389768426609334, |
| "learning_rate": 4e-05, |
| "loss": 0.5301, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07471586018800735, |
| "step": 80, |
| "valid_targets_mean": 7326.5, |
| "valid_targets_min": 4883 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.2659779662223677, |
| "learning_rate": 3.999504991751045e-05, |
| "loss": 0.5181, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06517303735017776, |
| "step": 85, |
| "valid_targets_mean": 5350.9, |
| "valid_targets_min": 4258 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.2854265829349621, |
| "learning_rate": 3.9980202120373464e-05, |
| "loss": 0.5168, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06170065328478813, |
| "step": 90, |
| "valid_targets_mean": 4773.6, |
| "valid_targets_min": 2325 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.25661668844965835, |
| "learning_rate": 3.995546395837111e-05, |
| "loss": 0.5068, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.060933709144592285, |
| "step": 95, |
| "valid_targets_mean": 6117.9, |
| "valid_targets_min": 3084 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.26620564108617034, |
| "learning_rate": 3.992084767709763e-05, |
| "loss": 0.5077, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.08841434866189957, |
| "step": 100, |
| "valid_targets_mean": 7130.5, |
| "valid_targets_min": 4746 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.2521450008844816, |
| "learning_rate": 3.987637041189781e-05, |
| "loss": 0.515, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06610182672739029, |
| "step": 105, |
| "valid_targets_mean": 5218.6, |
| "valid_targets_min": 1170 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.2769161147127277, |
| "learning_rate": 3.982205417938482e-05, |
| "loss": 0.5113, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.053933948278427124, |
| "step": 110, |
| "valid_targets_mean": 5300.9, |
| "valid_targets_min": 3774 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.26841799626758156, |
| "learning_rate": 3.975792586654179e-05, |
| "loss": 0.5067, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05585269629955292, |
| "step": 115, |
| "valid_targets_mean": 5271.8, |
| "valid_targets_min": 3120 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.24943794626876947, |
| "learning_rate": 3.968401721741259e-05, |
| "loss": 0.4918, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06649080663919449, |
| "step": 120, |
| "valid_targets_mean": 5984.4, |
| "valid_targets_min": 1074 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.261327494239664, |
| "learning_rate": 3.960036481738819e-05, |
| "loss": 0.5054, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06885598599910736, |
| "step": 125, |
| "valid_targets_mean": 5274.5, |
| "valid_targets_min": 1935 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.2753180128794166, |
| "learning_rate": 3.950701007509667e-05, |
| "loss": 0.4932, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06847365945577621, |
| "step": 130, |
| "valid_targets_mean": 6286.2, |
| "valid_targets_min": 4558 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.24126150536668375, |
| "learning_rate": 3.940399920190552e-05, |
| "loss": 0.4994, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06032635271549225, |
| "step": 135, |
| "valid_targets_mean": 5742.0, |
| "valid_targets_min": 1876 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.24255988010200946, |
| "learning_rate": 3.92913831890467e-05, |
| "loss": 0.4881, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0585082583129406, |
| "step": 140, |
| "valid_targets_mean": 5897.5, |
| "valid_targets_min": 1367 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.2652605413991407, |
| "learning_rate": 3.916921778237556e-05, |
| "loss": 0.4876, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06493394076824188, |
| "step": 145, |
| "valid_targets_mean": 6277.0, |
| "valid_targets_min": 3360 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.25620367603679406, |
| "learning_rate": 3.903756345477612e-05, |
| "loss": 0.4891, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06971275061368942, |
| "step": 150, |
| "valid_targets_mean": 5391.6, |
| "valid_targets_min": 1684 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.2614209495886294, |
| "learning_rate": 3.889648537622657e-05, |
| "loss": 0.491, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.068354532122612, |
| "step": 155, |
| "valid_targets_mean": 8172.1, |
| "valid_targets_min": 5470 |
| }, |
| { |
| "epoch": 1.0192, |
| "grad_norm": 0.2644523216163286, |
| "learning_rate": 3.874605338153952e-05, |
| "loss": 0.4849, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.058990854769945145, |
| "step": 160, |
| "valid_targets_mean": 6214.0, |
| "valid_targets_min": 1643 |
| }, |
| { |
| "epoch": 1.0512, |
| "grad_norm": 0.24490848549247626, |
| "learning_rate": 3.8586341935793265e-05, |
| "loss": 0.4834, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06218273937702179, |
| "step": 165, |
| "valid_targets_mean": 6210.1, |
| "valid_targets_min": 4494 |
| }, |
| { |
| "epoch": 1.0832, |
| "grad_norm": 0.2712661869435267, |
| "learning_rate": 3.841743009747089e-05, |
| "loss": 0.4831, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04654332250356674, |
| "step": 170, |
| "valid_targets_mean": 4305.1, |
| "valid_targets_min": 1965 |
| }, |
| { |
| "epoch": 1.1152, |
| "grad_norm": 0.2597528694002882, |
| "learning_rate": 3.8239401479325714e-05, |
| "loss": 0.4863, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06452959775924683, |
| "step": 175, |
| "valid_targets_mean": 6206.4, |
| "valid_targets_min": 3935 |
| }, |
| { |
| "epoch": 1.1472, |
| "grad_norm": 0.2565936230747566, |
| "learning_rate": 3.8052344206992276e-05, |
| "loss": 0.4759, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0673588439822197, |
| "step": 180, |
| "valid_targets_mean": 6048.5, |
| "valid_targets_min": 2629 |
| }, |
| { |
| "epoch": 1.1792, |
| "grad_norm": 0.2238441416256691, |
| "learning_rate": 3.7856350875363396e-05, |
| "loss": 0.4701, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0603281706571579, |
| "step": 185, |
| "valid_targets_mean": 6227.0, |
| "valid_targets_min": 2116 |
| }, |
| { |
| "epoch": 1.2112, |
| "grad_norm": 0.2524018596078198, |
| "learning_rate": 3.765151850275497e-05, |
| "loss": 0.4874, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06632298231124878, |
| "step": 190, |
| "valid_targets_mean": 6587.5, |
| "valid_targets_min": 3340 |
| }, |
| { |
| "epoch": 1.2432, |
| "grad_norm": 0.23982954263969072, |
| "learning_rate": 3.7437948482881104e-05, |
| "loss": 0.4711, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.062065280973911285, |
| "step": 195, |
| "valid_targets_mean": 6443.0, |
| "valid_targets_min": 3359 |
| }, |
| { |
| "epoch": 1.2752, |
| "grad_norm": 0.2562843751835332, |
| "learning_rate": 3.721574653466336e-05, |
| "loss": 0.4837, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07022428512573242, |
| "step": 200, |
| "valid_targets_mean": 7027.1, |
| "valid_targets_min": 2255 |
| }, |
| { |
| "epoch": 1.3072, |
| "grad_norm": 0.2490298294787762, |
| "learning_rate": 3.698502264989903e-05, |
| "loss": 0.4773, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07066579163074493, |
| "step": 205, |
| "valid_targets_mean": 7863.0, |
| "valid_targets_min": 4232 |
| }, |
| { |
| "epoch": 1.3392, |
| "grad_norm": 0.23265554686032325, |
| "learning_rate": 3.674589103881432e-05, |
| "loss": 0.4908, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.047557681798934937, |
| "step": 210, |
| "valid_targets_mean": 5056.2, |
| "valid_targets_min": 3086 |
| }, |
| { |
| "epoch": 1.3712, |
| "grad_norm": 0.24730090791441747, |
| "learning_rate": 3.64984700735293e-05, |
| "loss": 0.4732, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05279272794723511, |
| "step": 215, |
| "valid_targets_mean": 5471.4, |
| "valid_targets_min": 886 |
| }, |
| { |
| "epoch": 1.4032, |
| "grad_norm": 0.2630816313944668, |
| "learning_rate": 3.624288222946273e-05, |
| "loss": 0.4742, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06347040832042694, |
| "step": 220, |
| "valid_targets_mean": 5821.9, |
| "valid_targets_min": 4845 |
| }, |
| { |
| "epoch": 1.4352, |
| "grad_norm": 0.24832616758076276, |
| "learning_rate": 3.597925402470578e-05, |
| "loss": 0.4866, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06918965280056, |
| "step": 225, |
| "valid_targets_mean": 7173.8, |
| "valid_targets_min": 3581 |
| }, |
| { |
| "epoch": 1.4672, |
| "grad_norm": 0.23769626736333738, |
| "learning_rate": 3.570771595739445e-05, |
| "loss": 0.4596, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06793850660324097, |
| "step": 230, |
| "valid_targets_mean": 8257.1, |
| "valid_targets_min": 1591 |
| }, |
| { |
| "epoch": 1.4992, |
| "grad_norm": 0.2702237749996595, |
| "learning_rate": 3.5428402441111964e-05, |
| "loss": 0.4689, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07246175408363342, |
| "step": 235, |
| "valid_targets_mean": 7759.9, |
| "valid_targets_min": 4436 |
| }, |
| { |
| "epoch": 1.5312000000000001, |
| "grad_norm": 0.2319782734521749, |
| "learning_rate": 3.5141451738352936e-05, |
| "loss": 0.4708, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05363692343235016, |
| "step": 240, |
| "valid_targets_mean": 5670.9, |
| "valid_targets_min": 2413 |
| }, |
| { |
| "epoch": 1.5632000000000001, |
| "grad_norm": 0.27676306471458656, |
| "learning_rate": 3.4847005892082266e-05, |
| "loss": 0.4734, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06264029443264008, |
| "step": 245, |
| "valid_targets_mean": 5770.8, |
| "valid_targets_min": 1468 |
| }, |
| { |
| "epoch": 1.5952, |
| "grad_norm": 0.252783500403107, |
| "learning_rate": 3.454521065542273e-05, |
| "loss": 0.4824, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.055929217487573624, |
| "step": 250, |
| "valid_targets_mean": 6546.8, |
| "valid_targets_min": 1503 |
| }, |
| { |
| "epoch": 1.6272, |
| "grad_norm": 0.2451881861509776, |
| "learning_rate": 3.423621541950597e-05, |
| "loss": 0.4742, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05802520364522934, |
| "step": 255, |
| "valid_targets_mean": 6049.6, |
| "valid_targets_min": 4073 |
| }, |
| { |
| "epoch": 1.6592, |
| "grad_norm": 0.22773293471846903, |
| "learning_rate": 3.3920173139522664e-05, |
| "loss": 0.463, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06799766421318054, |
| "step": 260, |
| "valid_targets_mean": 7074.5, |
| "valid_targets_min": 3579 |
| }, |
| { |
| "epoch": 1.6912, |
| "grad_norm": 0.2437536633189009, |
| "learning_rate": 3.35972402590084e-05, |
| "loss": 0.4642, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06866434961557388, |
| "step": 265, |
| "valid_targets_mean": 6526.2, |
| "valid_targets_min": 4294 |
| }, |
| { |
| "epoch": 1.7231999999999998, |
| "grad_norm": 0.23336254280041369, |
| "learning_rate": 3.326757663240291e-05, |
| "loss": 0.4578, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.061764735728502274, |
| "step": 270, |
| "valid_targets_mean": 5638.4, |
| "valid_targets_min": 3851 |
| }, |
| { |
| "epoch": 1.7551999999999999, |
| "grad_norm": 0.25169026721119286, |
| "learning_rate": 3.293134544592073e-05, |
| "loss": 0.4668, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.057048872113227844, |
| "step": 275, |
| "valid_targets_mean": 4920.8, |
| "valid_targets_min": 3774 |
| }, |
| { |
| "epoch": 1.7872, |
| "grad_norm": 0.24072397303655377, |
| "learning_rate": 3.258871313677274e-05, |
| "loss": 0.4622, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04774663969874382, |
| "step": 280, |
| "valid_targets_mean": 4761.9, |
| "valid_targets_min": 4079 |
| }, |
| { |
| "epoch": 1.8192, |
| "grad_norm": 0.2319763295369083, |
| "learning_rate": 3.2239849310778316e-05, |
| "loss": 0.4517, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05293641611933708, |
| "step": 285, |
| "valid_targets_mean": 6614.0, |
| "valid_targets_min": 1755 |
| }, |
| { |
| "epoch": 1.8512, |
| "grad_norm": 0.23208218752050475, |
| "learning_rate": 3.188492665840909e-05, |
| "loss": 0.4598, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06026110053062439, |
| "step": 290, |
| "valid_targets_mean": 6582.2, |
| "valid_targets_min": 2078 |
| }, |
| { |
| "epoch": 1.8832, |
| "grad_norm": 0.2292661755359355, |
| "learning_rate": 3.1524120869305726e-05, |
| "loss": 0.473, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.049538739025592804, |
| "step": 295, |
| "valid_targets_mean": 5109.5, |
| "valid_targets_min": 1325 |
| }, |
| { |
| "epoch": 1.9152, |
| "grad_norm": 0.23490137630382535, |
| "learning_rate": 3.11576105453101e-05, |
| "loss": 0.4705, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05621837079524994, |
| "step": 300, |
| "valid_targets_mean": 6087.4, |
| "valid_targets_min": 3403 |
| }, |
| { |
| "epoch": 1.9472, |
| "grad_norm": 0.2276557858248002, |
| "learning_rate": 3.0785577112055916e-05, |
| "loss": 0.4681, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05133836343884468, |
| "step": 305, |
| "valid_targets_mean": 5987.4, |
| "valid_targets_min": 1819 |
| }, |
| { |
| "epoch": 1.9792, |
| "grad_norm": 0.24871037185960643, |
| "learning_rate": 3.040820472916153e-05, |
| "loss": 0.4634, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05129968374967575, |
| "step": 310, |
| "valid_targets_mean": 5456.2, |
| "valid_targets_min": 4181 |
| }, |
| { |
| "epoch": 2.0064, |
| "grad_norm": 0.23280849628357086, |
| "learning_rate": 3.002568019906939e-05, |
| "loss": 0.4548, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04623069614171982, |
| "step": 315, |
| "valid_targets_mean": 6180.5, |
| "valid_targets_min": 1302 |
| }, |
| { |
| "epoch": 2.0384, |
| "grad_norm": 0.282242675395652, |
| "learning_rate": 2.963819287457733e-05, |
| "loss": 0.459, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06686653196811676, |
| "step": 320, |
| "valid_targets_mean": 6104.0, |
| "valid_targets_min": 4085 |
| }, |
| { |
| "epoch": 2.0704, |
| "grad_norm": 0.23053378285489914, |
| "learning_rate": 2.924593456510733e-05, |
| "loss": 0.452, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05468098074197769, |
| "step": 325, |
| "valid_targets_mean": 5716.1, |
| "valid_targets_min": 1796 |
| }, |
| { |
| "epoch": 2.1024, |
| "grad_norm": 0.2507497887143648, |
| "learning_rate": 2.8849099441758306e-05, |
| "loss": 0.4531, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06644373387098312, |
| "step": 330, |
| "valid_targets_mean": 6270.0, |
| "valid_targets_min": 1490 |
| }, |
| { |
| "epoch": 2.1344, |
| "grad_norm": 0.23425796911518604, |
| "learning_rate": 2.844788394118979e-05, |
| "loss": 0.4621, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.039441946893930435, |
| "step": 335, |
| "valid_targets_mean": 3962.6, |
| "valid_targets_min": 922 |
| }, |
| { |
| "epoch": 2.1664, |
| "grad_norm": 0.23569878804329844, |
| "learning_rate": 2.8042486668384164e-05, |
| "loss": 0.4591, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05416123569011688, |
| "step": 340, |
| "valid_targets_mean": 6127.9, |
| "valid_targets_min": 1671 |
| }, |
| { |
| "epoch": 2.1984, |
| "grad_norm": 0.2468111886759843, |
| "learning_rate": 2.7633108298335582e-05, |
| "loss": 0.4595, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06589130312204361, |
| "step": 345, |
| "valid_targets_mean": 6657.6, |
| "valid_targets_min": 4349 |
| }, |
| { |
| "epoch": 2.2304, |
| "grad_norm": 0.24042079473254022, |
| "learning_rate": 2.721995147671416e-05, |
| "loss": 0.457, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05363306403160095, |
| "step": 350, |
| "valid_targets_mean": 5796.2, |
| "valid_targets_min": 2050 |
| }, |
| { |
| "epoch": 2.2624, |
| "grad_norm": 0.2471509734125724, |
| "learning_rate": 2.68032207195547e-05, |
| "loss": 0.454, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0587216354906559, |
| "step": 355, |
| "valid_targets_mean": 5496.1, |
| "valid_targets_min": 1432 |
| }, |
| { |
| "epoch": 2.2944, |
| "grad_norm": 0.28641541148458394, |
| "learning_rate": 2.6383122312019604e-05, |
| "loss": 0.465, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07071933150291443, |
| "step": 360, |
| "valid_targets_mean": 6219.4, |
| "valid_targets_min": 3415 |
| }, |
| { |
| "epoch": 2.3264, |
| "grad_norm": 0.26655826172718433, |
| "learning_rate": 2.595986420628597e-05, |
| "loss": 0.4546, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.057701416313648224, |
| "step": 365, |
| "valid_targets_mean": 4345.5, |
| "valid_targets_min": 1503 |
| }, |
| { |
| "epoch": 2.3584, |
| "grad_norm": 0.2716820479559013, |
| "learning_rate": 2.5533655918607573e-05, |
| "loss": 0.4688, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0600113645195961, |
| "step": 370, |
| "valid_targets_mean": 5102.8, |
| "valid_targets_min": 1842 |
| }, |
| { |
| "epoch": 2.3904, |
| "grad_norm": 0.23769900034171296, |
| "learning_rate": 2.510470842560259e-05, |
| "loss": 0.4518, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05029679089784622, |
| "step": 375, |
| "valid_targets_mean": 5904.0, |
| "valid_targets_min": 2724 |
| }, |
| { |
| "epoch": 2.4224, |
| "grad_norm": 0.2649419209298672, |
| "learning_rate": 2.467323405981841e-05, |
| "loss": 0.4522, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06330947577953339, |
| "step": 380, |
| "valid_targets_mean": 5251.1, |
| "valid_targets_min": 1399 |
| }, |
| { |
| "epoch": 2.4544, |
| "grad_norm": 0.23372669557845718, |
| "learning_rate": 2.423944640462533e-05, |
| "loss": 0.4556, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.051049359142780304, |
| "step": 385, |
| "valid_targets_mean": 4859.1, |
| "valid_targets_min": 2197 |
| }, |
| { |
| "epoch": 2.4864, |
| "grad_norm": 0.22906821757723925, |
| "learning_rate": 2.3803560188490968e-05, |
| "loss": 0.4536, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05310038477182388, |
| "step": 390, |
| "valid_targets_mean": 5227.2, |
| "valid_targets_min": 2927 |
| }, |
| { |
| "epoch": 2.5183999999999997, |
| "grad_norm": 0.23996252933721968, |
| "learning_rate": 2.336579117868789e-05, |
| "loss": 0.4501, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.051032502204179764, |
| "step": 395, |
| "valid_targets_mean": 5646.5, |
| "valid_targets_min": 4346 |
| }, |
| { |
| "epoch": 2.5504, |
| "grad_norm": 0.26778220999928204, |
| "learning_rate": 2.292635607448711e-05, |
| "loss": 0.4562, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04963091388344765, |
| "step": 400, |
| "valid_targets_mean": 4997.0, |
| "valid_targets_min": 1813 |
| }, |
| { |
| "epoch": 2.5824, |
| "grad_norm": 0.22182241254722662, |
| "learning_rate": 2.248547239989008e-05, |
| "loss": 0.453, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06061498820781708, |
| "step": 405, |
| "valid_targets_mean": 5507.2, |
| "valid_targets_min": 3156 |
| }, |
| { |
| "epoch": 2.6144, |
| "grad_norm": 0.26254620904272924, |
| "learning_rate": 2.204335839595255e-05, |
| "loss": 0.45, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.053135473281145096, |
| "step": 410, |
| "valid_targets_mean": 5220.8, |
| "valid_targets_min": 786 |
| }, |
| { |
| "epoch": 2.6464, |
| "grad_norm": 0.22593873698632028, |
| "learning_rate": 2.1600232912753452e-05, |
| "loss": 0.4441, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.046432919800281525, |
| "step": 415, |
| "valid_targets_mean": 5801.0, |
| "valid_targets_min": 2289 |
| }, |
| { |
| "epoch": 2.6784, |
| "grad_norm": 0.250597076261437, |
| "learning_rate": 2.1156315301062293e-05, |
| "loss": 0.4474, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.053618162870407104, |
| "step": 420, |
| "valid_targets_mean": 5283.8, |
| "valid_targets_min": 3776 |
| }, |
| { |
| "epoch": 2.7104, |
| "grad_norm": 0.2276812490591841, |
| "learning_rate": 2.0711825303758712e-05, |
| "loss": 0.4522, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.059437140822410583, |
| "step": 425, |
| "valid_targets_mean": 8148.6, |
| "valid_targets_min": 4051 |
| }, |
| { |
| "epoch": 2.7424, |
| "grad_norm": 0.2410802848631017, |
| "learning_rate": 2.0266982947057962e-05, |
| "loss": 0.4475, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04853265732526779, |
| "step": 430, |
| "valid_targets_mean": 4983.1, |
| "valid_targets_min": 1578 |
| }, |
| { |
| "epoch": 2.7744, |
| "grad_norm": 0.25188709384965696, |
| "learning_rate": 1.9822008431596083e-05, |
| "loss": 0.4443, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06445351988077164, |
| "step": 435, |
| "valid_targets_mean": 5899.4, |
| "valid_targets_min": 3393 |
| }, |
| { |
| "epoch": 2.8064, |
| "grad_norm": 0.22677351357547834, |
| "learning_rate": 1.937712202342881e-05, |
| "loss": 0.4507, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0591612383723259, |
| "step": 440, |
| "valid_targets_mean": 8142.0, |
| "valid_targets_min": 3951 |
| }, |
| { |
| "epoch": 2.8384, |
| "grad_norm": 0.2476358336589753, |
| "learning_rate": 1.8932543944998037e-05, |
| "loss": 0.4385, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.050485286861658096, |
| "step": 445, |
| "valid_targets_mean": 4756.0, |
| "valid_targets_min": 1252 |
| }, |
| { |
| "epoch": 2.8704, |
| "grad_norm": 0.2687286121438972, |
| "learning_rate": 1.8488494266119877e-05, |
| "loss": 0.4661, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.051208123564720154, |
| "step": 450, |
| "valid_targets_mean": 5437.2, |
| "valid_targets_min": 3784 |
| }, |
| { |
| "epoch": 2.9024, |
| "grad_norm": 0.26765025387673375, |
| "learning_rate": 1.804519279504834e-05, |
| "loss": 0.4611, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.07309385389089584, |
| "step": 455, |
| "valid_targets_mean": 7018.2, |
| "valid_targets_min": 4636 |
| }, |
| { |
| "epoch": 2.9344, |
| "grad_norm": 0.2551626742443098, |
| "learning_rate": 1.7602858969668365e-05, |
| "loss": 0.4491, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0500410795211792, |
| "step": 460, |
| "valid_targets_mean": 6023.8, |
| "valid_targets_min": 2822 |
| }, |
| { |
| "epoch": 2.9664, |
| "grad_norm": 0.23355241965056686, |
| "learning_rate": 1.716171174887231e-05, |
| "loss": 0.4504, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04919644072651863, |
| "step": 465, |
| "valid_targets_mean": 4722.8, |
| "valid_targets_min": 981 |
| }, |
| { |
| "epoch": 2.9984, |
| "grad_norm": 0.25567719668264177, |
| "learning_rate": 1.6721969504173484e-05, |
| "loss": 0.4615, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06145084276795387, |
| "step": 470, |
| "valid_targets_mean": 6633.8, |
| "valid_targets_min": 2664 |
| }, |
| { |
| "epoch": 3.0256, |
| "grad_norm": 0.23723825981265084, |
| "learning_rate": 1.628384991161041e-05, |
| "loss": 0.4481, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0490913987159729, |
| "step": 475, |
| "valid_targets_mean": 5354.5, |
| "valid_targets_min": 1861 |
| }, |
| { |
| "epoch": 3.0576, |
| "grad_norm": 0.23308177141808253, |
| "learning_rate": 1.5847569843995452e-05, |
| "loss": 0.4487, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.056223396211862564, |
| "step": 480, |
| "valid_targets_mean": 6885.0, |
| "valid_targets_min": 3388 |
| }, |
| { |
| "epoch": 3.0896, |
| "grad_norm": 0.24540446124861437, |
| "learning_rate": 1.5413345263560922e-05, |
| "loss": 0.4392, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0613599494099617, |
| "step": 485, |
| "valid_targets_mean": 6895.6, |
| "valid_targets_min": 1962 |
| }, |
| { |
| "epoch": 3.1216, |
| "grad_norm": 0.21470865271872294, |
| "learning_rate": 1.4981391115056032e-05, |
| "loss": 0.4471, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.055178817361593246, |
| "step": 490, |
| "valid_targets_mean": 6981.2, |
| "valid_targets_min": 3991 |
| }, |
| { |
| "epoch": 3.1536, |
| "grad_norm": 0.25039642475302604, |
| "learning_rate": 1.455192121934748e-05, |
| "loss": 0.4505, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.043909646570682526, |
| "step": 495, |
| "valid_targets_mean": 4949.1, |
| "valid_targets_min": 1221 |
| }, |
| { |
| "epoch": 3.1856, |
| "grad_norm": 0.23546977610301292, |
| "learning_rate": 1.4125148167576303e-05, |
| "loss": 0.4375, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06255581229925156, |
| "step": 500, |
| "valid_targets_mean": 6358.4, |
| "valid_targets_min": 2529 |
| }, |
| { |
| "epoch": 3.2176, |
| "grad_norm": 0.22661126639106943, |
| "learning_rate": 1.3701283215923563e-05, |
| "loss": 0.4411, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.057248782366514206, |
| "step": 505, |
| "valid_targets_mean": 6919.9, |
| "valid_targets_min": 3997 |
| }, |
| { |
| "epoch": 3.2496, |
| "grad_norm": 0.2341009567683006, |
| "learning_rate": 1.328053618103677e-05, |
| "loss": 0.4343, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06049735099077225, |
| "step": 510, |
| "valid_targets_mean": 6964.2, |
| "valid_targets_min": 1540 |
| }, |
| { |
| "epoch": 3.2816, |
| "grad_norm": 0.24058845331628828, |
| "learning_rate": 1.2863115336168916e-05, |
| "loss": 0.4436, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05956530570983887, |
| "step": 515, |
| "valid_targets_mean": 6419.1, |
| "valid_targets_min": 1842 |
| }, |
| { |
| "epoch": 3.3136, |
| "grad_norm": 0.23515050772410961, |
| "learning_rate": 1.2449227308081509e-05, |
| "loss": 0.4391, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05182071030139923, |
| "step": 520, |
| "valid_targets_mean": 6077.0, |
| "valid_targets_min": 2459 |
| }, |
| { |
| "epoch": 3.3456, |
| "grad_norm": 0.2409223008572993, |
| "learning_rate": 1.2039076974762587e-05, |
| "loss": 0.4357, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.058449625968933105, |
| "step": 525, |
| "valid_targets_mean": 5544.2, |
| "valid_targets_min": 1609 |
| }, |
| { |
| "epoch": 3.3776, |
| "grad_norm": 0.2435321263592678, |
| "learning_rate": 1.163286736401044e-05, |
| "loss": 0.4511, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04596813768148422, |
| "step": 530, |
| "valid_targets_mean": 4302.5, |
| "valid_targets_min": 1889 |
| }, |
| { |
| "epoch": 3.4096, |
| "grad_norm": 0.23358140517262413, |
| "learning_rate": 1.123079955293322e-05, |
| "loss": 0.4534, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06118956208229065, |
| "step": 535, |
| "valid_targets_mean": 6056.1, |
| "valid_targets_min": 3557 |
| }, |
| { |
| "epoch": 3.4416, |
| "grad_norm": 0.22861739864670938, |
| "learning_rate": 1.0833072568414037e-05, |
| "loss": 0.4274, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0649193823337555, |
| "step": 540, |
| "valid_targets_mean": 6476.8, |
| "valid_targets_min": 4200 |
| }, |
| { |
| "epoch": 3.4736000000000002, |
| "grad_norm": 0.2748627960264402, |
| "learning_rate": 1.0439883288591057e-05, |
| "loss": 0.4422, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.059561409056186676, |
| "step": 545, |
| "valid_targets_mean": 5730.4, |
| "valid_targets_min": 3802 |
| }, |
| { |
| "epoch": 3.5056000000000003, |
| "grad_norm": 0.22490252509071082, |
| "learning_rate": 1.0051426345401202e-05, |
| "loss": 0.4448, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06985727697610855, |
| "step": 550, |
| "valid_targets_mean": 6299.5, |
| "valid_targets_min": 4746 |
| }, |
| { |
| "epoch": 3.5376, |
| "grad_norm": 0.23881343469516417, |
| "learning_rate": 9.667894028235704e-06, |
| "loss": 0.4494, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.050563450902700424, |
| "step": 555, |
| "valid_targets_mean": 4656.5, |
| "valid_targets_min": 1264 |
| }, |
| { |
| "epoch": 3.5696, |
| "grad_norm": 0.21558144370020368, |
| "learning_rate": 9.289476188755315e-06, |
| "loss": 0.4393, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05615207552909851, |
| "step": 560, |
| "valid_targets_mean": 5605.1, |
| "valid_targets_min": 1266 |
| }, |
| { |
| "epoch": 3.6016, |
| "grad_norm": 0.22366375588682952, |
| "learning_rate": 8.916360146912122e-06, |
| "loss": 0.4392, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05911038815975189, |
| "step": 565, |
| "valid_targets_mean": 6945.1, |
| "valid_targets_min": 3611 |
| }, |
| { |
| "epoch": 3.6336, |
| "grad_norm": 0.22562763539959024, |
| "learning_rate": 8.548730598224646e-06, |
| "loss": 0.4485, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05786176770925522, |
| "step": 570, |
| "valid_targets_mean": 6466.4, |
| "valid_targets_min": 3579 |
| }, |
| { |
| "epoch": 3.6656, |
| "grad_norm": 0.22026046202865132, |
| "learning_rate": 8.186769522352053e-06, |
| "loss": 0.438, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.055450744926929474, |
| "step": 575, |
| "valid_targets_mean": 6839.8, |
| "valid_targets_min": 5002 |
| }, |
| { |
| "epoch": 3.6976, |
| "grad_norm": 0.24145961528777912, |
| "learning_rate": 7.830656093012714e-06, |
| "loss": 0.4501, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05287020653486252, |
| "step": 580, |
| "valid_targets_mean": 4996.0, |
| "valid_targets_min": 1314 |
| }, |
| { |
| "epoch": 3.7296, |
| "grad_norm": 0.23562303255829792, |
| "learning_rate": 7.480566589291696e-06, |
| "loss": 0.4438, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.051040396094322205, |
| "step": 585, |
| "valid_targets_mean": 5587.4, |
| "valid_targets_min": 1137 |
| }, |
| { |
| "epoch": 3.7616, |
| "grad_norm": 0.23136089168005522, |
| "learning_rate": 7.1366743083812285e-06, |
| "loss": 0.4454, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06240808591246605, |
| "step": 590, |
| "valid_targets_mean": 7138.6, |
| "valid_targets_min": 2866 |
| }, |
| { |
| "epoch": 3.7936, |
| "grad_norm": 0.22707983412801, |
| "learning_rate": 6.799149479797101e-06, |
| "loss": 0.4496, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05835655331611633, |
| "step": 595, |
| "valid_targets_mean": 6655.6, |
| "valid_targets_min": 4918 |
| }, |
| { |
| "epoch": 3.8256, |
| "grad_norm": 0.21880656960601397, |
| "learning_rate": 6.4681591811137e-06, |
| "loss": 0.4453, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.055026598274707794, |
| "step": 600, |
| "valid_targets_mean": 5937.6, |
| "valid_targets_min": 2526 |
| }, |
| { |
| "epoch": 3.8576, |
| "grad_norm": 0.229570941917756, |
| "learning_rate": 6.143867255259197e-06, |
| "loss": 0.4469, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06819195300340652, |
| "step": 605, |
| "valid_targets_mean": 6309.0, |
| "valid_targets_min": 4153 |
| }, |
| { |
| "epoch": 3.8895999999999997, |
| "grad_norm": 0.24193949938723308, |
| "learning_rate": 5.8264342294119504e-06, |
| "loss": 0.4522, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.047439537942409515, |
| "step": 610, |
| "valid_targets_mean": 5028.2, |
| "valid_targets_min": 1820 |
| }, |
| { |
| "epoch": 3.9215999999999998, |
| "grad_norm": 0.22547416338058443, |
| "learning_rate": 5.516017235538258e-06, |
| "loss": 0.4455, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.059017449617385864, |
| "step": 615, |
| "valid_targets_mean": 6480.9, |
| "valid_targets_min": 1656 |
| }, |
| { |
| "epoch": 3.9536, |
| "grad_norm": 0.22848670190751794, |
| "learning_rate": 5.212769932610695e-06, |
| "loss": 0.4509, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05336993187665939, |
| "step": 620, |
| "valid_targets_mean": 5756.1, |
| "valid_targets_min": 3313 |
| }, |
| { |
| "epoch": 3.9856, |
| "grad_norm": 0.2210847079221754, |
| "learning_rate": 4.916842430545681e-06, |
| "loss": 0.4456, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.050336554646492004, |
| "step": 625, |
| "valid_targets_mean": 5649.6, |
| "valid_targets_min": 3889 |
| }, |
| { |
| "epoch": 4.0128, |
| "grad_norm": 0.23421080498920668, |
| "learning_rate": 4.628381215897837e-06, |
| "loss": 0.4496, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05329183489084244, |
| "step": 630, |
| "valid_targets_mean": 6462.2, |
| "valid_targets_min": 2967 |
| }, |
| { |
| "epoch": 4.0448, |
| "grad_norm": 0.21264503510850571, |
| "learning_rate": 4.347529079347914e-06, |
| "loss": 0.4407, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04397914558649063, |
| "step": 635, |
| "valid_targets_mean": 4318.1, |
| "valid_targets_min": 1059 |
| }, |
| { |
| "epoch": 4.0768, |
| "grad_norm": 0.22648256558745927, |
| "learning_rate": 4.074425045020247e-06, |
| "loss": 0.4375, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06421341747045517, |
| "step": 640, |
| "valid_targets_mean": 7351.6, |
| "valid_targets_min": 4765 |
| }, |
| { |
| "epoch": 4.1088, |
| "grad_norm": 0.21654561780522466, |
| "learning_rate": 3.8092043016646487e-06, |
| "loss": 0.4354, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05705592781305313, |
| "step": 645, |
| "valid_targets_mean": 6272.0, |
| "valid_targets_min": 4011 |
| }, |
| { |
| "epoch": 4.1408, |
| "grad_norm": 0.22111062823834582, |
| "learning_rate": 3.551998135736867e-06, |
| "loss": 0.4491, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.06004402041435242, |
| "step": 650, |
| "valid_targets_mean": 8276.9, |
| "valid_targets_min": 5253 |
| }, |
| { |
| "epoch": 4.1728, |
| "grad_norm": 0.22185240108484003, |
| "learning_rate": 3.3029338664107267e-06, |
| "loss": 0.4489, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05593833327293396, |
| "step": 655, |
| "valid_targets_mean": 6135.6, |
| "valid_targets_min": 4737 |
| }, |
| { |
| "epoch": 4.2048, |
| "grad_norm": 0.23136979037427247, |
| "learning_rate": 3.0621347825540625e-06, |
| "loss": 0.4326, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.052786361426115036, |
| "step": 660, |
| "valid_targets_mean": 6988.9, |
| "valid_targets_min": 2344 |
| }, |
| { |
| "epoch": 4.2368, |
| "grad_norm": 0.22278564270605972, |
| "learning_rate": 2.8297200816997183e-06, |
| "loss": 0.4539, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05915951728820801, |
| "step": 665, |
| "valid_targets_mean": 6974.9, |
| "valid_targets_min": 4528 |
| }, |
| { |
| "epoch": 4.2688, |
| "grad_norm": 0.248074738761296, |
| "learning_rate": 2.605804811041803e-06, |
| "loss": 0.4416, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05227920413017273, |
| "step": 670, |
| "valid_targets_mean": 6666.6, |
| "valid_targets_min": 2025 |
| }, |
| { |
| "epoch": 4.3008, |
| "grad_norm": 0.21281904492589643, |
| "learning_rate": 2.390499810486351e-06, |
| "loss": 0.4319, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05795406550168991, |
| "step": 675, |
| "valid_targets_mean": 5690.2, |
| "valid_targets_min": 3571 |
| }, |
| { |
| "epoch": 4.3328, |
| "grad_norm": 0.228176572507057, |
| "learning_rate": 2.183911657784685e-06, |
| "loss": 0.449, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.03352051600813866, |
| "step": 680, |
| "valid_targets_mean": 3722.4, |
| "valid_targets_min": 1244 |
| }, |
| { |
| "epoch": 4.3648, |
| "grad_norm": 0.21223783591770465, |
| "learning_rate": 1.986142615776532e-06, |
| "loss": 0.4384, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.050721943378448486, |
| "step": 685, |
| "valid_targets_mean": 5113.6, |
| "valid_targets_min": 1247 |
| }, |
| { |
| "epoch": 4.3968, |
| "grad_norm": 0.21066498944882028, |
| "learning_rate": 1.7972905817690644e-06, |
| "loss": 0.433, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.060176365077495575, |
| "step": 690, |
| "valid_targets_mean": 6367.5, |
| "valid_targets_min": 3261 |
| }, |
| { |
| "epoch": 4.4288, |
| "grad_norm": 0.2283554825531617, |
| "learning_rate": 1.617449039076955e-06, |
| "loss": 0.4378, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04219736531376839, |
| "step": 695, |
| "valid_targets_mean": 4555.1, |
| "valid_targets_min": 1703 |
| }, |
| { |
| "epoch": 4.4608, |
| "grad_norm": 0.24248521340764762, |
| "learning_rate": 1.4467070107473413e-06, |
| "loss": 0.4399, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04724803939461708, |
| "step": 700, |
| "valid_targets_mean": 5124.2, |
| "valid_targets_min": 2155 |
| }, |
| { |
| "epoch": 4.4928, |
| "grad_norm": 0.21532123487455646, |
| "learning_rate": 1.2851490154926816e-06, |
| "loss": 0.4557, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05728413909673691, |
| "step": 705, |
| "valid_targets_mean": 7353.1, |
| "valid_targets_min": 3568 |
| }, |
| { |
| "epoch": 4.5248, |
| "grad_norm": 0.21904250721230045, |
| "learning_rate": 1.1328550258533211e-06, |
| "loss": 0.4484, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.054167069494724274, |
| "step": 710, |
| "valid_targets_mean": 6127.1, |
| "valid_targets_min": 3463 |
| }, |
| { |
| "epoch": 4.5568, |
| "grad_norm": 0.2196771726065467, |
| "learning_rate": 9.899004286103953e-07, |
| "loss": 0.4381, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.060034722089767456, |
| "step": 715, |
| "valid_targets_mean": 6735.6, |
| "valid_targets_min": 3212 |
| }, |
| { |
| "epoch": 4.5888, |
| "grad_norm": 0.22226145118483576, |
| "learning_rate": 8.5635598746876e-07, |
| "loss": 0.4361, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0540115162730217, |
| "step": 720, |
| "valid_targets_mean": 5748.1, |
| "valid_targets_min": 1112 |
| }, |
| { |
| "epoch": 4.6208, |
| "grad_norm": 0.20622192219828592, |
| "learning_rate": 7.32287808028389e-07, |
| "loss": 0.4368, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.0476539321243763, |
| "step": 725, |
| "valid_targets_mean": 6541.5, |
| "valid_targets_min": 4327 |
| }, |
| { |
| "epoch": 4.6528, |
| "grad_norm": 0.2654139277006733, |
| "learning_rate": 6.177573050615327e-07, |
| "loss": 0.4412, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04480193182826042, |
| "step": 730, |
| "valid_targets_mean": 5015.1, |
| "valid_targets_min": 1454 |
| }, |
| { |
| "epoch": 4.6848, |
| "grad_norm": 0.21237859053464736, |
| "learning_rate": 5.128211721119213e-07, |
| "loss": 0.4393, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04254649952054024, |
| "step": 735, |
| "valid_targets_mean": 4372.5, |
| "valid_targets_min": 1393 |
| }, |
| { |
| "epoch": 4.7168, |
| "grad_norm": 0.21207695678126648, |
| "learning_rate": 4.175313534309755e-07, |
| "loss": 0.4249, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04434927552938461, |
| "step": 740, |
| "valid_targets_mean": 5530.1, |
| "valid_targets_min": 2971 |
| }, |
| { |
| "epoch": 4.7488, |
| "grad_norm": 0.21160588431886262, |
| "learning_rate": 3.319350182649861e-07, |
| "loss": 0.4496, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.04617893695831299, |
| "step": 745, |
| "valid_targets_mean": 5305.0, |
| "valid_targets_min": 1909 |
| }, |
| { |
| "epoch": 4.7808, |
| "grad_norm": 0.20575094904529548, |
| "learning_rate": 2.560745375059392e-07, |
| "loss": 0.4393, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05063258856534958, |
| "step": 750, |
| "valid_targets_mean": 5929.2, |
| "valid_targets_min": 4302 |
| }, |
| { |
| "epoch": 4.8128, |
| "grad_norm": 0.2257345328349728, |
| "learning_rate": 1.8998746271758016e-07, |
| "loss": 0.4408, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.045826494693756104, |
| "step": 755, |
| "valid_targets_mean": 5185.9, |
| "valid_targets_min": 3921 |
| }, |
| { |
| "epoch": 4.8448, |
| "grad_norm": 0.22650859111872304, |
| "learning_rate": 1.337065075470778e-07, |
| "loss": 0.4367, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.054923709481954575, |
| "step": 760, |
| "valid_targets_mean": 5927.9, |
| "valid_targets_min": 1465 |
| }, |
| { |
| "epoch": 4.8768, |
| "grad_norm": 0.20206724083660776, |
| "learning_rate": 8.725953153150279e-08, |
| "loss": 0.4331, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.054176587611436844, |
| "step": 765, |
| "valid_targets_mean": 5933.9, |
| "valid_targets_min": 1252 |
| }, |
| { |
| "epoch": 4.9088, |
| "grad_norm": 0.20947217249132233, |
| "learning_rate": 5.066952630711886e-08, |
| "loss": 0.4359, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.05193956196308136, |
| "step": 770, |
| "valid_targets_mean": 7103.0, |
| "valid_targets_min": 2688 |
| }, |
| { |
| "epoch": 4.9408, |
| "grad_norm": 0.22177657633147466, |
| "learning_rate": 2.3954604228342283e-08, |
| "loss": 0.4376, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.053807418793439865, |
| "step": 775, |
| "valid_targets_mean": 6107.4, |
| "valid_targets_min": 3419 |
| }, |
| { |
| "epoch": 4.9728, |
| "grad_norm": 0.20408953843665317, |
| "learning_rate": 7.12798940197601e-09, |
| "loss": 0.4326, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.047702357172966, |
| "step": 780, |
| "valid_targets_mean": 5425.0, |
| "valid_targets_min": 1484 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.46075961111833535, |
| "learning_rate": 1.9801114115480802e-10, |
| "loss": 0.4472, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.23474955558776855, |
| "step": 785, |
| "valid_targets_mean": 5649.8, |
| "valid_targets_min": 1507 |
| }, |
| { |
| "epoch": 5.0, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.23474955558776855, |
| "step": 785, |
| "total_flos": 2.415833197137887e+18, |
| "train_loss": 0.47492863266331375, |
| "train_runtime": 30736.1846, |
| "train_samples_per_second": 1.627, |
| "train_steps_per_second": 0.026, |
| "valid_targets_mean": 5649.8, |
| "valid_targets_min": 1507 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 785, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.415833197137887e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|