| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 441, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 7.505872381862423, | |
| "learning_rate": 3.555555555555556e-06, | |
| "loss": 0.8889, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3562159836292267, | |
| "step": 5, | |
| "valid_targets_mean": 2130.2, | |
| "valid_targets_min": 1151 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 4.299941064882982, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.8268, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43203479051589966, | |
| "step": 10, | |
| "valid_targets_mean": 2883.6, | |
| "valid_targets_min": 1121 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 2.284058440682455, | |
| "learning_rate": 1.2444444444444446e-05, | |
| "loss": 0.7614, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3178209662437439, | |
| "step": 15, | |
| "valid_targets_mean": 2271.4, | |
| "valid_targets_min": 779 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 1.208566277480267, | |
| "learning_rate": 1.688888888888889e-05, | |
| "loss": 0.6515, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.34249401092529297, | |
| "step": 20, | |
| "valid_targets_mean": 2947.9, | |
| "valid_targets_min": 1111 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.1034086292105623, | |
| "learning_rate": 2.1333333333333335e-05, | |
| "loss": 0.5973, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.32616934180259705, | |
| "step": 25, | |
| "valid_targets_mean": 3281.1, | |
| "valid_targets_min": 873 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.0503267052677303, | |
| "learning_rate": 2.577777777777778e-05, | |
| "loss": 0.6453, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3611135482788086, | |
| "step": 30, | |
| "valid_targets_mean": 2335.5, | |
| "valid_targets_min": 1311 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.9194708186912789, | |
| "learning_rate": 3.0222222222222225e-05, | |
| "loss": 0.5684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.26414716243743896, | |
| "step": 35, | |
| "valid_targets_mean": 2443.0, | |
| "valid_targets_min": 1337 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.8752103356753728, | |
| "learning_rate": 3.466666666666667e-05, | |
| "loss": 0.5656, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.3691626489162445, | |
| "step": 40, | |
| "valid_targets_mean": 3333.5, | |
| "valid_targets_min": 989 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.7978234001550478, | |
| "learning_rate": 3.9111111111111115e-05, | |
| "loss": 0.5234, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28348302841186523, | |
| "step": 45, | |
| "valid_targets_mean": 2966.8, | |
| "valid_targets_min": 886 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.7861038745079318, | |
| "learning_rate": 3.9989930847663706e-05, | |
| "loss": 0.53, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.28643012046813965, | |
| "step": 50, | |
| "valid_targets_mean": 3180.1, | |
| "valid_targets_min": 1358 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.8055444173490995, | |
| "learning_rate": 3.994904229220507e-05, | |
| "loss": 0.5412, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.291437566280365, | |
| "step": 55, | |
| "valid_targets_mean": 2481.9, | |
| "valid_targets_min": 1418 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.8278524255463231, | |
| "learning_rate": 3.9876769289225084e-05, | |
| "loss": 0.4896, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22183263301849365, | |
| "step": 60, | |
| "valid_targets_mean": 1879.6, | |
| "valid_targets_min": 981 | |
| }, | |
| { | |
| "epoch": 1.032, | |
| "grad_norm": 0.8696048731655817, | |
| "learning_rate": 3.977322554083716e-05, | |
| "loss": 0.5374, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.43486517667770386, | |
| "step": 65, | |
| "valid_targets_mean": 3426.5, | |
| "valid_targets_min": 1408 | |
| }, | |
| { | |
| "epoch": 1.112, | |
| "grad_norm": 0.7781509716799682, | |
| "learning_rate": 3.963857394525413e-05, | |
| "loss": 0.488, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19639593362808228, | |
| "step": 70, | |
| "valid_targets_mean": 2309.4, | |
| "valid_targets_min": 886 | |
| }, | |
| { | |
| "epoch": 1.192, | |
| "grad_norm": 0.8382160539678171, | |
| "learning_rate": 3.947302634051182e-05, | |
| "loss": 0.4376, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20422697067260742, | |
| "step": 75, | |
| "valid_targets_mean": 1743.0, | |
| "valid_targets_min": 1050 | |
| }, | |
| { | |
| "epoch": 1.272, | |
| "grad_norm": 0.8476519934522162, | |
| "learning_rate": 3.9276843171198844e-05, | |
| "loss": 0.4908, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20554912090301514, | |
| "step": 80, | |
| "valid_targets_mean": 2396.1, | |
| "valid_targets_min": 1201 | |
| }, | |
| { | |
| "epoch": 1.3519999999999999, | |
| "grad_norm": 0.9006870601615783, | |
| "learning_rate": 3.9050333078717216e-05, | |
| "loss": 0.4667, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1725320816040039, | |
| "step": 85, | |
| "valid_targets_mean": 1700.5, | |
| "valid_targets_min": 1071 | |
| }, | |
| { | |
| "epoch": 1.432, | |
| "grad_norm": 0.791325198841491, | |
| "learning_rate": 3.879385241571817e-05, | |
| "loss": 0.459, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21249258518218994, | |
| "step": 90, | |
| "valid_targets_mean": 2356.9, | |
| "valid_targets_min": 1122 | |
| }, | |
| { | |
| "epoch": 1.512, | |
| "grad_norm": 1.0385117113315703, | |
| "learning_rate": 3.8507804685477223e-05, | |
| "loss": 0.446, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21146586537361145, | |
| "step": 95, | |
| "valid_targets_mean": 1682.9, | |
| "valid_targets_min": 873 | |
| }, | |
| { | |
| "epoch": 1.592, | |
| "grad_norm": 0.8011987888299815, | |
| "learning_rate": 3.819263990709037e-05, | |
| "loss": 0.443, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2212362438440323, | |
| "step": 100, | |
| "valid_targets_mean": 3187.1, | |
| "valid_targets_min": 281 | |
| }, | |
| { | |
| "epoch": 1.6720000000000002, | |
| "grad_norm": 0.778909724911067, | |
| "learning_rate": 3.78488539074902e-05, | |
| "loss": 0.4578, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23308740556240082, | |
| "step": 105, | |
| "valid_targets_mean": 2398.9, | |
| "valid_targets_min": 960 | |
| }, | |
| { | |
| "epoch": 1.752, | |
| "grad_norm": 0.7972257244582753, | |
| "learning_rate": 3.74769875413957e-05, | |
| "loss": 0.4481, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16984760761260986, | |
| "step": 110, | |
| "valid_targets_mean": 1965.9, | |
| "valid_targets_min": 670 | |
| }, | |
| { | |
| "epoch": 1.8319999999999999, | |
| "grad_norm": 0.8844948592143324, | |
| "learning_rate": 3.707762584042297e-05, | |
| "loss": 0.4381, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24562954902648926, | |
| "step": 115, | |
| "valid_targets_mean": 2081.1, | |
| "valid_targets_min": 1412 | |
| }, | |
| { | |
| "epoch": 1.912, | |
| "grad_norm": 1.053576485355223, | |
| "learning_rate": 3.665139709269543e-05, | |
| "loss": 0.4437, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2656170129776001, | |
| "step": 120, | |
| "valid_targets_mean": 1548.0, | |
| "valid_targets_min": 903 | |
| }, | |
| { | |
| "epoch": 1.992, | |
| "grad_norm": 0.7689058933714845, | |
| "learning_rate": 3.619897185440168e-05, | |
| "loss": 0.4438, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.24418014287948608, | |
| "step": 125, | |
| "valid_targets_mean": 2581.8, | |
| "valid_targets_min": 1259 | |
| }, | |
| { | |
| "epoch": 2.064, | |
| "grad_norm": 0.7326051302166501, | |
| "learning_rate": 3.5721061894855756e-05, | |
| "loss": 0.4077, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18473495543003082, | |
| "step": 130, | |
| "valid_targets_mean": 2903.2, | |
| "valid_targets_min": 1410 | |
| }, | |
| { | |
| "epoch": 2.144, | |
| "grad_norm": 0.8708474099425602, | |
| "learning_rate": 3.521841907671983e-05, | |
| "loss": 0.4048, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2214871197938919, | |
| "step": 135, | |
| "valid_targets_mean": 2367.8, | |
| "valid_targets_min": 1390 | |
| }, | |
| { | |
| "epoch": 2.224, | |
| "grad_norm": 0.7597533514662402, | |
| "learning_rate": 3.469183417315066e-05, | |
| "loss": 0.4083, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1592719554901123, | |
| "step": 140, | |
| "valid_targets_mean": 2209.9, | |
| "valid_targets_min": 1123 | |
| }, | |
| { | |
| "epoch": 2.304, | |
| "grad_norm": 0.8308288880226818, | |
| "learning_rate": 3.4142135623730954e-05, | |
| "loss": 0.4373, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2935639023780823, | |
| "step": 145, | |
| "valid_targets_mean": 3067.8, | |
| "valid_targets_min": 684 | |
| }, | |
| { | |
| "epoch": 2.384, | |
| "grad_norm": 0.7814988864434326, | |
| "learning_rate": 3.3570188231142647e-05, | |
| "loss": 0.3945, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2506471872329712, | |
| "step": 150, | |
| "valid_targets_mean": 3332.8, | |
| "valid_targets_min": 1190 | |
| }, | |
| { | |
| "epoch": 2.464, | |
| "grad_norm": 0.9693558608483495, | |
| "learning_rate": 3.2976891800632775e-05, | |
| "loss": 0.4123, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18241941928863525, | |
| "step": 155, | |
| "valid_targets_mean": 1810.9, | |
| "valid_targets_min": 508 | |
| }, | |
| { | |
| "epoch": 2.544, | |
| "grad_norm": 0.9188887892272475, | |
| "learning_rate": 3.2363179724412105e-05, | |
| "loss": 0.4185, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17377954721450806, | |
| "step": 160, | |
| "valid_targets_mean": 1971.0, | |
| "valid_targets_min": 906 | |
| }, | |
| { | |
| "epoch": 2.624, | |
| "grad_norm": 0.8184684644144737, | |
| "learning_rate": 3.173001751321381e-05, | |
| "loss": 0.3719, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.21439319849014282, | |
| "step": 165, | |
| "valid_targets_mean": 2445.6, | |
| "valid_targets_min": 1154 | |
| }, | |
| { | |
| "epoch": 2.7039999999999997, | |
| "grad_norm": 0.9397326224963451, | |
| "learning_rate": 3.107840127732221e-05, | |
| "loss": 0.3863, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17729023098945618, | |
| "step": 170, | |
| "valid_targets_mean": 1877.6, | |
| "valid_targets_min": 990 | |
| }, | |
| { | |
| "epoch": 2.784, | |
| "grad_norm": 0.8420928524890345, | |
| "learning_rate": 3.0409356159461447e-05, | |
| "loss": 0.3715, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19148927927017212, | |
| "step": 175, | |
| "valid_targets_mean": 2280.6, | |
| "valid_targets_min": 1077 | |
| }, | |
| { | |
| "epoch": 2.864, | |
| "grad_norm": 0.722070280229424, | |
| "learning_rate": 2.9723934722009375e-05, | |
| "loss": 0.3896, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11640667170286179, | |
| "step": 180, | |
| "valid_targets_mean": 2501.6, | |
| "valid_targets_min": 1121 | |
| }, | |
| { | |
| "epoch": 2.944, | |
| "grad_norm": 0.7740124459994261, | |
| "learning_rate": 2.9023215291074017e-05, | |
| "loss": 0.3836, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.179210364818573, | |
| "step": 185, | |
| "valid_targets_mean": 2544.9, | |
| "valid_targets_min": 1021 | |
| }, | |
| { | |
| "epoch": 3.016, | |
| "grad_norm": 0.8032029716283615, | |
| "learning_rate": 2.8308300260037734e-05, | |
| "loss": 0.3848, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1465587019920349, | |
| "step": 190, | |
| "valid_targets_mean": 2581.0, | |
| "valid_targets_min": 1258 | |
| }, | |
| { | |
| "epoch": 3.096, | |
| "grad_norm": 1.0675192734720838, | |
| "learning_rate": 2.758031435523801e-05, | |
| "loss": 0.3376, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19570258259773254, | |
| "step": 195, | |
| "valid_targets_mean": 1776.4, | |
| "valid_targets_min": 998 | |
| }, | |
| { | |
| "epoch": 3.176, | |
| "grad_norm": 0.7941943435641896, | |
| "learning_rate": 2.684040286651338e-05, | |
| "loss": 0.3525, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17122869193553925, | |
| "step": 200, | |
| "valid_targets_mean": 2871.4, | |
| "valid_targets_min": 1062 | |
| }, | |
| { | |
| "epoch": 3.2560000000000002, | |
| "grad_norm": 0.8590713481269158, | |
| "learning_rate": 2.6089729845398144e-05, | |
| "loss": 0.3631, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20491699874401093, | |
| "step": 205, | |
| "valid_targets_mean": 2635.2, | |
| "valid_targets_min": 1136 | |
| }, | |
| { | |
| "epoch": 3.336, | |
| "grad_norm": 0.8124117326779144, | |
| "learning_rate": 2.53294762738007e-05, | |
| "loss": 0.3683, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15610741078853607, | |
| "step": 210, | |
| "valid_targets_mean": 2695.1, | |
| "valid_targets_min": 1041 | |
| }, | |
| { | |
| "epoch": 3.416, | |
| "grad_norm": 0.9190062289677365, | |
| "learning_rate": 2.4560838206046437e-05, | |
| "loss": 0.3376, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17463022470474243, | |
| "step": 215, | |
| "valid_targets_mean": 2569.9, | |
| "valid_targets_min": 1514 | |
| }, | |
| { | |
| "epoch": 3.496, | |
| "grad_norm": 0.9065219155043758, | |
| "learning_rate": 2.3785024887208207e-05, | |
| "loss": 0.3517, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.22582875192165375, | |
| "step": 220, | |
| "valid_targets_mean": 2724.9, | |
| "valid_targets_min": 1220 | |
| }, | |
| { | |
| "epoch": 3.576, | |
| "grad_norm": 1.0837756986609808, | |
| "learning_rate": 2.3003256850684808e-05, | |
| "loss": 0.359, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18150931596755981, | |
| "step": 225, | |
| "valid_targets_mean": 2179.9, | |
| "valid_targets_min": 1399 | |
| }, | |
| { | |
| "epoch": 3.656, | |
| "grad_norm": 1.2484982875648234, | |
| "learning_rate": 2.2216763998020222e-05, | |
| "loss": 0.3463, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.18959809839725494, | |
| "step": 230, | |
| "valid_targets_mean": 1788.8, | |
| "valid_targets_min": 903 | |
| }, | |
| { | |
| "epoch": 3.7359999999999998, | |
| "grad_norm": 1.3179674271154644, | |
| "learning_rate": 2.1426783663984648e-05, | |
| "loss": 0.3523, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15963847935199738, | |
| "step": 235, | |
| "valid_targets_mean": 1711.9, | |
| "valid_targets_min": 886 | |
| }, | |
| { | |
| "epoch": 3.816, | |
| "grad_norm": 1.2429310941711111, | |
| "learning_rate": 2.063455866996136e-05, | |
| "loss": 0.3419, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1572306901216507, | |
| "step": 240, | |
| "valid_targets_mean": 2487.1, | |
| "valid_targets_min": 1122 | |
| }, | |
| { | |
| "epoch": 3.896, | |
| "grad_norm": 1.015916750182016, | |
| "learning_rate": 1.9841335368701812e-05, | |
| "loss": 0.3612, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14525054395198822, | |
| "step": 245, | |
| "valid_targets_mean": 1455.9, | |
| "valid_targets_min": 1052 | |
| }, | |
| { | |
| "epoch": 3.976, | |
| "grad_norm": 0.7307166020752172, | |
| "learning_rate": 1.9048361683525155e-05, | |
| "loss": 0.3398, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1978192925453186, | |
| "step": 250, | |
| "valid_targets_mean": 4146.5, | |
| "valid_targets_min": 1324 | |
| }, | |
| { | |
| "epoch": 4.048, | |
| "grad_norm": 0.7270768944589141, | |
| "learning_rate": 1.8256885145046837e-05, | |
| "loss": 0.3243, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1474921554327011, | |
| "step": 255, | |
| "valid_targets_mean": 3523.0, | |
| "valid_targets_min": 1623 | |
| }, | |
| { | |
| "epoch": 4.128, | |
| "grad_norm": 0.8418803354786775, | |
| "learning_rate": 1.7468150928525014e-05, | |
| "loss": 0.3167, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13115155696868896, | |
| "step": 260, | |
| "valid_targets_mean": 2395.5, | |
| "valid_targets_min": 1216 | |
| }, | |
| { | |
| "epoch": 4.208, | |
| "grad_norm": 0.8360752789280677, | |
| "learning_rate": 1.6683399894912522e-05, | |
| "loss": 0.3013, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16828887164592743, | |
| "step": 265, | |
| "valid_targets_mean": 2699.0, | |
| "valid_targets_min": 1050 | |
| }, | |
| { | |
| "epoch": 4.288, | |
| "grad_norm": 0.96113797783045, | |
| "learning_rate": 1.590386663869619e-05, | |
| "loss": 0.3216, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15936484932899475, | |
| "step": 270, | |
| "valid_targets_mean": 2447.2, | |
| "valid_targets_min": 1046 | |
| }, | |
| { | |
| "epoch": 4.368, | |
| "grad_norm": 0.833095764473076, | |
| "learning_rate": 1.5130777545594824e-05, | |
| "loss": 0.3127, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16047963500022888, | |
| "step": 275, | |
| "valid_targets_mean": 3236.5, | |
| "valid_targets_min": 990 | |
| }, | |
| { | |
| "epoch": 4.448, | |
| "grad_norm": 0.947963665080153, | |
| "learning_rate": 1.4365348863171406e-05, | |
| "loss": 0.3083, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15773838758468628, | |
| "step": 280, | |
| "valid_targets_mean": 2548.2, | |
| "valid_targets_min": 1062 | |
| }, | |
| { | |
| "epoch": 4.5280000000000005, | |
| "grad_norm": 1.0228687085990507, | |
| "learning_rate": 1.3608784787395005e-05, | |
| "loss": 0.3294, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.2106139063835144, | |
| "step": 285, | |
| "valid_targets_mean": 2470.1, | |
| "valid_targets_min": 1847 | |
| }, | |
| { | |
| "epoch": 4.608, | |
| "grad_norm": 0.80850799889799, | |
| "learning_rate": 1.2862275568162566e-05, | |
| "loss": 0.3251, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20072545111179352, | |
| "step": 290, | |
| "valid_targets_mean": 3350.9, | |
| "valid_targets_min": 1369 | |
| }, | |
| { | |
| "epoch": 4.688, | |
| "grad_norm": 1.012254046643678, | |
| "learning_rate": 1.2126995636761174e-05, | |
| "loss": 0.3169, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15989793837070465, | |
| "step": 295, | |
| "valid_targets_mean": 2194.9, | |
| "valid_targets_min": 1198 | |
| }, | |
| { | |
| "epoch": 4.768, | |
| "grad_norm": 1.0563967564745178, | |
| "learning_rate": 1.1404101758216568e-05, | |
| "loss": 0.3043, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1755823791027069, | |
| "step": 300, | |
| "valid_targets_mean": 1960.2, | |
| "valid_targets_min": 1028 | |
| }, | |
| { | |
| "epoch": 4.848, | |
| "grad_norm": 0.9311730256722943, | |
| "learning_rate": 1.0694731211434788e-05, | |
| "loss": 0.3113, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12437054514884949, | |
| "step": 305, | |
| "valid_targets_mean": 1777.2, | |
| "valid_targets_min": 1210 | |
| }, | |
| { | |
| "epoch": 4.928, | |
| "grad_norm": 0.9776791335105928, | |
| "learning_rate": 1.0000000000000006e-05, | |
| "loss": 0.325, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.20600047707557678, | |
| "step": 310, | |
| "valid_targets_mean": 2911.2, | |
| "valid_targets_min": 957 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 1.4087833914171262, | |
| "learning_rate": 9.32100109644328e-06, | |
| "loss": 0.3011, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.29628846049308777, | |
| "step": 315, | |
| "valid_targets_mean": 3412.9, | |
| "valid_targets_min": 1927 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "grad_norm": 1.1237962021877523, | |
| "learning_rate": 8.658802722744589e-06, | |
| "loss": 0.3003, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1448991596698761, | |
| "step": 320, | |
| "valid_targets_mean": 1924.0, | |
| "valid_targets_min": 1127 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "grad_norm": 0.9172596743702742, | |
| "learning_rate": 8.014446669773061e-06, | |
| "loss": 0.2765, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.19501471519470215, | |
| "step": 325, | |
| "valid_targets_mean": 3372.5, | |
| "valid_targets_min": 1486 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "grad_norm": 1.0573522542198426, | |
| "learning_rate": 7.388946658309557e-06, | |
| "loss": 0.3104, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13600924611091614, | |
| "step": 330, | |
| "valid_targets_mean": 1860.2, | |
| "valid_targets_min": 626 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "grad_norm": 1.095408717419895, | |
| "learning_rate": 6.7832867442298645e-06, | |
| "loss": 0.2844, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.14203448593616486, | |
| "step": 335, | |
| "valid_targets_mean": 3012.0, | |
| "valid_targets_min": 1445 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "grad_norm": 0.8946946595676237, | |
| "learning_rate": 6.198419770357764e-06, | |
| "loss": 0.2976, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16260306537151337, | |
| "step": 340, | |
| "valid_targets_mean": 2969.6, | |
| "valid_targets_min": 873 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "grad_norm": 0.8169352231394611, | |
| "learning_rate": 5.635265867423321e-06, | |
| "loss": 0.2998, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.17487314343452454, | |
| "step": 345, | |
| "valid_targets_mean": 3076.6, | |
| "valid_targets_min": 1050 | |
| }, | |
| { | |
| "epoch": 5.5600000000000005, | |
| "grad_norm": 0.8163208743049927, | |
| "learning_rate": 5.094711006484907e-06, | |
| "loss": 0.2778, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08679372817277908, | |
| "step": 350, | |
| "valid_targets_mean": 2021.5, | |
| "valid_targets_min": 338 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "grad_norm": 0.9952519880856041, | |
| "learning_rate": 4.577605605092248e-06, | |
| "loss": 0.285, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.16831034421920776, | |
| "step": 355, | |
| "valid_targets_mean": 2338.1, | |
| "valid_targets_min": 1084 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "grad_norm": 1.0756622420151967, | |
| "learning_rate": 4.0847631893833566e-06, | |
| "loss": 0.2852, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1444486677646637, | |
| "step": 360, | |
| "valid_targets_mean": 2133.6, | |
| "valid_targets_min": 965 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "grad_norm": 0.9257674814633752, | |
| "learning_rate": 3.616959114220162e-06, | |
| "loss": 0.3002, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.23777079582214355, | |
| "step": 365, | |
| "valid_targets_mean": 2538.9, | |
| "valid_targets_min": 1373 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "grad_norm": 0.8690699429240454, | |
| "learning_rate": 3.174929343376374e-06, | |
| "loss": 0.2841, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11037126183509827, | |
| "step": 370, | |
| "valid_targets_mean": 2433.8, | |
| "valid_targets_min": 1237 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "grad_norm": 0.9718357574467225, | |
| "learning_rate": 2.759369291696614e-06, | |
| "loss": 0.3004, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1343100368976593, | |
| "step": 375, | |
| "valid_targets_mean": 2798.5, | |
| "valid_targets_min": 903 | |
| }, | |
| { | |
| "epoch": 6.032, | |
| "grad_norm": 0.9817652023621845, | |
| "learning_rate": 2.3709327310483608e-06, | |
| "loss": 0.2684, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.12780410051345825, | |
| "step": 380, | |
| "valid_targets_mean": 2150.0, | |
| "valid_targets_min": 998 | |
| }, | |
| { | |
| "epoch": 6.112, | |
| "grad_norm": 0.9014203224760045, | |
| "learning_rate": 2.0102307617879367e-06, | |
| "loss": 0.2766, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.13069286942481995, | |
| "step": 385, | |
| "valid_targets_mean": 2268.4, | |
| "valid_targets_min": 911 | |
| }, | |
| { | |
| "epoch": 6.192, | |
| "grad_norm": 1.0186765172032302, | |
| "learning_rate": 1.6778308513586084e-06, | |
| "loss": 0.2962, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11167597770690918, | |
| "step": 390, | |
| "valid_targets_mean": 1636.0, | |
| "valid_targets_min": 693 | |
| }, | |
| { | |
| "epoch": 6.272, | |
| "grad_norm": 0.9007287293295244, | |
| "learning_rate": 1.3742559415333267e-06, | |
| "loss": 0.2762, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.10594399273395538, | |
| "step": 395, | |
| "valid_targets_mean": 2016.9, | |
| "valid_targets_min": 1210 | |
| }, | |
| { | |
| "epoch": 6.352, | |
| "grad_norm": 0.946964378636132, | |
| "learning_rate": 1.099983625706631e-06, | |
| "loss": 0.2655, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1707034707069397, | |
| "step": 400, | |
| "valid_targets_mean": 3195.9, | |
| "valid_targets_min": 1289 | |
| }, | |
| { | |
| "epoch": 6.432, | |
| "grad_norm": 0.7899732764847707, | |
| "learning_rate": 8.554453975300258e-07, | |
| "loss": 0.2789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.08833309262990952, | |
| "step": 405, | |
| "valid_targets_mean": 2436.8, | |
| "valid_targets_min": 888 | |
| }, | |
| { | |
| "epoch": 6.5120000000000005, | |
| "grad_norm": 0.8794195051438293, | |
| "learning_rate": 6.410259720728751e-07, | |
| "loss": 0.2992, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.09639259427785873, | |
| "step": 410, | |
| "valid_targets_mean": 2271.0, | |
| "valid_targets_min": 984 | |
| }, | |
| { | |
| "epoch": 6.592, | |
| "grad_norm": 1.0137858039965408, | |
| "learning_rate": 4.570626805768119e-07, | |
| "loss": 0.273, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15096719563007355, | |
| "step": 415, | |
| "valid_targets_mean": 2673.6, | |
| "valid_targets_min": 1106 | |
| }, | |
| { | |
| "epoch": 6.672, | |
| "grad_norm": 0.9819001117157581, | |
| "learning_rate": 3.038449397558396e-07, | |
| "loss": 0.2942, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.15650422871112823, | |
| "step": 420, | |
| "valid_targets_mean": 2339.1, | |
| "valid_targets_min": 718 | |
| }, | |
| { | |
| "epoch": 6.752, | |
| "grad_norm": 0.8517024842857888, | |
| "learning_rate": 1.8161379647706034e-07, | |
| "loss": 0.2789, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.11373068392276764, | |
| "step": 425, | |
| "valid_targets_mean": 2288.1, | |
| "valid_targets_min": 1061 | |
| }, | |
| { | |
| "epoch": 6.832, | |
| "grad_norm": 0.9942173325428866, | |
| "learning_rate": 9.056154853830823e-08, | |
| "loss": 0.2924, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1729286015033722, | |
| "step": 430, | |
| "valid_targets_mean": 2370.9, | |
| "valid_targets_min": 436 | |
| }, | |
| { | |
| "epoch": 6.912, | |
| "grad_norm": 0.8761392834472287, | |
| "learning_rate": 3.083144213933853e-08, | |
| "loss": 0.2918, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.0916639119386673, | |
| "step": 435, | |
| "valid_targets_mean": 2043.0, | |
| "valid_targets_min": 1123 | |
| }, | |
| { | |
| "epoch": 6.992, | |
| "grad_norm": 1.0047486254280051, | |
| "learning_rate": 2.5174465224986343e-09, | |
| "loss": 0.2714, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.1423904448747635, | |
| "step": 440, | |
| "valid_targets_mean": 1893.1, | |
| "valid_targets_min": 626 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "loss_nan_ranks": 0, | |
| "loss_rank_avg": 0.259745717048645, | |
| "step": 441, | |
| "total_flos": 7.12082116746281e+16, | |
| "train_loss": 0.38846772432732746, | |
| "train_runtime": 2096.0086, | |
| "train_samples_per_second": 3.336, | |
| "train_steps_per_second": 0.21, | |
| "valid_targets_mean": 3009.9, | |
| "valid_targets_min": 873 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 441, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 1500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.12082116746281e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |