penfever's picture
End of training
336d4a6 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 500,
"global_step": 441,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"grad_norm": 7.505872381862423,
"learning_rate": 3.555555555555556e-06,
"loss": 0.8889,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3562159836292267,
"step": 5,
"valid_targets_mean": 2130.2,
"valid_targets_min": 1151
},
{
"epoch": 0.16,
"grad_norm": 4.299941064882982,
"learning_rate": 8.000000000000001e-06,
"loss": 0.8268,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.43203479051589966,
"step": 10,
"valid_targets_mean": 2883.6,
"valid_targets_min": 1121
},
{
"epoch": 0.24,
"grad_norm": 2.284058440682455,
"learning_rate": 1.2444444444444446e-05,
"loss": 0.7614,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3178209662437439,
"step": 15,
"valid_targets_mean": 2271.4,
"valid_targets_min": 779
},
{
"epoch": 0.32,
"grad_norm": 1.208566277480267,
"learning_rate": 1.688888888888889e-05,
"loss": 0.6515,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.34249401092529297,
"step": 20,
"valid_targets_mean": 2947.9,
"valid_targets_min": 1111
},
{
"epoch": 0.4,
"grad_norm": 1.1034086292105623,
"learning_rate": 2.1333333333333335e-05,
"loss": 0.5973,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.32616934180259705,
"step": 25,
"valid_targets_mean": 3281.1,
"valid_targets_min": 873
},
{
"epoch": 0.48,
"grad_norm": 1.0503267052677303,
"learning_rate": 2.577777777777778e-05,
"loss": 0.6453,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3611135482788086,
"step": 30,
"valid_targets_mean": 2335.5,
"valid_targets_min": 1311
},
{
"epoch": 0.56,
"grad_norm": 0.9194708186912789,
"learning_rate": 3.0222222222222225e-05,
"loss": 0.5684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.26414716243743896,
"step": 35,
"valid_targets_mean": 2443.0,
"valid_targets_min": 1337
},
{
"epoch": 0.64,
"grad_norm": 0.8752103356753728,
"learning_rate": 3.466666666666667e-05,
"loss": 0.5656,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.3691626489162445,
"step": 40,
"valid_targets_mean": 3333.5,
"valid_targets_min": 989
},
{
"epoch": 0.72,
"grad_norm": 0.7978234001550478,
"learning_rate": 3.9111111111111115e-05,
"loss": 0.5234,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28348302841186523,
"step": 45,
"valid_targets_mean": 2966.8,
"valid_targets_min": 886
},
{
"epoch": 0.8,
"grad_norm": 0.7861038745079318,
"learning_rate": 3.9989930847663706e-05,
"loss": 0.53,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.28643012046813965,
"step": 50,
"valid_targets_mean": 3180.1,
"valid_targets_min": 1358
},
{
"epoch": 0.88,
"grad_norm": 0.8055444173490995,
"learning_rate": 3.994904229220507e-05,
"loss": 0.5412,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.291437566280365,
"step": 55,
"valid_targets_mean": 2481.9,
"valid_targets_min": 1418
},
{
"epoch": 0.96,
"grad_norm": 0.8278524255463231,
"learning_rate": 3.9876769289225084e-05,
"loss": 0.4896,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22183263301849365,
"step": 60,
"valid_targets_mean": 1879.6,
"valid_targets_min": 981
},
{
"epoch": 1.032,
"grad_norm": 0.8696048731655817,
"learning_rate": 3.977322554083716e-05,
"loss": 0.5374,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.43486517667770386,
"step": 65,
"valid_targets_mean": 3426.5,
"valid_targets_min": 1408
},
{
"epoch": 1.112,
"grad_norm": 0.7781509716799682,
"learning_rate": 3.963857394525413e-05,
"loss": 0.488,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19639593362808228,
"step": 70,
"valid_targets_mean": 2309.4,
"valid_targets_min": 886
},
{
"epoch": 1.192,
"grad_norm": 0.8382160539678171,
"learning_rate": 3.947302634051182e-05,
"loss": 0.4376,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20422697067260742,
"step": 75,
"valid_targets_mean": 1743.0,
"valid_targets_min": 1050
},
{
"epoch": 1.272,
"grad_norm": 0.8476519934522162,
"learning_rate": 3.9276843171198844e-05,
"loss": 0.4908,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20554912090301514,
"step": 80,
"valid_targets_mean": 2396.1,
"valid_targets_min": 1201
},
{
"epoch": 1.3519999999999999,
"grad_norm": 0.9006870601615783,
"learning_rate": 3.9050333078717216e-05,
"loss": 0.4667,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1725320816040039,
"step": 85,
"valid_targets_mean": 1700.5,
"valid_targets_min": 1071
},
{
"epoch": 1.432,
"grad_norm": 0.791325198841491,
"learning_rate": 3.879385241571817e-05,
"loss": 0.459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21249258518218994,
"step": 90,
"valid_targets_mean": 2356.9,
"valid_targets_min": 1122
},
{
"epoch": 1.512,
"grad_norm": 1.0385117113315703,
"learning_rate": 3.8507804685477223e-05,
"loss": 0.446,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21146586537361145,
"step": 95,
"valid_targets_mean": 1682.9,
"valid_targets_min": 873
},
{
"epoch": 1.592,
"grad_norm": 0.8011987888299815,
"learning_rate": 3.819263990709037e-05,
"loss": 0.443,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2212362438440323,
"step": 100,
"valid_targets_mean": 3187.1,
"valid_targets_min": 281
},
{
"epoch": 1.6720000000000002,
"grad_norm": 0.778909724911067,
"learning_rate": 3.78488539074902e-05,
"loss": 0.4578,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23308740556240082,
"step": 105,
"valid_targets_mean": 2398.9,
"valid_targets_min": 960
},
{
"epoch": 1.752,
"grad_norm": 0.7972257244582753,
"learning_rate": 3.74769875413957e-05,
"loss": 0.4481,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16984760761260986,
"step": 110,
"valid_targets_mean": 1965.9,
"valid_targets_min": 670
},
{
"epoch": 1.8319999999999999,
"grad_norm": 0.8844948592143324,
"learning_rate": 3.707762584042297e-05,
"loss": 0.4381,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24562954902648926,
"step": 115,
"valid_targets_mean": 2081.1,
"valid_targets_min": 1412
},
{
"epoch": 1.912,
"grad_norm": 1.053576485355223,
"learning_rate": 3.665139709269543e-05,
"loss": 0.4437,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2656170129776001,
"step": 120,
"valid_targets_mean": 1548.0,
"valid_targets_min": 903
},
{
"epoch": 1.992,
"grad_norm": 0.7689058933714845,
"learning_rate": 3.619897185440168e-05,
"loss": 0.4438,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.24418014287948608,
"step": 125,
"valid_targets_mean": 2581.8,
"valid_targets_min": 1259
},
{
"epoch": 2.064,
"grad_norm": 0.7326051302166501,
"learning_rate": 3.5721061894855756e-05,
"loss": 0.4077,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18473495543003082,
"step": 130,
"valid_targets_mean": 2903.2,
"valid_targets_min": 1410
},
{
"epoch": 2.144,
"grad_norm": 0.8708474099425602,
"learning_rate": 3.521841907671983e-05,
"loss": 0.4048,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2214871197938919,
"step": 135,
"valid_targets_mean": 2367.8,
"valid_targets_min": 1390
},
{
"epoch": 2.224,
"grad_norm": 0.7597533514662402,
"learning_rate": 3.469183417315066e-05,
"loss": 0.4083,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1592719554901123,
"step": 140,
"valid_targets_mean": 2209.9,
"valid_targets_min": 1123
},
{
"epoch": 2.304,
"grad_norm": 0.8308288880226818,
"learning_rate": 3.4142135623730954e-05,
"loss": 0.4373,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2935639023780823,
"step": 145,
"valid_targets_mean": 3067.8,
"valid_targets_min": 684
},
{
"epoch": 2.384,
"grad_norm": 0.7814988864434326,
"learning_rate": 3.3570188231142647e-05,
"loss": 0.3945,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2506471872329712,
"step": 150,
"valid_targets_mean": 3332.8,
"valid_targets_min": 1190
},
{
"epoch": 2.464,
"grad_norm": 0.9693558608483495,
"learning_rate": 3.2976891800632775e-05,
"loss": 0.4123,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18241941928863525,
"step": 155,
"valid_targets_mean": 1810.9,
"valid_targets_min": 508
},
{
"epoch": 2.544,
"grad_norm": 0.9188887892272475,
"learning_rate": 3.2363179724412105e-05,
"loss": 0.4185,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17377954721450806,
"step": 160,
"valid_targets_mean": 1971.0,
"valid_targets_min": 906
},
{
"epoch": 2.624,
"grad_norm": 0.8184684644144737,
"learning_rate": 3.173001751321381e-05,
"loss": 0.3719,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21439319849014282,
"step": 165,
"valid_targets_mean": 2445.6,
"valid_targets_min": 1154
},
{
"epoch": 2.7039999999999997,
"grad_norm": 0.9397326224963451,
"learning_rate": 3.107840127732221e-05,
"loss": 0.3863,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17729023098945618,
"step": 170,
"valid_targets_mean": 1877.6,
"valid_targets_min": 990
},
{
"epoch": 2.784,
"grad_norm": 0.8420928524890345,
"learning_rate": 3.0409356159461447e-05,
"loss": 0.3715,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19148927927017212,
"step": 175,
"valid_targets_mean": 2280.6,
"valid_targets_min": 1077
},
{
"epoch": 2.864,
"grad_norm": 0.722070280229424,
"learning_rate": 2.9723934722009375e-05,
"loss": 0.3896,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11640667170286179,
"step": 180,
"valid_targets_mean": 2501.6,
"valid_targets_min": 1121
},
{
"epoch": 2.944,
"grad_norm": 0.7740124459994261,
"learning_rate": 2.9023215291074017e-05,
"loss": 0.3836,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.179210364818573,
"step": 185,
"valid_targets_mean": 2544.9,
"valid_targets_min": 1021
},
{
"epoch": 3.016,
"grad_norm": 0.8032029716283615,
"learning_rate": 2.8308300260037734e-05,
"loss": 0.3848,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1465587019920349,
"step": 190,
"valid_targets_mean": 2581.0,
"valid_targets_min": 1258
},
{
"epoch": 3.096,
"grad_norm": 1.0675192734720838,
"learning_rate": 2.758031435523801e-05,
"loss": 0.3376,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19570258259773254,
"step": 195,
"valid_targets_mean": 1776.4,
"valid_targets_min": 998
},
{
"epoch": 3.176,
"grad_norm": 0.7941943435641896,
"learning_rate": 2.684040286651338e-05,
"loss": 0.3525,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17122869193553925,
"step": 200,
"valid_targets_mean": 2871.4,
"valid_targets_min": 1062
},
{
"epoch": 3.2560000000000002,
"grad_norm": 0.8590713481269158,
"learning_rate": 2.6089729845398144e-05,
"loss": 0.3631,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20491699874401093,
"step": 205,
"valid_targets_mean": 2635.2,
"valid_targets_min": 1136
},
{
"epoch": 3.336,
"grad_norm": 0.8124117326779144,
"learning_rate": 2.53294762738007e-05,
"loss": 0.3683,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15610741078853607,
"step": 210,
"valid_targets_mean": 2695.1,
"valid_targets_min": 1041
},
{
"epoch": 3.416,
"grad_norm": 0.9190062289677365,
"learning_rate": 2.4560838206046437e-05,
"loss": 0.3376,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17463022470474243,
"step": 215,
"valid_targets_mean": 2569.9,
"valid_targets_min": 1514
},
{
"epoch": 3.496,
"grad_norm": 0.9065219155043758,
"learning_rate": 2.3785024887208207e-05,
"loss": 0.3517,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22582875192165375,
"step": 220,
"valid_targets_mean": 2724.9,
"valid_targets_min": 1220
},
{
"epoch": 3.576,
"grad_norm": 1.0837756986609808,
"learning_rate": 2.3003256850684808e-05,
"loss": 0.359,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18150931596755981,
"step": 225,
"valid_targets_mean": 2179.9,
"valid_targets_min": 1399
},
{
"epoch": 3.656,
"grad_norm": 1.2484982875648234,
"learning_rate": 2.2216763998020222e-05,
"loss": 0.3463,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18959809839725494,
"step": 230,
"valid_targets_mean": 1788.8,
"valid_targets_min": 903
},
{
"epoch": 3.7359999999999998,
"grad_norm": 1.3179674271154644,
"learning_rate": 2.1426783663984648e-05,
"loss": 0.3523,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15963847935199738,
"step": 235,
"valid_targets_mean": 1711.9,
"valid_targets_min": 886
},
{
"epoch": 3.816,
"grad_norm": 1.2429310941711111,
"learning_rate": 2.063455866996136e-05,
"loss": 0.3419,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1572306901216507,
"step": 240,
"valid_targets_mean": 2487.1,
"valid_targets_min": 1122
},
{
"epoch": 3.896,
"grad_norm": 1.015916750182016,
"learning_rate": 1.9841335368701812e-05,
"loss": 0.3612,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14525054395198822,
"step": 245,
"valid_targets_mean": 1455.9,
"valid_targets_min": 1052
},
{
"epoch": 3.976,
"grad_norm": 0.7307166020752172,
"learning_rate": 1.9048361683525155e-05,
"loss": 0.3398,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1978192925453186,
"step": 250,
"valid_targets_mean": 4146.5,
"valid_targets_min": 1324
},
{
"epoch": 4.048,
"grad_norm": 0.7270768944589141,
"learning_rate": 1.8256885145046837e-05,
"loss": 0.3243,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1474921554327011,
"step": 255,
"valid_targets_mean": 3523.0,
"valid_targets_min": 1623
},
{
"epoch": 4.128,
"grad_norm": 0.8418803354786775,
"learning_rate": 1.7468150928525014e-05,
"loss": 0.3167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13115155696868896,
"step": 260,
"valid_targets_mean": 2395.5,
"valid_targets_min": 1216
},
{
"epoch": 4.208,
"grad_norm": 0.8360752789280677,
"learning_rate": 1.6683399894912522e-05,
"loss": 0.3013,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16828887164592743,
"step": 265,
"valid_targets_mean": 2699.0,
"valid_targets_min": 1050
},
{
"epoch": 4.288,
"grad_norm": 0.96113797783045,
"learning_rate": 1.590386663869619e-05,
"loss": 0.3216,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15936484932899475,
"step": 270,
"valid_targets_mean": 2447.2,
"valid_targets_min": 1046
},
{
"epoch": 4.368,
"grad_norm": 0.833095764473076,
"learning_rate": 1.5130777545594824e-05,
"loss": 0.3127,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16047963500022888,
"step": 275,
"valid_targets_mean": 3236.5,
"valid_targets_min": 990
},
{
"epoch": 4.448,
"grad_norm": 0.947963665080153,
"learning_rate": 1.4365348863171406e-05,
"loss": 0.3083,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15773838758468628,
"step": 280,
"valid_targets_mean": 2548.2,
"valid_targets_min": 1062
},
{
"epoch": 4.5280000000000005,
"grad_norm": 1.0228687085990507,
"learning_rate": 1.3608784787395005e-05,
"loss": 0.3294,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.2106139063835144,
"step": 285,
"valid_targets_mean": 2470.1,
"valid_targets_min": 1847
},
{
"epoch": 4.608,
"grad_norm": 0.80850799889799,
"learning_rate": 1.2862275568162566e-05,
"loss": 0.3251,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20072545111179352,
"step": 290,
"valid_targets_mean": 3350.9,
"valid_targets_min": 1369
},
{
"epoch": 4.688,
"grad_norm": 1.012254046643678,
"learning_rate": 1.2126995636761174e-05,
"loss": 0.3169,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15989793837070465,
"step": 295,
"valid_targets_mean": 2194.9,
"valid_targets_min": 1198
},
{
"epoch": 4.768,
"grad_norm": 1.0563967564745178,
"learning_rate": 1.1404101758216568e-05,
"loss": 0.3043,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1755823791027069,
"step": 300,
"valid_targets_mean": 1960.2,
"valid_targets_min": 1028
},
{
"epoch": 4.848,
"grad_norm": 0.9311730256722943,
"learning_rate": 1.0694731211434788e-05,
"loss": 0.3113,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12437054514884949,
"step": 305,
"valid_targets_mean": 1777.2,
"valid_targets_min": 1210
},
{
"epoch": 4.928,
"grad_norm": 0.9776791335105928,
"learning_rate": 1.0000000000000006e-05,
"loss": 0.325,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20600047707557678,
"step": 310,
"valid_targets_mean": 2911.2,
"valid_targets_min": 957
},
{
"epoch": 5.0,
"grad_norm": 1.4087833914171262,
"learning_rate": 9.32100109644328e-06,
"loss": 0.3011,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.29628846049308777,
"step": 315,
"valid_targets_mean": 3412.9,
"valid_targets_min": 1927
},
{
"epoch": 5.08,
"grad_norm": 1.1237962021877523,
"learning_rate": 8.658802722744589e-06,
"loss": 0.3003,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1448991596698761,
"step": 320,
"valid_targets_mean": 1924.0,
"valid_targets_min": 1127
},
{
"epoch": 5.16,
"grad_norm": 0.9172596743702742,
"learning_rate": 8.014446669773061e-06,
"loss": 0.2765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.19501471519470215,
"step": 325,
"valid_targets_mean": 3372.5,
"valid_targets_min": 1486
},
{
"epoch": 5.24,
"grad_norm": 1.0573522542198426,
"learning_rate": 7.388946658309557e-06,
"loss": 0.3104,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13600924611091614,
"step": 330,
"valid_targets_mean": 1860.2,
"valid_targets_min": 626
},
{
"epoch": 5.32,
"grad_norm": 1.095408717419895,
"learning_rate": 6.7832867442298645e-06,
"loss": 0.2844,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14203448593616486,
"step": 335,
"valid_targets_mean": 3012.0,
"valid_targets_min": 1445
},
{
"epoch": 5.4,
"grad_norm": 0.8946946595676237,
"learning_rate": 6.198419770357764e-06,
"loss": 0.2976,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16260306537151337,
"step": 340,
"valid_targets_mean": 2969.6,
"valid_targets_min": 873
},
{
"epoch": 5.48,
"grad_norm": 0.8169352231394611,
"learning_rate": 5.635265867423321e-06,
"loss": 0.2998,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.17487314343452454,
"step": 345,
"valid_targets_mean": 3076.6,
"valid_targets_min": 1050
},
{
"epoch": 5.5600000000000005,
"grad_norm": 0.8163208743049927,
"learning_rate": 5.094711006484907e-06,
"loss": 0.2778,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08679372817277908,
"step": 350,
"valid_targets_mean": 2021.5,
"valid_targets_min": 338
},
{
"epoch": 5.64,
"grad_norm": 0.9952519880856041,
"learning_rate": 4.577605605092248e-06,
"loss": 0.285,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16831034421920776,
"step": 355,
"valid_targets_mean": 2338.1,
"valid_targets_min": 1084
},
{
"epoch": 5.72,
"grad_norm": 1.0756622420151967,
"learning_rate": 4.0847631893833566e-06,
"loss": 0.2852,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1444486677646637,
"step": 360,
"valid_targets_mean": 2133.6,
"valid_targets_min": 965
},
{
"epoch": 5.8,
"grad_norm": 0.9257674814633752,
"learning_rate": 3.616959114220162e-06,
"loss": 0.3002,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.23777079582214355,
"step": 365,
"valid_targets_mean": 2538.9,
"valid_targets_min": 1373
},
{
"epoch": 5.88,
"grad_norm": 0.8690699429240454,
"learning_rate": 3.174929343376374e-06,
"loss": 0.2841,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11037126183509827,
"step": 370,
"valid_targets_mean": 2433.8,
"valid_targets_min": 1237
},
{
"epoch": 5.96,
"grad_norm": 0.9718357574467225,
"learning_rate": 2.759369291696614e-06,
"loss": 0.3004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1343100368976593,
"step": 375,
"valid_targets_mean": 2798.5,
"valid_targets_min": 903
},
{
"epoch": 6.032,
"grad_norm": 0.9817652023621845,
"learning_rate": 2.3709327310483608e-06,
"loss": 0.2684,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12780410051345825,
"step": 380,
"valid_targets_mean": 2150.0,
"valid_targets_min": 998
},
{
"epoch": 6.112,
"grad_norm": 0.9014203224760045,
"learning_rate": 2.0102307617879367e-06,
"loss": 0.2766,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13069286942481995,
"step": 385,
"valid_targets_mean": 2268.4,
"valid_targets_min": 911
},
{
"epoch": 6.192,
"grad_norm": 1.0186765172032302,
"learning_rate": 1.6778308513586084e-06,
"loss": 0.2962,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11167597770690918,
"step": 390,
"valid_targets_mean": 1636.0,
"valid_targets_min": 693
},
{
"epoch": 6.272,
"grad_norm": 0.9007287293295244,
"learning_rate": 1.3742559415333267e-06,
"loss": 0.2762,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10594399273395538,
"step": 395,
"valid_targets_mean": 2016.9,
"valid_targets_min": 1210
},
{
"epoch": 6.352,
"grad_norm": 0.946964378636132,
"learning_rate": 1.099983625706631e-06,
"loss": 0.2655,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1707034707069397,
"step": 400,
"valid_targets_mean": 3195.9,
"valid_targets_min": 1289
},
{
"epoch": 6.432,
"grad_norm": 0.7899732764847707,
"learning_rate": 8.554453975300258e-07,
"loss": 0.2789,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08833309262990952,
"step": 405,
"valid_targets_mean": 2436.8,
"valid_targets_min": 888
},
{
"epoch": 6.5120000000000005,
"grad_norm": 0.8794195051438293,
"learning_rate": 6.410259720728751e-07,
"loss": 0.2992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09639259427785873,
"step": 410,
"valid_targets_mean": 2271.0,
"valid_targets_min": 984
},
{
"epoch": 6.592,
"grad_norm": 1.0137858039965408,
"learning_rate": 4.570626805768119e-07,
"loss": 0.273,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15096719563007355,
"step": 415,
"valid_targets_mean": 2673.6,
"valid_targets_min": 1106
},
{
"epoch": 6.672,
"grad_norm": 0.9819001117157581,
"learning_rate": 3.038449397558396e-07,
"loss": 0.2942,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.15650422871112823,
"step": 420,
"valid_targets_mean": 2339.1,
"valid_targets_min": 718
},
{
"epoch": 6.752,
"grad_norm": 0.8517024842857888,
"learning_rate": 1.8161379647706034e-07,
"loss": 0.2789,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11373068392276764,
"step": 425,
"valid_targets_mean": 2288.1,
"valid_targets_min": 1061
},
{
"epoch": 6.832,
"grad_norm": 0.9942173325428866,
"learning_rate": 9.056154853830823e-08,
"loss": 0.2924,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1729286015033722,
"step": 430,
"valid_targets_mean": 2370.9,
"valid_targets_min": 436
},
{
"epoch": 6.912,
"grad_norm": 0.8761392834472287,
"learning_rate": 3.083144213933853e-08,
"loss": 0.2918,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.0916639119386673,
"step": 435,
"valid_targets_mean": 2043.0,
"valid_targets_min": 1123
},
{
"epoch": 6.992,
"grad_norm": 1.0047486254280051,
"learning_rate": 2.5174465224986343e-09,
"loss": 0.2714,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1423904448747635,
"step": 440,
"valid_targets_mean": 1893.1,
"valid_targets_min": 626
},
{
"epoch": 7.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.259745717048645,
"step": 441,
"total_flos": 7.12082116746281e+16,
"train_loss": 0.38846772432732746,
"train_runtime": 2096.0086,
"train_samples_per_second": 3.336,
"train_steps_per_second": 0.21,
"valid_targets_mean": 3009.9,
"valid_targets_min": 873
}
],
"logging_steps": 5,
"max_steps": 441,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 1500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.12082116746281e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}