taskmaster2-4ep / trainer_state.json
penfever's picture
End of training
bcd3062 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 785,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.032,
"grad_norm": 7.631121043697976,
"learning_rate": 2.0253164556962026e-06,
"loss": 0.8891,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.22271543741226196,
"step": 5,
"valid_targets_mean": 2476.6,
"valid_targets_min": 1301
},
{
"epoch": 0.064,
"grad_norm": 5.27881420878073,
"learning_rate": 4.556962025316456e-06,
"loss": 0.8953,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20072758197784424,
"step": 10,
"valid_targets_mean": 2332.7,
"valid_targets_min": 1072
},
{
"epoch": 0.096,
"grad_norm": 2.6810809161759894,
"learning_rate": 7.08860759493671e-06,
"loss": 0.8167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.21659545600414276,
"step": 15,
"valid_targets_mean": 2635.2,
"valid_targets_min": 2017
},
{
"epoch": 0.128,
"grad_norm": 1.3612409092128206,
"learning_rate": 9.620253164556963e-06,
"loss": 0.7665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.20028266310691833,
"step": 20,
"valid_targets_mean": 2392.8,
"valid_targets_min": 834
},
{
"epoch": 0.16,
"grad_norm": 0.9712808216614238,
"learning_rate": 1.2151898734177216e-05,
"loss": 0.7092,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16015717387199402,
"step": 25,
"valid_targets_mean": 2036.3,
"valid_targets_min": 627
},
{
"epoch": 0.192,
"grad_norm": 0.7847495702942517,
"learning_rate": 1.468354430379747e-05,
"loss": 0.6989,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.16251808404922485,
"step": 30,
"valid_targets_mean": 2170.1,
"valid_targets_min": 695
},
{
"epoch": 0.224,
"grad_norm": 0.6368837163308867,
"learning_rate": 1.7215189873417723e-05,
"loss": 0.6551,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.18215426802635193,
"step": 35,
"valid_targets_mean": 2561.9,
"valid_targets_min": 1349
},
{
"epoch": 0.256,
"grad_norm": 0.4861435916193186,
"learning_rate": 1.974683544303798e-05,
"loss": 0.6345,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1406915932893753,
"step": 40,
"valid_targets_mean": 2587.4,
"valid_targets_min": 1156
},
{
"epoch": 0.288,
"grad_norm": 0.42086791320978584,
"learning_rate": 2.2278481012658228e-05,
"loss": 0.5955,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12135272473096848,
"step": 45,
"valid_targets_mean": 2122.2,
"valid_targets_min": 505
},
{
"epoch": 0.32,
"grad_norm": 0.419828484849555,
"learning_rate": 2.481012658227848e-05,
"loss": 0.5878,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13206100463867188,
"step": 50,
"valid_targets_mean": 2235.4,
"valid_targets_min": 891
},
{
"epoch": 0.352,
"grad_norm": 0.3947927235863584,
"learning_rate": 2.7341772151898737e-05,
"loss": 0.5708,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1331617832183838,
"step": 55,
"valid_targets_mean": 2493.7,
"valid_targets_min": 1072
},
{
"epoch": 0.384,
"grad_norm": 0.3677300729554212,
"learning_rate": 2.987341772151899e-05,
"loss": 0.5517,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13664039969444275,
"step": 60,
"valid_targets_mean": 2516.1,
"valid_targets_min": 748
},
{
"epoch": 0.416,
"grad_norm": 0.4072083040661874,
"learning_rate": 3.240506329113924e-05,
"loss": 0.572,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12455327808856964,
"step": 65,
"valid_targets_mean": 2005.4,
"valid_targets_min": 666
},
{
"epoch": 0.448,
"grad_norm": 0.3859323974434273,
"learning_rate": 3.49367088607595e-05,
"loss": 0.5393,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1278195083141327,
"step": 70,
"valid_targets_mean": 2248.9,
"valid_targets_min": 770
},
{
"epoch": 0.48,
"grad_norm": 0.3618871599465855,
"learning_rate": 3.746835443037975e-05,
"loss": 0.5479,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12145921587944031,
"step": 75,
"valid_targets_mean": 2457.3,
"valid_targets_min": 442
},
{
"epoch": 0.512,
"grad_norm": 0.3778666430950872,
"learning_rate": 4e-05,
"loss": 0.5262,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1243448480963707,
"step": 80,
"valid_targets_mean": 2133.5,
"valid_targets_min": 712
},
{
"epoch": 0.544,
"grad_norm": 0.36204735116857517,
"learning_rate": 3.999504991751045e-05,
"loss": 0.5216,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11901817470788956,
"step": 85,
"valid_targets_mean": 2211.9,
"valid_targets_min": 659
},
{
"epoch": 0.576,
"grad_norm": 0.37136134435893786,
"learning_rate": 3.9980202120373464e-05,
"loss": 0.513,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11105550080537796,
"step": 90,
"valid_targets_mean": 1993.4,
"valid_targets_min": 670
},
{
"epoch": 0.608,
"grad_norm": 0.37525675194181013,
"learning_rate": 3.995546395837111e-05,
"loss": 0.5231,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1376522183418274,
"step": 95,
"valid_targets_mean": 2684.2,
"valid_targets_min": 974
},
{
"epoch": 0.64,
"grad_norm": 0.3824297270784445,
"learning_rate": 3.992084767709763e-05,
"loss": 0.5053,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1333228498697281,
"step": 100,
"valid_targets_mean": 2383.8,
"valid_targets_min": 886
},
{
"epoch": 0.672,
"grad_norm": 0.37527053538555377,
"learning_rate": 3.987637041189781e-05,
"loss": 0.5058,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.14271236956119537,
"step": 105,
"valid_targets_mean": 2695.1,
"valid_targets_min": 760
},
{
"epoch": 0.704,
"grad_norm": 0.3537104082629273,
"learning_rate": 3.982205417938482e-05,
"loss": 0.5003,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12837614119052887,
"step": 110,
"valid_targets_mean": 2870.2,
"valid_targets_min": 1437
},
{
"epoch": 0.736,
"grad_norm": 0.3509645219531119,
"learning_rate": 3.975792586654179e-05,
"loss": 0.4992,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12494358420372009,
"step": 115,
"valid_targets_mean": 2231.8,
"valid_targets_min": 576
},
{
"epoch": 0.768,
"grad_norm": 0.37140995188960296,
"learning_rate": 3.968401721741259e-05,
"loss": 0.501,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13252729177474976,
"step": 120,
"valid_targets_mean": 2579.4,
"valid_targets_min": 1069
},
{
"epoch": 0.8,
"grad_norm": 0.38592521508298305,
"learning_rate": 3.960036481738819e-05,
"loss": 0.5014,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10906288027763367,
"step": 125,
"valid_targets_mean": 2436.2,
"valid_targets_min": 1019
},
{
"epoch": 0.832,
"grad_norm": 0.3774469495344788,
"learning_rate": 3.950701007509667e-05,
"loss": 0.5088,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1347123384475708,
"step": 130,
"valid_targets_mean": 2156.8,
"valid_targets_min": 687
},
{
"epoch": 0.864,
"grad_norm": 0.35331778619634596,
"learning_rate": 3.940399920190552e-05,
"loss": 0.5004,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11867493391036987,
"step": 135,
"valid_targets_mean": 2269.1,
"valid_targets_min": 496
},
{
"epoch": 0.896,
"grad_norm": 0.381269763301234,
"learning_rate": 3.92913831890467e-05,
"loss": 0.4846,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11897442489862442,
"step": 140,
"valid_targets_mean": 2282.8,
"valid_targets_min": 729
},
{
"epoch": 0.928,
"grad_norm": 0.37635071567491307,
"learning_rate": 3.916921778237556e-05,
"loss": 0.4762,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12302634119987488,
"step": 145,
"valid_targets_mean": 2549.6,
"valid_targets_min": 812
},
{
"epoch": 0.96,
"grad_norm": 0.3696178527878047,
"learning_rate": 3.903756345477612e-05,
"loss": 0.4781,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13179175555706024,
"step": 150,
"valid_targets_mean": 2555.1,
"valid_targets_min": 987
},
{
"epoch": 0.992,
"grad_norm": 0.3583947059528666,
"learning_rate": 3.889648537622657e-05,
"loss": 0.4705,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11860240995883942,
"step": 155,
"valid_targets_mean": 2559.3,
"valid_targets_min": 676
},
{
"epoch": 1.0192,
"grad_norm": 0.3621488113478336,
"learning_rate": 3.874605338153952e-05,
"loss": 0.4765,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11035702377557755,
"step": 160,
"valid_targets_mean": 2256.6,
"valid_targets_min": 575
},
{
"epoch": 1.0512,
"grad_norm": 0.38027772195572573,
"learning_rate": 3.8586341935793265e-05,
"loss": 0.4875,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12132596969604492,
"step": 165,
"valid_targets_mean": 2224.2,
"valid_targets_min": 683
},
{
"epoch": 1.0832,
"grad_norm": 0.3831848460828967,
"learning_rate": 3.841743009747089e-05,
"loss": 0.4732,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.111229307949543,
"step": 170,
"valid_targets_mean": 2282.8,
"valid_targets_min": 858
},
{
"epoch": 1.1152,
"grad_norm": 0.3882380505987559,
"learning_rate": 3.8239401479325714e-05,
"loss": 0.4665,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11933496594429016,
"step": 175,
"valid_targets_mean": 2197.6,
"valid_targets_min": 535
},
{
"epoch": 1.1472,
"grad_norm": 0.38808772044969597,
"learning_rate": 3.8052344206992276e-05,
"loss": 0.4633,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1027202382683754,
"step": 180,
"valid_targets_mean": 2257.2,
"valid_targets_min": 1237
},
{
"epoch": 1.1792,
"grad_norm": 0.36293154506315095,
"learning_rate": 3.7856350875363396e-05,
"loss": 0.4553,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11203598231077194,
"step": 185,
"valid_targets_mean": 2293.1,
"valid_targets_min": 826
},
{
"epoch": 1.2112,
"grad_norm": 0.3689772416334428,
"learning_rate": 3.765151850275497e-05,
"loss": 0.4662,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11721555888652802,
"step": 190,
"valid_targets_mean": 2506.8,
"valid_targets_min": 735
},
{
"epoch": 1.2432,
"grad_norm": 0.43777762642017926,
"learning_rate": 3.7437948482881104e-05,
"loss": 0.4618,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12152767926454544,
"step": 195,
"valid_targets_mean": 3030.2,
"valid_targets_min": 1859
},
{
"epoch": 1.2752,
"grad_norm": 0.371935582678312,
"learning_rate": 3.721574653466336e-05,
"loss": 0.466,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11050638556480408,
"step": 200,
"valid_targets_mean": 2149.6,
"valid_targets_min": 824
},
{
"epoch": 1.3072,
"grad_norm": 0.37190151848447794,
"learning_rate": 3.698502264989903e-05,
"loss": 0.4619,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10513182729482651,
"step": 205,
"valid_targets_mean": 2345.4,
"valid_targets_min": 712
},
{
"epoch": 1.3392,
"grad_norm": 0.36687001281293485,
"learning_rate": 3.674589103881432e-05,
"loss": 0.4718,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13081791996955872,
"step": 210,
"valid_targets_mean": 3058.9,
"valid_targets_min": 1310
},
{
"epoch": 1.3712,
"grad_norm": 0.5244127872645981,
"learning_rate": 3.64984700735293e-05,
"loss": 0.4771,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11074385046958923,
"step": 215,
"valid_targets_mean": 2053.8,
"valid_targets_min": 544
},
{
"epoch": 1.4032,
"grad_norm": 0.37485388401896785,
"learning_rate": 3.624288222946273e-05,
"loss": 0.4658,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12147005647420883,
"step": 220,
"valid_targets_mean": 2652.4,
"valid_targets_min": 615
},
{
"epoch": 1.4352,
"grad_norm": 0.36922684378704207,
"learning_rate": 3.597925402470578e-05,
"loss": 0.4672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12403183430433273,
"step": 225,
"valid_targets_mean": 2192.9,
"valid_targets_min": 747
},
{
"epoch": 1.4672,
"grad_norm": 0.36374441036147753,
"learning_rate": 3.570771595739445e-05,
"loss": 0.4672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1292041838169098,
"step": 230,
"valid_targets_mean": 3004.4,
"valid_targets_min": 1832
},
{
"epoch": 1.4992,
"grad_norm": 0.3948554267634894,
"learning_rate": 3.5428402441111964e-05,
"loss": 0.4598,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12679752707481384,
"step": 235,
"valid_targets_mean": 2637.1,
"valid_targets_min": 946
},
{
"epoch": 1.5312000000000001,
"grad_norm": 0.37068345902272504,
"learning_rate": 3.5141451738352936e-05,
"loss": 0.4661,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09907030314207077,
"step": 240,
"valid_targets_mean": 2072.6,
"valid_targets_min": 627
},
{
"epoch": 1.5632000000000001,
"grad_norm": 0.3669539251059158,
"learning_rate": 3.4847005892082266e-05,
"loss": 0.4641,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11002381145954132,
"step": 245,
"valid_targets_mean": 2329.2,
"valid_targets_min": 1380
},
{
"epoch": 1.5952,
"grad_norm": 0.4096014954540597,
"learning_rate": 3.454521065542273e-05,
"loss": 0.4703,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11717953532934189,
"step": 250,
"valid_targets_mean": 2277.5,
"valid_targets_min": 988
},
{
"epoch": 1.6272,
"grad_norm": 0.39509938189105553,
"learning_rate": 3.423621541950597e-05,
"loss": 0.4594,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10379903018474579,
"step": 255,
"valid_targets_mean": 2348.1,
"valid_targets_min": 699
},
{
"epoch": 1.6592,
"grad_norm": 0.3756191077532937,
"learning_rate": 3.3920173139522664e-05,
"loss": 0.4615,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11246967315673828,
"step": 260,
"valid_targets_mean": 2493.1,
"valid_targets_min": 840
},
{
"epoch": 1.6912,
"grad_norm": 0.4044690200794231,
"learning_rate": 3.35972402590084e-05,
"loss": 0.4649,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.110806405544281,
"step": 265,
"valid_targets_mean": 2017.8,
"valid_targets_min": 717
},
{
"epoch": 1.7231999999999998,
"grad_norm": 0.5081640504507091,
"learning_rate": 3.326757663240291e-05,
"loss": 0.4549,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11942463368177414,
"step": 270,
"valid_targets_mean": 2584.5,
"valid_targets_min": 834
},
{
"epoch": 1.7551999999999999,
"grad_norm": 0.3927226443344469,
"learning_rate": 3.293134544592073e-05,
"loss": 0.4566,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12417890131473541,
"step": 275,
"valid_targets_mean": 2363.7,
"valid_targets_min": 1147
},
{
"epoch": 1.7872,
"grad_norm": 0.3823668344189796,
"learning_rate": 3.258871313677274e-05,
"loss": 0.4562,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12030941992998123,
"step": 280,
"valid_targets_mean": 2514.6,
"valid_targets_min": 1588
},
{
"epoch": 1.8192,
"grad_norm": 0.4080217126524122,
"learning_rate": 3.2239849310778316e-05,
"loss": 0.4522,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13041387498378754,
"step": 285,
"valid_targets_mean": 2502.2,
"valid_targets_min": 994
},
{
"epoch": 1.8512,
"grad_norm": 0.35720690950041145,
"learning_rate": 3.188492665840909e-05,
"loss": 0.4543,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09930306673049927,
"step": 290,
"valid_targets_mean": 2340.0,
"valid_targets_min": 672
},
{
"epoch": 1.8832,
"grad_norm": 0.3784197895262166,
"learning_rate": 3.1524120869305726e-05,
"loss": 0.4672,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11923228949308395,
"step": 295,
"valid_targets_mean": 2192.8,
"valid_targets_min": 805
},
{
"epoch": 1.9152,
"grad_norm": 0.3853261461806844,
"learning_rate": 3.11576105453101e-05,
"loss": 0.4582,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13272637128829956,
"step": 300,
"valid_targets_mean": 2641.6,
"valid_targets_min": 718
},
{
"epoch": 1.9472,
"grad_norm": 0.4086565083973346,
"learning_rate": 3.0785577112055916e-05,
"loss": 0.4568,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11800284683704376,
"step": 305,
"valid_targets_mean": 2454.9,
"valid_targets_min": 605
},
{
"epoch": 1.9792,
"grad_norm": 0.37246762381338155,
"learning_rate": 3.040820472916153e-05,
"loss": 0.4648,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1008586436510086,
"step": 310,
"valid_targets_mean": 1896.5,
"valid_targets_min": 663
},
{
"epoch": 2.0064,
"grad_norm": 0.3806650012392375,
"learning_rate": 3.002568019906939e-05,
"loss": 0.4546,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11685250699520111,
"step": 315,
"valid_targets_mean": 2345.5,
"valid_targets_min": 1122
},
{
"epoch": 2.0384,
"grad_norm": 0.3798085186564654,
"learning_rate": 2.963819287457733e-05,
"loss": 0.4469,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12193480134010315,
"step": 320,
"valid_targets_mean": 2326.6,
"valid_targets_min": 842
},
{
"epoch": 2.0704,
"grad_norm": 0.36905791747982847,
"learning_rate": 2.924593456510733e-05,
"loss": 0.4457,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1169414296746254,
"step": 325,
"valid_targets_mean": 2857.9,
"valid_targets_min": 962
},
{
"epoch": 2.1024,
"grad_norm": 0.3402606779146125,
"learning_rate": 2.8849099441758306e-05,
"loss": 0.4362,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09596966952085495,
"step": 330,
"valid_targets_mean": 2375.1,
"valid_targets_min": 631
},
{
"epoch": 2.1344,
"grad_norm": 0.36648780234004474,
"learning_rate": 2.844788394118979e-05,
"loss": 0.4407,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10836751013994217,
"step": 335,
"valid_targets_mean": 2527.1,
"valid_targets_min": 831
},
{
"epoch": 2.1664,
"grad_norm": 0.39668264319464347,
"learning_rate": 2.8042486668384164e-05,
"loss": 0.4459,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12774476408958435,
"step": 340,
"valid_targets_mean": 2352.4,
"valid_targets_min": 798
},
{
"epoch": 2.1984,
"grad_norm": 0.36386446276091866,
"learning_rate": 2.7633108298335582e-05,
"loss": 0.4454,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10319218039512634,
"step": 345,
"valid_targets_mean": 2521.3,
"valid_targets_min": 1315
},
{
"epoch": 2.2304,
"grad_norm": 0.3546475260205052,
"learning_rate": 2.721995147671416e-05,
"loss": 0.435,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10272243618965149,
"step": 350,
"valid_targets_mean": 2621.2,
"valid_targets_min": 773
},
{
"epoch": 2.2624,
"grad_norm": 0.3630552906576787,
"learning_rate": 2.68032207195547e-05,
"loss": 0.4325,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10560596734285355,
"step": 355,
"valid_targets_mean": 2550.9,
"valid_targets_min": 779
},
{
"epoch": 2.2944,
"grad_norm": 0.38279271826493005,
"learning_rate": 2.6383122312019604e-05,
"loss": 0.4351,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1132790818810463,
"step": 360,
"valid_targets_mean": 2419.3,
"valid_targets_min": 720
},
{
"epoch": 2.3264,
"grad_norm": 0.39935145796247956,
"learning_rate": 2.595986420628597e-05,
"loss": 0.4414,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09891170263290405,
"step": 365,
"valid_targets_mean": 2249.1,
"valid_targets_min": 1164
},
{
"epoch": 2.3584,
"grad_norm": 0.3419394392882838,
"learning_rate": 2.5533655918607573e-05,
"loss": 0.4368,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10849423706531525,
"step": 370,
"valid_targets_mean": 2371.8,
"valid_targets_min": 843
},
{
"epoch": 2.3904,
"grad_norm": 0.3771680016371104,
"learning_rate": 2.510470842560259e-05,
"loss": 0.4442,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1079641729593277,
"step": 375,
"valid_targets_mean": 2512.6,
"valid_targets_min": 475
},
{
"epoch": 2.4224,
"grad_norm": 0.3707439432584615,
"learning_rate": 2.467323405981841e-05,
"loss": 0.4433,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1102166473865509,
"step": 380,
"valid_targets_mean": 2372.2,
"valid_targets_min": 724
},
{
"epoch": 2.4544,
"grad_norm": 0.3688167243996532,
"learning_rate": 2.423944640462533e-05,
"loss": 0.4379,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12143057584762573,
"step": 385,
"valid_targets_mean": 2571.7,
"valid_targets_min": 1490
},
{
"epoch": 2.4864,
"grad_norm": 0.372498854103246,
"learning_rate": 2.3803560188490968e-05,
"loss": 0.45,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10243070125579834,
"step": 390,
"valid_targets_mean": 2157.3,
"valid_targets_min": 603
},
{
"epoch": 2.5183999999999997,
"grad_norm": 0.3561390151542751,
"learning_rate": 2.336579117868789e-05,
"loss": 0.4435,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11096415668725967,
"step": 395,
"valid_targets_mean": 2564.1,
"valid_targets_min": 1146
},
{
"epoch": 2.5504,
"grad_norm": 0.3467986057670459,
"learning_rate": 2.292635607448711e-05,
"loss": 0.4391,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09745585918426514,
"step": 400,
"valid_targets_mean": 2261.1,
"valid_targets_min": 484
},
{
"epoch": 2.5824,
"grad_norm": 0.38784817744913735,
"learning_rate": 2.248547239989008e-05,
"loss": 0.4456,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10883487015962601,
"step": 405,
"valid_targets_mean": 2358.3,
"valid_targets_min": 589
},
{
"epoch": 2.6144,
"grad_norm": 0.39520877797364024,
"learning_rate": 2.204335839595255e-05,
"loss": 0.4491,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11191672086715698,
"step": 410,
"valid_targets_mean": 2358.7,
"valid_targets_min": 861
},
{
"epoch": 2.6464,
"grad_norm": 0.3501064725751044,
"learning_rate": 2.1600232912753452e-05,
"loss": 0.4376,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10743163526058197,
"step": 415,
"valid_targets_mean": 2539.9,
"valid_targets_min": 615
},
{
"epoch": 2.6784,
"grad_norm": 0.34747271508177885,
"learning_rate": 2.1156315301062293e-05,
"loss": 0.4406,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11140751093626022,
"step": 420,
"valid_targets_mean": 2474.9,
"valid_targets_min": 644
},
{
"epoch": 2.7104,
"grad_norm": 0.32707554036034925,
"learning_rate": 2.0711825303758712e-05,
"loss": 0.4396,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11559329926967621,
"step": 425,
"valid_targets_mean": 3053.6,
"valid_targets_min": 920
},
{
"epoch": 2.7424,
"grad_norm": 0.38640280327503035,
"learning_rate": 2.0266982947057962e-05,
"loss": 0.4416,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12284263968467712,
"step": 430,
"valid_targets_mean": 2602.9,
"valid_targets_min": 585
},
{
"epoch": 2.7744,
"grad_norm": 0.4114312862720891,
"learning_rate": 1.9822008431596083e-05,
"loss": 0.4334,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11041560769081116,
"step": 435,
"valid_targets_mean": 2347.3,
"valid_targets_min": 619
},
{
"epoch": 2.8064,
"grad_norm": 0.32073194733072063,
"learning_rate": 1.937712202342881e-05,
"loss": 0.4356,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11413581669330597,
"step": 440,
"valid_targets_mean": 3212.8,
"valid_targets_min": 997
},
{
"epoch": 2.8384,
"grad_norm": 0.46398697838839303,
"learning_rate": 1.8932543944998037e-05,
"loss": 0.4403,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10393176227807999,
"step": 445,
"valid_targets_mean": 2268.5,
"valid_targets_min": 972
},
{
"epoch": 2.8704,
"grad_norm": 0.36496198308115707,
"learning_rate": 1.8488494266119877e-05,
"loss": 0.4333,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10166573524475098,
"step": 450,
"valid_targets_mean": 2378.6,
"valid_targets_min": 535
},
{
"epoch": 2.9024,
"grad_norm": 0.3568647386974551,
"learning_rate": 1.804519279504834e-05,
"loss": 0.4366,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1209319531917572,
"step": 455,
"valid_targets_mean": 2209.4,
"valid_targets_min": 956
},
{
"epoch": 2.9344,
"grad_norm": 0.36074018847887357,
"learning_rate": 1.7602858969668365e-05,
"loss": 0.4425,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1085042655467987,
"step": 460,
"valid_targets_mean": 2346.1,
"valid_targets_min": 494
},
{
"epoch": 2.9664,
"grad_norm": 0.3543561136932817,
"learning_rate": 1.716171174887231e-05,
"loss": 0.4351,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10610386729240417,
"step": 465,
"valid_targets_mean": 2401.0,
"valid_targets_min": 783
},
{
"epoch": 2.9984,
"grad_norm": 0.353857442044966,
"learning_rate": 1.6721969504173484e-05,
"loss": 0.435,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1253969669342041,
"step": 470,
"valid_targets_mean": 2643.6,
"valid_targets_min": 924
},
{
"epoch": 3.0256,
"grad_norm": 0.3510915710318982,
"learning_rate": 1.628384991161041e-05,
"loss": 0.4272,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1131058931350708,
"step": 475,
"valid_targets_mean": 3003.8,
"valid_targets_min": 1878
},
{
"epoch": 3.0576,
"grad_norm": 0.3834167334438327,
"learning_rate": 1.5847569843995452e-05,
"loss": 0.4323,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11351168155670166,
"step": 480,
"valid_targets_mean": 2438.5,
"valid_targets_min": 597
},
{
"epoch": 3.0896,
"grad_norm": 0.35598468413223955,
"learning_rate": 1.5413345263560922e-05,
"loss": 0.4272,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1119239330291748,
"step": 485,
"valid_targets_mean": 2589.8,
"valid_targets_min": 839
},
{
"epoch": 3.1216,
"grad_norm": 0.35985279567171535,
"learning_rate": 1.4981391115056032e-05,
"loss": 0.4258,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09996067732572556,
"step": 490,
"valid_targets_mean": 2321.4,
"valid_targets_min": 560
},
{
"epoch": 3.1536,
"grad_norm": 0.37201568967503795,
"learning_rate": 1.455192121934748e-05,
"loss": 0.4306,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10631603747606277,
"step": 495,
"valid_targets_mean": 2443.0,
"valid_targets_min": 926
},
{
"epoch": 3.1856,
"grad_norm": 0.3683426934856062,
"learning_rate": 1.4125148167576303e-05,
"loss": 0.4293,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10557325184345245,
"step": 500,
"valid_targets_mean": 2416.2,
"valid_targets_min": 761
},
{
"epoch": 3.2176,
"grad_norm": 0.3424560285954452,
"learning_rate": 1.3701283215923563e-05,
"loss": 0.4305,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11586504429578781,
"step": 505,
"valid_targets_mean": 2864.2,
"valid_targets_min": 610
},
{
"epoch": 3.2496,
"grad_norm": 0.3578344128596558,
"learning_rate": 1.328053618103677e-05,
"loss": 0.4211,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09824161231517792,
"step": 510,
"valid_targets_mean": 2193.2,
"valid_targets_min": 580
},
{
"epoch": 3.2816,
"grad_norm": 0.3561114928124899,
"learning_rate": 1.2863115336168916e-05,
"loss": 0.4297,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10587438941001892,
"step": 515,
"valid_targets_mean": 2400.8,
"valid_targets_min": 724
},
{
"epoch": 3.3136,
"grad_norm": 0.35873753478305465,
"learning_rate": 1.2449227308081509e-05,
"loss": 0.4304,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12063280493021011,
"step": 520,
"valid_targets_mean": 2813.1,
"valid_targets_min": 1160
},
{
"epoch": 3.3456,
"grad_norm": 0.3765778341929378,
"learning_rate": 1.2039076974762587e-05,
"loss": 0.4161,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09327021986246109,
"step": 525,
"valid_targets_mean": 1924.0,
"valid_targets_min": 617
},
{
"epoch": 3.3776,
"grad_norm": 0.4324457664668057,
"learning_rate": 1.163286736401044e-05,
"loss": 0.4172,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10706986486911774,
"step": 530,
"valid_targets_mean": 2922.2,
"valid_targets_min": 607
},
{
"epoch": 3.4096,
"grad_norm": 0.36145748070452655,
"learning_rate": 1.123079955293322e-05,
"loss": 0.4311,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11657077074050903,
"step": 535,
"valid_targets_mean": 2529.0,
"valid_targets_min": 760
},
{
"epoch": 3.4416,
"grad_norm": 0.34484082874160493,
"learning_rate": 1.0833072568414037e-05,
"loss": 0.4354,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.105072021484375,
"step": 540,
"valid_targets_mean": 2642.2,
"valid_targets_min": 855
},
{
"epoch": 3.4736000000000002,
"grad_norm": 0.3516326667725607,
"learning_rate": 1.0439883288591057e-05,
"loss": 0.4215,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1013285294175148,
"step": 545,
"valid_targets_mean": 2099.8,
"valid_targets_min": 631
},
{
"epoch": 3.5056000000000003,
"grad_norm": 0.37571166625529,
"learning_rate": 1.0051426345401202e-05,
"loss": 0.4324,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11065156757831573,
"step": 550,
"valid_targets_mean": 2494.1,
"valid_targets_min": 1062
},
{
"epoch": 3.5376,
"grad_norm": 0.34979910368925665,
"learning_rate": 9.667894028235704e-06,
"loss": 0.4321,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09944584965705872,
"step": 555,
"valid_targets_mean": 2163.4,
"valid_targets_min": 787
},
{
"epoch": 3.5696,
"grad_norm": 0.3572748730592331,
"learning_rate": 9.289476188755315e-06,
"loss": 0.434,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11576366424560547,
"step": 560,
"valid_targets_mean": 2483.0,
"valid_targets_min": 544
},
{
"epoch": 3.6016,
"grad_norm": 0.35967280637810656,
"learning_rate": 8.916360146912122e-06,
"loss": 0.4292,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10254612565040588,
"step": 565,
"valid_targets_mean": 2525.1,
"valid_targets_min": 762
},
{
"epoch": 3.6336,
"grad_norm": 0.3455425957064251,
"learning_rate": 8.548730598224646e-06,
"loss": 0.4277,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10262332856655121,
"step": 570,
"valid_targets_mean": 2300.0,
"valid_targets_min": 627
},
{
"epoch": 3.6656,
"grad_norm": 0.3688641590124713,
"learning_rate": 8.186769522352053e-06,
"loss": 0.4289,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.13081635534763336,
"step": 575,
"valid_targets_mean": 2898.2,
"valid_targets_min": 1551
},
{
"epoch": 3.6976,
"grad_norm": 0.35149511407843115,
"learning_rate": 7.830656093012714e-06,
"loss": 0.4279,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10150306671857834,
"step": 580,
"valid_targets_mean": 2345.7,
"valid_targets_min": 965
},
{
"epoch": 3.7296,
"grad_norm": 0.37010020670114396,
"learning_rate": 7.480566589291696e-06,
"loss": 0.4309,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12104976922273636,
"step": 585,
"valid_targets_mean": 2471.3,
"valid_targets_min": 748
},
{
"epoch": 3.7616,
"grad_norm": 0.329279647087639,
"learning_rate": 7.1366743083812285e-06,
"loss": 0.4247,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09903521835803986,
"step": 590,
"valid_targets_mean": 2431.8,
"valid_targets_min": 949
},
{
"epoch": 3.7936,
"grad_norm": 0.3706881450643897,
"learning_rate": 6.799149479797101e-06,
"loss": 0.4206,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10147304832935333,
"step": 595,
"valid_targets_mean": 2359.5,
"valid_targets_min": 1145
},
{
"epoch": 3.8256,
"grad_norm": 0.3544528487803963,
"learning_rate": 6.4681591811137e-06,
"loss": 0.4212,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09534944593906403,
"step": 600,
"valid_targets_mean": 2434.1,
"valid_targets_min": 673
},
{
"epoch": 3.8576,
"grad_norm": 0.34572960602120706,
"learning_rate": 6.143867255259197e-06,
"loss": 0.424,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10098496079444885,
"step": 605,
"valid_targets_mean": 2049.7,
"valid_targets_min": 672
},
{
"epoch": 3.8895999999999997,
"grad_norm": 0.3385251496774486,
"learning_rate": 5.8264342294119504e-06,
"loss": 0.4309,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11481376737356186,
"step": 610,
"valid_targets_mean": 2369.4,
"valid_targets_min": 633
},
{
"epoch": 3.9215999999999998,
"grad_norm": 0.6228350973720654,
"learning_rate": 5.516017235538258e-06,
"loss": 0.4262,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11105981469154358,
"step": 615,
"valid_targets_mean": 2713.6,
"valid_targets_min": 841
},
{
"epoch": 3.9536,
"grad_norm": 0.4755804075607999,
"learning_rate": 5.212769932610695e-06,
"loss": 0.4315,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1181117370724678,
"step": 620,
"valid_targets_mean": 2359.4,
"valid_targets_min": 1621
},
{
"epoch": 3.9856,
"grad_norm": 0.35327709484906245,
"learning_rate": 4.916842430545681e-06,
"loss": 0.4264,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10537832975387573,
"step": 625,
"valid_targets_mean": 2640.1,
"valid_targets_min": 996
},
{
"epoch": 4.0128,
"grad_norm": 0.366071511572943,
"learning_rate": 4.628381215897837e-06,
"loss": 0.4178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11115087568759918,
"step": 630,
"valid_targets_mean": 2280.6,
"valid_targets_min": 1098
},
{
"epoch": 4.0448,
"grad_norm": 0.3746937074370881,
"learning_rate": 4.347529079347914e-06,
"loss": 0.4274,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10405690968036652,
"step": 635,
"valid_targets_mean": 1970.4,
"valid_targets_min": 610
},
{
"epoch": 4.0768,
"grad_norm": 0.33933207284418376,
"learning_rate": 4.074425045020247e-06,
"loss": 0.4183,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10596753656864166,
"step": 640,
"valid_targets_mean": 2338.5,
"valid_targets_min": 1075
},
{
"epoch": 4.1088,
"grad_norm": 0.36652871749180194,
"learning_rate": 3.8092043016646487e-06,
"loss": 0.4245,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10570182651281357,
"step": 645,
"valid_targets_mean": 2398.5,
"valid_targets_min": 679
},
{
"epoch": 4.1408,
"grad_norm": 0.34666144785143826,
"learning_rate": 3.551998135736867e-06,
"loss": 0.4223,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09318490326404572,
"step": 650,
"valid_targets_mean": 2170.8,
"valid_targets_min": 524
},
{
"epoch": 4.1728,
"grad_norm": 0.349600144940352,
"learning_rate": 3.3029338664107267e-06,
"loss": 0.4122,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10870470106601715,
"step": 655,
"valid_targets_mean": 2790.6,
"valid_targets_min": 1832
},
{
"epoch": 4.2048,
"grad_norm": 0.3657293355482953,
"learning_rate": 3.0621347825540625e-06,
"loss": 0.4263,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11297493427991867,
"step": 660,
"valid_targets_mean": 2327.8,
"valid_targets_min": 900
},
{
"epoch": 4.2368,
"grad_norm": 0.33512568991595465,
"learning_rate": 2.8297200816997183e-06,
"loss": 0.4204,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09641289710998535,
"step": 665,
"valid_targets_mean": 2293.8,
"valid_targets_min": 689
},
{
"epoch": 4.2688,
"grad_norm": 0.3361973926134737,
"learning_rate": 2.605804811041803e-06,
"loss": 0.4192,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.08825628459453583,
"step": 670,
"valid_targets_mean": 2078.7,
"valid_targets_min": 633
},
{
"epoch": 4.3008,
"grad_norm": 0.40275466302733576,
"learning_rate": 2.390499810486351e-06,
"loss": 0.4059,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11168158054351807,
"step": 675,
"valid_targets_mean": 2825.6,
"valid_targets_min": 814
},
{
"epoch": 4.3328,
"grad_norm": 0.33881094238458337,
"learning_rate": 2.183911657784685e-06,
"loss": 0.4212,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09750144183635712,
"step": 680,
"valid_targets_mean": 2606.6,
"valid_targets_min": 667
},
{
"epoch": 4.3648,
"grad_norm": 0.34649225347244983,
"learning_rate": 1.986142615776532e-06,
"loss": 0.4283,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09878174960613251,
"step": 685,
"valid_targets_mean": 2184.4,
"valid_targets_min": 810
},
{
"epoch": 4.3968,
"grad_norm": 0.6416853579629246,
"learning_rate": 1.7972905817690644e-06,
"loss": 0.4209,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10486841201782227,
"step": 690,
"valid_targets_mean": 2410.8,
"valid_targets_min": 562
},
{
"epoch": 4.4288,
"grad_norm": 0.34242289858813213,
"learning_rate": 1.617449039076955e-06,
"loss": 0.4163,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11274117976427078,
"step": 695,
"valid_targets_mean": 2641.4,
"valid_targets_min": 697
},
{
"epoch": 4.4608,
"grad_norm": 0.4489652104488755,
"learning_rate": 1.4467070107473413e-06,
"loss": 0.4288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10218752920627594,
"step": 700,
"valid_targets_mean": 2205.9,
"valid_targets_min": 693
},
{
"epoch": 4.4928,
"grad_norm": 0.38750863503880256,
"learning_rate": 1.2851490154926816e-06,
"loss": 0.4224,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11015324294567108,
"step": 705,
"valid_targets_mean": 2522.4,
"valid_targets_min": 989
},
{
"epoch": 4.5248,
"grad_norm": 0.3638004944669233,
"learning_rate": 1.1328550258533211e-06,
"loss": 0.4219,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11784189939498901,
"step": 710,
"valid_targets_mean": 2423.0,
"valid_targets_min": 832
},
{
"epoch": 4.5568,
"grad_norm": 0.33854575338443277,
"learning_rate": 9.899004286103953e-07,
"loss": 0.4222,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11596603691577911,
"step": 715,
"valid_targets_mean": 2593.8,
"valid_targets_min": 960
},
{
"epoch": 4.5888,
"grad_norm": 0.3426098834958257,
"learning_rate": 8.5635598746876e-07,
"loss": 0.4227,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10185693204402924,
"step": 720,
"valid_targets_mean": 2219.6,
"valid_targets_min": 1182
},
{
"epoch": 4.6208,
"grad_norm": 0.35018898874669724,
"learning_rate": 7.32287808028389e-07,
"loss": 0.4279,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.12424006313085556,
"step": 725,
"valid_targets_mean": 2555.5,
"valid_targets_min": 1024
},
{
"epoch": 4.6528,
"grad_norm": 0.32903025513626405,
"learning_rate": 6.177573050615327e-07,
"loss": 0.4177,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09462594985961914,
"step": 730,
"valid_targets_mean": 2214.7,
"valid_targets_min": 627
},
{
"epoch": 4.6848,
"grad_norm": 0.355942373474534,
"learning_rate": 5.128211721119213e-07,
"loss": 0.4242,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.1105259582400322,
"step": 735,
"valid_targets_mean": 2385.1,
"valid_targets_min": 1019
},
{
"epoch": 4.7168,
"grad_norm": 0.3440268419923915,
"learning_rate": 4.175313534309755e-07,
"loss": 0.4208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09921759366989136,
"step": 740,
"valid_targets_mean": 2268.0,
"valid_targets_min": 719
},
{
"epoch": 4.7488,
"grad_norm": 0.3391292108191765,
"learning_rate": 3.319350182649861e-07,
"loss": 0.423,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10051105916500092,
"step": 745,
"valid_targets_mean": 2327.6,
"valid_targets_min": 1502
},
{
"epoch": 4.7808,
"grad_norm": 0.34166487931018136,
"learning_rate": 2.560745375059392e-07,
"loss": 0.4167,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10346902906894684,
"step": 750,
"valid_targets_mean": 2279.1,
"valid_targets_min": 747
},
{
"epoch": 4.8128,
"grad_norm": 0.3214774241350311,
"learning_rate": 1.8998746271758016e-07,
"loss": 0.4164,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10058611631393433,
"step": 755,
"valid_targets_mean": 3048.1,
"valid_targets_min": 580
},
{
"epoch": 4.8448,
"grad_norm": 0.3387603005789863,
"learning_rate": 1.337065075470778e-07,
"loss": 0.4288,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09350170195102692,
"step": 760,
"valid_targets_mean": 2165.1,
"valid_targets_min": 762
},
{
"epoch": 4.8768,
"grad_norm": 0.33445320680299534,
"learning_rate": 8.725953153150279e-08,
"loss": 0.4178,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10848960280418396,
"step": 765,
"valid_targets_mean": 2675.0,
"valid_targets_min": 1314
},
{
"epoch": 4.9088,
"grad_norm": 0.3423233493479509,
"learning_rate": 5.066952630711886e-08,
"loss": 0.4176,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.10347608476877213,
"step": 770,
"valid_targets_mean": 2338.2,
"valid_targets_min": 668
},
{
"epoch": 4.9408,
"grad_norm": 0.3242922001719799,
"learning_rate": 2.3954604228342283e-08,
"loss": 0.4207,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.11179254204034805,
"step": 775,
"valid_targets_mean": 2694.2,
"valid_targets_min": 1007
},
{
"epoch": 4.9728,
"grad_norm": 0.33364671731962425,
"learning_rate": 7.12798940197601e-09,
"loss": 0.4281,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.09912078082561493,
"step": 780,
"valid_targets_mean": 2130.0,
"valid_targets_min": 580
},
{
"epoch": 5.0,
"grad_norm": 0.7100559447088324,
"learning_rate": 1.9801114115480802e-10,
"loss": 0.4208,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4209003150463104,
"step": 785,
"valid_targets_mean": 2253.5,
"valid_targets_min": 645
},
{
"epoch": 5.0,
"loss_nan_ranks": 0,
"loss_rank_avg": 0.4209003150463104,
"step": 785,
"total_flos": 8.136605209670451e+17,
"train_loss": 0.4667121115763476,
"train_runtime": 9534.5841,
"train_samples_per_second": 5.242,
"train_steps_per_second": 0.082,
"valid_targets_mean": 2253.5,
"valid_targets_min": 645
}
],
"logging_steps": 5,
"max_steps": 785,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8.136605209670451e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}