{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 285,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08888888888888889,
      "grad_norm": 7.583328114734628,
      "learning_rate": 5.517241379310345e-06,
      "loss": 0.5653,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.166676864027977,
      "step": 5,
      "valid_targets_mean": 3166.7,
      "valid_targets_min": 1420
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 3.1401618568879677,
      "learning_rate": 1.2413793103448277e-05,
      "loss": 0.4991,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.1019982397556305,
      "step": 10,
      "valid_targets_mean": 2351.6,
      "valid_targets_min": 1195
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 1.104245248825641,
      "learning_rate": 1.931034482758621e-05,
      "loss": 0.4163,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.11541903018951416,
      "step": 15,
      "valid_targets_mean": 3168.0,
      "valid_targets_min": 1312
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 0.732712115494436,
      "learning_rate": 2.620689655172414e-05,
      "loss": 0.3658,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.12487722188234329,
      "step": 20,
      "valid_targets_mean": 3523.8,
      "valid_targets_min": 1517
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 0.5506644308788069,
      "learning_rate": 3.310344827586207e-05,
      "loss": 0.3378,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.08351944386959076,
      "step": 25,
      "valid_targets_mean": 3054.6,
      "valid_targets_min": 1110
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 0.5212168935643539,
      "learning_rate": 4e-05,
      "loss": 0.3227,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.08309619128704071,
      "step": 30,
      "valid_targets_mean": 2739.4,
      "valid_targets_min": 1178
    },
    {
      "epoch": 0.6222222222222222,
      "grad_norm": 0.37621279442640654,
      "learning_rate": 3.996236225800298e-05,
      "loss": 0.2841,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.058637700974941254,
      "step": 35,
      "valid_targets_mean": 3032.6,
      "valid_targets_min": 962
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 0.3740113710680447,
      "learning_rate": 3.9849590691974206e-05,
      "loss": 0.275,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.049976594746112823,
      "step": 40,
      "valid_targets_mean": 2221.6,
      "valid_targets_min": 1249
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.3555541947187885,
      "learning_rate": 3.966210974862433e-05,
      "loss": 0.2829,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.047423090785741806,
      "step": 45,
      "valid_targets_mean": 2186.9,
      "valid_targets_min": 1183
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 0.3614830806096002,
      "learning_rate": 3.940062506389089e-05,
      "loss": 0.2688,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.07531788945198059,
      "step": 50,
      "valid_targets_mean": 2673.9,
      "valid_targets_min": 1141
    },
    {
      "epoch": 0.9777777777777777,
      "grad_norm": 0.35164636779651554,
      "learning_rate": 3.9066120807083875e-05,
      "loss": 0.2583,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.0835060328245163,
      "step": 55,
      "valid_targets_mean": 3016.2,
      "valid_targets_min": 1526
    },
    {
      "epoch": 1.0533333333333332,
      "grad_norm": 0.4004740105789504,
      "learning_rate": 3.865985597669478e-05,
      "loss": 0.2609,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.05621650442481041,
      "step": 60,
      "valid_targets_mean": 2221.8,
      "valid_targets_min": 1151
    },
    {
      "epoch": 1.1422222222222222,
      "grad_norm": 0.3362478514134072,
      "learning_rate": 3.818335966181045e-05,
      "loss": 0.2418,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.06041599065065384,
      "step": 65,
      "valid_targets_mean": 2575.6,
      "valid_targets_min": 1126
    },
    {
      "epoch": 1.231111111111111,
      "grad_norm": 0.327805935004158,
      "learning_rate": 3.76384252869671e-05,
      "loss": 0.2527,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.05153035372495651,
      "step": 70,
      "valid_targets_mean": 2401.1,
      "valid_targets_min": 1291
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.3344211876841236,
      "learning_rate": 3.702710386210531e-05,
      "loss": 0.2363,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.0525384359061718,
      "step": 75,
      "valid_targets_mean": 2742.1,
      "valid_targets_min": 1147
    },
    {
      "epoch": 1.4088888888888889,
      "grad_norm": 0.3391981121560211,
      "learning_rate": 3.635169626303168e-05,
      "loss": 0.238,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.06917819380760193,
      "step": 80,
      "valid_targets_mean": 3123.1,
      "valid_targets_min": 1190
    },
    {
      "epoch": 1.4977777777777779,
      "grad_norm": 0.32922754267793913,
      "learning_rate": 3.561474457144189e-05,
      "loss": 0.2449,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.05151129886507988,
      "step": 85,
      "valid_targets_mean": 3233.1,
      "valid_targets_min": 1357
    },
    {
      "epoch": 1.5866666666666667,
      "grad_norm": 0.32706014931505833,
      "learning_rate": 3.4819022507099184e-05,
      "loss": 0.2223,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.05639180168509483,
      "step": 90,
      "valid_targets_mean": 2922.6,
      "valid_targets_min": 1310
    },
    {
      "epoch": 1.6755555555555555,
      "grad_norm": 0.341988951356744,
      "learning_rate": 3.3967524988179463e-05,
      "loss": 0.2319,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.040721211582422256,
      "step": 95,
      "valid_targets_mean": 2238.1,
      "valid_targets_min": 1199
    },
    {
      "epoch": 1.7644444444444445,
      "grad_norm": 0.3543043397710712,
      "learning_rate": 3.306345685907553e-05,
      "loss": 0.2313,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.03705868870019913,
      "step": 100,
      "valid_targets_mean": 2039.1,
      "valid_targets_min": 1054
    },
    {
      "epoch": 1.8533333333333335,
      "grad_norm": 0.3169475427171822,
      "learning_rate": 3.211022082808652e-05,
      "loss": 0.2231,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.03866267204284668,
      "step": 105,
      "valid_targets_mean": 2269.2,
      "valid_targets_min": 1244
    },
    {
      "epoch": 1.942222222222222,
      "grad_norm": 0.3626154274513678,
      "learning_rate": 3.111140466039205e-05,
      "loss": 0.2454,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.05644143745303154,
      "step": 110,
      "valid_targets_mean": 2740.2,
      "valid_targets_min": 1249
    },
    {
      "epoch": 2.017777777777778,
      "grad_norm": 0.3229168931990422,
      "learning_rate": 3.0070767674514355e-05,
      "loss": 0.231,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.045998841524124146,
      "step": 115,
      "valid_targets_mean": 2432.6,
      "valid_targets_min": 1365
    },
    {
      "epoch": 2.1066666666666665,
      "grad_norm": 0.4074227847296487,
      "learning_rate": 2.8992226593092135e-05,
      "loss": 0.2261,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.041235923767089844,
      "step": 120,
      "valid_targets_mean": 2494.5,
      "valid_targets_min": 1151
    },
    {
      "epoch": 2.1955555555555555,
      "grad_norm": 0.3124502564340804,
      "learning_rate": 2.7879840801220967e-05,
      "loss": 0.2109,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.04943722486495972,
      "step": 125,
      "valid_targets_mean": 2937.8,
      "valid_targets_min": 1162
    },
    {
      "epoch": 2.2844444444444445,
      "grad_norm": 0.36347414667971645,
      "learning_rate": 2.6737797067844403e-05,
      "loss": 0.2205,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.041548021137714386,
      "step": 130,
      "valid_targets_mean": 2297.9,
      "valid_targets_min": 1539
    },
    {
      "epoch": 2.3733333333333335,
      "grad_norm": 0.3397949895260685,
      "learning_rate": 2.5570393787701063e-05,
      "loss": 0.2151,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.07428301870822906,
      "step": 135,
      "valid_targets_mean": 3256.2,
      "valid_targets_min": 1340
    },
    {
      "epoch": 2.462222222222222,
      "grad_norm": 0.3409669850928538,
      "learning_rate": 2.4382024803137396e-05,
      "loss": 0.214,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.05983541160821915,
      "step": 140,
      "valid_targets_mean": 2922.6,
      "valid_targets_min": 1264
    },
    {
      "epoch": 2.551111111111111,
      "grad_norm": 0.33002068625976483,
      "learning_rate": 2.317716286667723e-05,
      "loss": 0.2169,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.05062925070524216,
      "step": 145,
      "valid_targets_mean": 2481.7,
      "valid_targets_min": 1088
    },
    {
      "epoch": 2.64,
      "grad_norm": 0.3116815064192726,
      "learning_rate": 2.196034280659122e-05,
      "loss": 0.2099,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.03972204774618149,
      "step": 150,
      "valid_targets_mean": 2388.9,
      "valid_targets_min": 1028
    },
    {
      "epoch": 2.728888888888889,
      "grad_norm": 0.32691657273888747,
      "learning_rate": 2.073614445882718e-05,
      "loss": 0.2243,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.05966932699084282,
      "step": 155,
      "valid_targets_mean": 2523.2,
      "valid_targets_min": 1338
    },
    {
      "epoch": 2.8177777777777777,
      "grad_norm": 0.32702194667234424,
      "learning_rate": 1.950917542954176e-05,
      "loss": 0.22,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.05485967546701431,
      "step": 160,
      "valid_targets_mean": 2835.8,
      "valid_targets_min": 1045
    },
    {
      "epoch": 2.9066666666666667,
      "grad_norm": 0.3371678153197184,
      "learning_rate": 1.8284053753111205e-05,
      "loss": 0.2104,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.034001681953668594,
      "step": 165,
      "valid_targets_mean": 2010.9,
      "valid_targets_min": 1203
    },
    {
      "epoch": 2.9955555555555557,
      "grad_norm": 0.3400453584476225,
      "learning_rate": 1.7065390510892767e-05,
      "loss": 0.2065,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.04110487177968025,
      "step": 170,
      "valid_targets_mean": 2373.1,
      "valid_targets_min": 1131
    },
    {
      "epoch": 3.071111111111111,
      "grad_norm": 0.29868424312125685,
      "learning_rate": 1.5857772476155634e-05,
      "loss": 0.2018,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.05249975621700287,
      "step": 175,
      "valid_targets_mean": 3154.9,
      "valid_targets_min": 1178
    },
    {
      "epoch": 3.16,
      "grad_norm": 0.3706098125869751,
      "learning_rate": 1.4665744850502035e-05,
      "loss": 0.2145,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.055659279227256775,
      "step": 180,
      "valid_targets_mean": 2609.7,
      "valid_targets_min": 1283
    },
    {
      "epoch": 3.2488888888888887,
      "grad_norm": 0.34761594185958783,
      "learning_rate": 1.3493794156754744e-05,
      "loss": 0.2129,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.03931069374084473,
      "step": 185,
      "valid_targets_mean": 2712.7,
      "valid_targets_min": 1002
    },
    {
      "epoch": 3.3377777777777777,
      "grad_norm": 0.3480472578827022,
      "learning_rate": 1.2346331352698206e-05,
      "loss": 0.2032,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.06302663683891296,
      "step": 190,
      "valid_targets_mean": 2542.1,
      "valid_targets_min": 1383
    },
    {
      "epoch": 3.4266666666666667,
      "grad_norm": 0.35674083440708165,
      "learning_rate": 1.1227675229229453e-05,
      "loss": 0.1997,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.06049852818250656,
      "step": 195,
      "valid_targets_mean": 2524.8,
      "valid_targets_min": 993
    },
    {
      "epoch": 3.5155555555555553,
      "grad_norm": 0.33660537404764074,
      "learning_rate": 1.0142036155404322e-05,
      "loss": 0.2092,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.04671245440840721,
      "step": 200,
      "valid_targets_mean": 2890.4,
      "valid_targets_min": 388
    },
    {
      "epoch": 3.6044444444444443,
      "grad_norm": 0.4234454768211141,
      "learning_rate": 9.093500231559076e-06,
      "loss": 0.1932,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.039773087948560715,
      "step": 205,
      "valid_targets_mean": 2687.6,
      "valid_targets_min": 1134
    },
    {
      "epoch": 3.6933333333333334,
      "grad_norm": 0.3462492949486452,
      "learning_rate": 8.086013910151334e-06,
      "loss": 0.206,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.0793825015425682,
      "step": 210,
      "valid_targets_mean": 3072.4,
      "valid_targets_min": 1523
    },
    {
      "epoch": 3.7822222222222224,
      "grad_norm": 0.33043166323357076,
      "learning_rate": 7.123369142204175e-06,
      "loss": 0.1978,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.05091720446944237,
      "step": 215,
      "valid_targets_mean": 2395.9,
      "valid_targets_min": 1181
    },
    {
      "epoch": 3.871111111111111,
      "grad_norm": 0.3276021134703391,
      "learning_rate": 6.209189105258661e-06,
      "loss": 0.1978,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.04009982943534851,
      "step": 220,
      "valid_targets_mean": 2544.8,
      "valid_targets_min": 1459
    },
    {
      "epoch": 3.96,
      "grad_norm": 0.3518358226857207,
      "learning_rate": 5.346914566551746e-06,
      "loss": 0.2057,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.05190512537956238,
      "step": 225,
      "valid_targets_mean": 2315.4,
      "valid_targets_min": 1089
    },
    {
      "epoch": 4.035555555555556,
      "grad_norm": 0.446701948663916,
      "learning_rate": 4.53979093274526e-06,
      "loss": 0.1909,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.04597758501768112,
      "step": 230,
      "valid_targets_mean": 2736.4,
      "valid_targets_min": 1155
    },
    {
      "epoch": 4.124444444444444,
      "grad_norm": 0.3070256238047938,
      "learning_rate": 3.7908560349481072e-06,
      "loss": 0.1998,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.05320999398827553,
      "step": 235,
      "valid_targets_mean": 3201.8,
      "valid_targets_min": 1119
    },
    {
      "epoch": 4.213333333333333,
      "grad_norm": 0.3084555402280151,
      "learning_rate": 3.102928695005858e-06,
      "loss": 0.1975,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.029193339869379997,
      "step": 240,
      "valid_targets_mean": 1924.7,
      "valid_targets_min": 1318
    },
    {
      "epoch": 4.302222222222222,
      "grad_norm": 0.33282212562089014,
      "learning_rate": 2.4785981160918703e-06,
      "loss": 0.2001,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.04285688325762749,
      "step": 245,
      "valid_targets_mean": 2463.9,
      "valid_targets_min": 1178
    },
    {
      "epoch": 4.391111111111111,
      "grad_norm": 0.3167323683701005,
      "learning_rate": 1.9202141375311335e-06,
      "loss": 0.2013,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.04943656548857689,
      "step": 250,
      "valid_targets_mean": 2726.8,
      "valid_targets_min": 1173
    },
    {
      "epoch": 4.48,
      "grad_norm": 0.33351932332643597,
      "learning_rate": 1.4298783905356906e-06,
      "loss": 0.1954,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.04813677817583084,
      "step": 255,
      "valid_targets_mean": 2432.7,
      "valid_targets_min": 942
    },
    {
      "epoch": 4.568888888888889,
      "grad_norm": 0.3163118446536364,
      "learning_rate": 1.0094363881392665e-06,
      "loss": 0.1975,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.06139428913593292,
      "step": 260,
      "valid_targets_mean": 3001.4,
      "valid_targets_min": 1329
    },
    {
      "epoch": 4.657777777777778,
      "grad_norm": 0.33335357258605675,
      "learning_rate": 6.604705791029586e-07,
      "loss": 0.1987,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.044643834233284,
      "step": 265,
      "valid_targets_mean": 2461.6,
      "valid_targets_min": 1178
    },
    {
      "epoch": 4.746666666666667,
      "grad_norm": 0.33713590138912286,
      "learning_rate": 3.842943919353914e-07,
      "loss": 0.1881,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.03312596306204796,
      "step": 270,
      "valid_targets_mean": 2348.8,
      "valid_targets_min": 1282
    },
    {
      "epoch": 4.835555555555556,
      "grad_norm": 0.32804179691400653,
      "learning_rate": 1.819472914443998e-07,
      "loss": 0.207,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.03083619847893715,
      "step": 275,
      "valid_targets_mean": 2106.2,
      "valid_targets_min": 1274
    },
    {
      "epoch": 4.924444444444444,
      "grad_norm": 0.3250869471276454,
      "learning_rate": 5.4190866426195866e-08,
      "loss": 0.1982,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.04578050225973129,
      "step": 280,
      "valid_targets_mean": 2654.6,
      "valid_targets_min": 1454
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.6141969340231087,
      "learning_rate": 1.5059632171099402e-09,
      "loss": 0.2007,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.2248307466506958,
      "step": 285,
      "valid_targets_mean": 2971.6,
      "valid_targets_min": 1283
    },
    {
      "epoch": 5.0,
      "loss_nan_ranks": 0,
      "loss_rank_avg": 0.2248307466506958,
      "step": 285,
      "total_flos": 3.151185311610962e+17,
      "train_loss": 0.24082276695653013,
      "train_runtime": 3979.0496,
      "train_samples_per_second": 4.505,
      "train_steps_per_second": 0.072,
      "valid_targets_mean": 2971.6,
      "valid_targets_min": 1283
    }
  ],
  "logging_steps": 5,
  "max_steps": 285,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.151185311610962e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}