{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9927797833935017,
  "global_step": 11040,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "ampere_temperature": 0.0,
      "ce_loss": 4.599947213172912,
      "distil_loss": 0.0,
      "epoch": 0.05,
      "learning_rate": 0.001,
      "loss": 4.5999,
      "nnz_perc": 1.0,
      "progress": 0.0,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 250,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 0.0,
      "ce_loss": 1.8339186582565308,
      "distil_loss": 0.0,
      "epoch": 0.09,
      "learning_rate": 0.002,
      "loss": 1.8339,
      "nnz_perc": 1.0,
      "progress": 0.0,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 500,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 0.0,
      "ce_loss": 1.3567713406085968,
      "distil_loss": 0.0,
      "epoch": 0.14,
      "learning_rate": 0.003,
      "loss": 1.3568,
      "nnz_perc": 1.0,
      "progress": 0.0,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 750,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 0.0,
      "ce_loss": 1.2095605379343033,
      "distil_loss": 0.0,
      "epoch": 0.18,
      "learning_rate": 0.004,
      "loss": 1.2096,
      "nnz_perc": 1.0,
      "progress": 0.0,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 1000,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 0.0,
      "ce_loss": 1.1451576855182648,
      "distil_loss": 0.0,
      "epoch": 0.23,
      "learning_rate": 0.005,
      "loss": 1.1452,
      "nnz_perc": 1.0,
      "progress": 0.0,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 1250,
      "threshold": 1.0
    },
    {
      "ce_loss": 1.1159179366551912,
      "distil_loss": 0.0,
      "epoch": 0.25,
      "eval_ampere_temperature": 0.0,
      "eval_exact_match": 77.360454115421,
      "eval_f1": 86.34721419771964,
      "eval_progress": 0.0,
      "eval_regu_lambda": 0.0,
      "eval_threshold": 1.0,
      "nnz_perc": 1.0,
      "regu_loss": 0.0,
      "step": 1380
    },
    {
      "ampere_temperature": 0.0,
      "ce_loss": 1.0835382461547851,
      "distil_loss": 0.0,
      "epoch": 0.27,
      "learning_rate": 0.006,
      "loss": 1.1004,
      "nnz_perc": 1.0,
      "progress": 0.0,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 1500,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 0.0,
      "ce_loss": 1.1237352261543274,
      "distil_loss": 0.0,
      "epoch": 0.32,
      "learning_rate": 0.006999999999999999,
      "loss": 1.1237,
      "nnz_perc": 1.0,
      "progress": 0.0,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 1750,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 0.0,
      "ce_loss": 1.116382148861885,
      "distil_loss": 0.0,
      "epoch": 0.36,
      "learning_rate": 0.008,
      "loss": 1.1164,
      "nnz_perc": 1.0,
      "progress": 0.0,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 2000,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 0.0,
      "ce_loss": 1.0670130407810212,
      "distil_loss": 0.0,
      "epoch": 0.41,
      "learning_rate": 0.009000000000000001,
      "loss": 1.067,
      "nnz_perc": 1.0,
      "progress": 0.0,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 2250,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 0.0,
      "ce_loss": 1.0639437032938004,
      "distil_loss": 0.0,
      "epoch": 0.45,
      "learning_rate": 0.01,
      "loss": 1.0639,
      "nnz_perc": 1.0,
      "progress": 0.0,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 2500,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 1.6912145472259645,
      "ce_loss": 1.0629408322572709,
      "distil_loss": 0.0,
      "epoch": 0.5,
      "learning_rate": 0.00970862470862471,
      "loss": 1.0629,
      "nnz_perc": 1.0,
      "progress": 0.029020979020979,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 2750,
      "threshold": 1.0
    },
    {
      "ce_loss": 1.0985989689826965,
      "distil_loss": 0.0,
      "epoch": 0.5,
      "eval_ampere_temperature": 1.7570655286803998,
      "eval_exact_match": 75.37369914853359,
      "eval_f1": 85.4846023509551,
      "eval_progress": 0.03018648018648018,
      "eval_regu_lambda": 0.0,
      "eval_threshold": 1.0,
      "nnz_perc": 1.0,
      "regu_loss": 0.0,
      "step": 2760
    },
    {
      "ampere_temperature": 3.2905000860378912,
      "ce_loss": 1.0230497049788634,
      "distil_loss": 0.0,
      "epoch": 0.54,
      "learning_rate": 0.009417249417249416,
      "loss": 1.0261,
      "nnz_perc": 1.0,
      "progress": 0.058158508158508204,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 3000,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 4.793831310474058,
      "ce_loss": 0.9981409941911698,
      "distil_loss": 0.0,
      "epoch": 0.59,
      "learning_rate": 0.009125874125874126,
      "loss": 0.9981,
      "nnz_perc": 1.0,
      "progress": 0.0872960372960373,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 3250,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 6.204176736633212,
      "ce_loss": 1.0074045011997224,
      "distil_loss": 0.0,
      "epoch": 0.63,
      "learning_rate": 0.008834498834498834,
      "loss": 1.0074,
      "nnz_perc": 1.0,
      "progress": 0.1164335664335664,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 3500,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 7.524504880614105,
      "ce_loss": 0.9891920503377915,
      "distil_loss": 0.0,
      "epoch": 0.68,
      "learning_rate": 0.008543123543123544,
      "loss": 0.9892,
      "nnz_perc": 1.0,
      "progress": 0.1455710955710956,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 3750,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 8.757784258515466,
      "ce_loss": 1.0083434996008873,
      "distil_loss": 0.0,
      "epoch": 0.72,
      "learning_rate": 0.008251748251748252,
      "loss": 1.0083,
      "nnz_perc": 1.0,
      "progress": 0.1747086247086247,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 4000,
      "threshold": 1.0
    },
    {
      "ce_loss": 0.9699458577803203,
      "distil_loss": 0.0,
      "epoch": 0.75,
      "eval_ampere_temperature": 9.411504281933276,
      "eval_exact_match": 79.94323557237465,
      "eval_f1": 88.17033886272301,
      "eval_progress": 0.191025641025641,
      "eval_regu_lambda": 0.0,
      "eval_threshold": 1.0,
      "nnz_perc": 1.0,
      "regu_loss": 0.0,
      "step": 4140
    },
    {
      "ampere_temperature": 9.906983386436048,
      "ce_loss": 0.9698418254202062,
      "distil_loss": 0.0,
      "epoch": 0.77,
      "learning_rate": 0.00796037296037296,
      "loss": 0.9699,
      "nnz_perc": 1.0,
      "progress": 0.2038461538461538,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 4250,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 10.97507078047459,
      "ce_loss": 0.9425091907978058,
      "distil_loss": 0.0,
      "epoch": 0.81,
      "learning_rate": 0.007668997668997669,
      "loss": 0.9425,
      "nnz_perc": 1.0,
      "progress": 0.232983682983683,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 4500,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 11.965014956729835,
      "ce_loss": 0.9731772248744964,
      "distil_loss": 0.0,
      "epoch": 0.86,
      "learning_rate": 0.007377622377622378,
      "loss": 0.9732,
      "nnz_perc": 1.0,
      "progress": 0.2621212121212122,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 4750,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 12.879784431300521,
      "ce_loss": 0.9197172073125839,
      "distil_loss": 0.0,
      "epoch": 0.9,
      "learning_rate": 0.007086247086247086,
      "loss": 0.9197,
      "nnz_perc": 1.0,
      "progress": 0.2912587412587413,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 5000,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 13.722347720285395,
      "ce_loss": 0.9390108388662338,
      "distil_loss": 0.0,
      "epoch": 0.95,
      "learning_rate": 0.006794871794871795,
      "loss": 0.939,
      "nnz_perc": 1.0,
      "progress": 0.3203962703962704,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 5250,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 14.495673339783197,
      "ce_loss": 0.9188237161636352,
      "distil_loss": 0.0,
      "epoch": 0.99,
      "learning_rate": 0.006503496503496503,
      "loss": 0.9188,
      "nnz_perc": 1.0,
      "progress": 0.3495337995337995,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 5500,
      "threshold": 1.0
    },
    {
      "ce_loss": 0.9402093678712845,
      "distil_loss": 0.0,
      "epoch": 1.0,
      "eval_ampere_temperature": 14.55463723501537,
      "eval_exact_match": 81.63670766319773,
      "eval_f1": 89.21446798933258,
      "eval_progress": 0.35186480186480185,
      "eval_regu_lambda": 0.0,
      "eval_threshold": 1.0,
      "nnz_perc": 1.0,
      "regu_loss": 0.0,
      "step": 5520
    },
    {
      "ampere_temperature": 15.202729805892675,
      "ce_loss": 0.7292252867118172,
      "distil_loss": 0.0,
      "epoch": 1.04,
      "learning_rate": 0.006212121212121212,
      "loss": 0.7461,
      "nnz_perc": 1.0,
      "progress": 0.3786713286713287,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 5750,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 15.846485634712565,
      "ce_loss": 0.7380791381597519,
      "distil_loss": 0.0,
      "epoch": 1.08,
      "learning_rate": 0.005920745920745921,
      "loss": 0.7381,
      "nnz_perc": 1.0,
      "progress": 0.4078088578088578,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 6000,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 16.429909342341613,
      "ce_loss": 0.7548821606636047,
      "distil_loss": 0.0,
      "epoch": 1.13,
      "learning_rate": 0.005629370629370629,
      "loss": 0.7549,
      "nnz_perc": 1.0,
      "progress": 0.436946386946387,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 6250,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 16.955969444878562,
      "ce_loss": 0.7157313173413277,
      "distil_loss": 0.0,
      "epoch": 1.17,
      "learning_rate": 0.005337995337995338,
      "loss": 0.7157,
      "nnz_perc": 1.0,
      "progress": 0.4660839160839161,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 6500,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 17.427634458422148,
      "ce_loss": 0.7611533465385437,
      "distil_loss": 0.0,
      "epoch": 1.22,
      "learning_rate": 0.005046620046620046,
      "loss": 0.7612,
      "nnz_perc": 1.0,
      "progress": 0.4952214452214452,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 6750,
      "threshold": 1.0
    },
    {
      "ce_loss": 0.7508984424670537,
      "distil_loss": 0.0,
      "epoch": 1.25,
      "eval_ampere_temperature": 17.68575872652857,
      "eval_exact_match": 81.51371807000946,
      "eval_f1": 88.80037767793473,
      "eval_progress": 0.5127039627039627,
      "eval_regu_lambda": 0.0,
      "eval_threshold": 1.0,
      "nnz_perc": 1.0,
      "regu_loss": 0.0,
      "step": 6900
    },
    {
      "ampere_temperature": 17.847872899071124,
      "ce_loss": 0.6947148644924164,
      "distil_loss": 0.0,
      "epoch": 1.26,
      "learning_rate": 0.004755244755244755,
      "loss": 0.7284,
      "nnz_perc": 1.0,
      "progress": 0.5243589743589744,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 7000,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 18.219653282924224,
      "ce_loss": 0.7663285417556762,
      "distil_loss": 0.0,
      "epoch": 1.31,
      "learning_rate": 0.004463869463869464,
      "loss": 0.7663,
      "nnz_perc": 1.0,
      "progress": 0.5534965034965035,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 7250,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 18.545944126080197,
      "ce_loss": 0.691897637873888,
      "distil_loss": 0.0,
      "epoch": 1.35,
      "learning_rate": 0.004172494172494173,
      "loss": 0.6919,
      "nnz_perc": 1.0,
      "progress": 0.5826340326340327,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 7500,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 18.82971394463778,
      "ce_loss": 0.7088325002193451,
      "distil_loss": 0.0,
      "epoch": 1.4,
      "learning_rate": 0.0038811188811188812,
      "loss": 0.7088,
      "nnz_perc": 1.0,
      "progress": 0.6117715617715618,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 7750,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 19.07393125469572,
      "ce_loss": 0.7107383124232293,
      "distil_loss": 0.0,
      "epoch": 1.44,
      "learning_rate": 0.0035897435897435897,
      "loss": 0.7107,
      "nnz_perc": 1.0,
      "progress": 0.6409090909090909,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 8000,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 19.281564572352753,
      "ce_loss": 0.7073436776399612,
      "distil_loss": 0.0,
      "epoch": 1.49,
      "learning_rate": 0.0032983682983682983,
      "loss": 0.7073,
      "nnz_perc": 1.0,
      "progress": 0.6700466200466201,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 8250,
      "threshold": 1.0
    },
    {
      "ce_loss": 0.7176821072896321,
      "distil_loss": 0.0,
      "epoch": 1.49,
      "eval_ampere_temperature": 19.304163095074752,
      "eval_exact_match": 82.71523178807946,
      "eval_f1": 89.82467226075393,
      "eval_progress": 0.6735431235431235,
      "eval_regu_lambda": 0.0,
      "eval_threshold": 1.0,
      "nnz_perc": 1.0,
      "regu_loss": 0.0,
      "step": 8280
    },
    {
      "ampere_temperature": 19.455582413707628,
      "ce_loss": 0.7027889224615964,
      "distil_loss": 0.0,
      "epoch": 1.53,
      "learning_rate": 0.0030069930069930068,
      "loss": 0.7046,
      "nnz_perc": 1.0,
      "progress": 0.6991841491841492,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 8500,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 19.598953294859086,
      "ce_loss": 0.6954642720222474,
      "distil_loss": 0.0,
      "epoch": 1.58,
      "learning_rate": 0.0027156177156177157,
      "loss": 0.6955,
      "nnz_perc": 1.0,
      "progress": 0.7283216783216783,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 8750,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 19.71464573190587,
      "ce_loss": 0.7050508892536164,
      "distil_loss": 0.0,
      "epoch": 1.62,
      "learning_rate": 0.0024242424242424242,
      "loss": 0.7051,
      "nnz_perc": 1.0,
      "progress": 0.7574592074592075,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 9000,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 19.805628240946717,
      "ce_loss": 0.6534205512404442,
      "distil_loss": 0.0,
      "epoch": 1.67,
      "learning_rate": 0.0021328671328671328,
      "loss": 0.6534,
      "nnz_perc": 1.0,
      "progress": 0.7865967365967366,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 9250,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 19.874869338080376,
      "ce_loss": 0.6931327093839645,
      "distil_loss": 0.0,
      "epoch": 1.71,
      "learning_rate": 0.0018414918414918417,
      "loss": 0.6931,
      "nnz_perc": 1.0,
      "progress": 0.8157342657342658,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 9500,
      "threshold": 1.0
    },
    {
      "ce_loss": 0.6803905916400254,
      "distil_loss": 0.0,
      "epoch": 1.74,
      "eval_ampere_temperature": 19.90914467925581,
      "eval_exact_match": 83.3112582781457,
      "eval_f1": 90.48253679391624,
      "eval_progress": 0.8343822843822843,
      "eval_regu_lambda": 0.0,
      "eval_threshold": 1.0,
      "nnz_perc": 1.0,
      "regu_loss": 0.0,
      "step": 9660
    },
    {
      "ampere_temperature": 19.925337539405586,
      "ce_loss": 0.6604658047358195,
      "distil_loss": 0.0,
      "epoch": 1.76,
      "learning_rate": 0.0015501165501165502,
      "loss": 0.6732,
      "nnz_perc": 1.0,
      "progress": 0.8448717948717949,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 9750,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 19.960001361021092,
      "ce_loss": 0.6589477426409721,
      "distil_loss": 0.0,
      "epoch": 1.81,
      "learning_rate": 0.001258741258741259,
      "loss": 0.6589,
      "nnz_perc": 1.0,
      "progress": 0.874009324009324,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 10000,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 19.981829319025636,
      "ce_loss": 0.6645486508607864,
      "distil_loss": 0.0,
      "epoch": 1.85,
      "learning_rate": 0.0009673659673659674,
      "loss": 0.6645,
      "nnz_perc": 1.0,
      "progress": 0.9031468531468532,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 10250,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 19.99378992951796,
      "ce_loss": 0.6627120378017426,
      "distil_loss": 0.0,
      "epoch": 1.9,
      "learning_rate": 0.000675990675990676,
      "loss": 0.6627,
      "nnz_perc": 1.0,
      "progress": 0.9322843822843823,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 10500,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 19.998851708596806,
      "ce_loss": 0.6525639802217483,
      "distil_loss": 0.0,
      "epoch": 1.94,
      "learning_rate": 0.00038461538461538467,
      "loss": 0.6526,
      "nnz_perc": 1.0,
      "progress": 0.9614219114219115,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 10750,
      "threshold": 1.0
    },
    {
      "ampere_temperature": 19.999983172360917,
      "ce_loss": 0.630506355702877,
      "distil_loss": 0.0,
      "epoch": 1.99,
      "learning_rate": 9.324009324009324e-05,
      "loss": 0.6305,
      "nnz_perc": 1.0,
      "progress": 0.9905594405594406,
      "regu_lambda": 0.0,
      "regu_loss": 0.0,
      "step": 11000,
      "threshold": 1.0
    },
    {
      "ce_loss": 0.6976410485804081,
      "distil_loss": 0.0,
      "epoch": 1.99,
      "eval_ampere_temperature": 19.99999781767362,
      "eval_exact_match": 83.74645222327341,
      "eval_f1": 90.78776054621733,
      "eval_progress": 0.9952214452214452,
      "eval_regu_lambda": 0.0,
      "eval_threshold": 1.0,
      "nnz_perc": 1.0,
      "regu_loss": 0.0,
      "step": 11040
    }
  ],
  "max_steps": 11080,
  "num_train_epochs": 2,
  "total_flos": 0,
  "trial_name": "hp_mnop-albert-base-v2_tn-albert-base-v2_od-__data_2to__devel_data__nn_pruning__output_sequence__squad_test_teacher___es-steps_pdebs128_nte2_ws2500_ls250_ss1380_stl50_est1380_rn-__da--3c944a736efd9cf3",
  "trial_params": {}
}