| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9927797833935017, |
| "global_step": 11040, |
| "is_hyper_param_search": true, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "ampere_temperature": 0.0, |
| "ce_loss": 4.599947213172912, |
| "distil_loss": 0.0, |
| "epoch": 0.05, |
| "learning_rate": 0.001, |
| "loss": 4.5999, |
| "nnz_perc": 1.0, |
| "progress": 0.0, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 250, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 0.0, |
| "ce_loss": 1.8339186582565308, |
| "distil_loss": 0.0, |
| "epoch": 0.09, |
| "learning_rate": 0.002, |
| "loss": 1.8339, |
| "nnz_perc": 1.0, |
| "progress": 0.0, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 500, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 0.0, |
| "ce_loss": 1.3567713406085968, |
| "distil_loss": 0.0, |
| "epoch": 0.14, |
| "learning_rate": 0.003, |
| "loss": 1.3568, |
| "nnz_perc": 1.0, |
| "progress": 0.0, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 750, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 0.0, |
| "ce_loss": 1.2095605379343033, |
| "distil_loss": 0.0, |
| "epoch": 0.18, |
| "learning_rate": 0.004, |
| "loss": 1.2096, |
| "nnz_perc": 1.0, |
| "progress": 0.0, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 1000, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 0.0, |
| "ce_loss": 1.1451576855182648, |
| "distil_loss": 0.0, |
| "epoch": 0.23, |
| "learning_rate": 0.005, |
| "loss": 1.1452, |
| "nnz_perc": 1.0, |
| "progress": 0.0, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 1250, |
| "threshold": 1.0 |
| }, |
| { |
| "ce_loss": 1.1159179366551912, |
| "distil_loss": 0.0, |
| "epoch": 0.25, |
| "eval_ampere_temperature": 0.0, |
| "eval_exact_match": 77.360454115421, |
| "eval_f1": 86.34721419771964, |
| "eval_progress": 0.0, |
| "eval_regu_lambda": 0.0, |
| "eval_threshold": 1.0, |
| "nnz_perc": 1.0, |
| "regu_loss": 0.0, |
| "step": 1380 |
| }, |
| { |
| "ampere_temperature": 0.0, |
| "ce_loss": 1.0835382461547851, |
| "distil_loss": 0.0, |
| "epoch": 0.27, |
| "learning_rate": 0.006, |
| "loss": 1.1004, |
| "nnz_perc": 1.0, |
| "progress": 0.0, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 1500, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 0.0, |
| "ce_loss": 1.1237352261543274, |
| "distil_loss": 0.0, |
| "epoch": 0.32, |
| "learning_rate": 0.006999999999999999, |
| "loss": 1.1237, |
| "nnz_perc": 1.0, |
| "progress": 0.0, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 1750, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 0.0, |
| "ce_loss": 1.116382148861885, |
| "distil_loss": 0.0, |
| "epoch": 0.36, |
| "learning_rate": 0.008, |
| "loss": 1.1164, |
| "nnz_perc": 1.0, |
| "progress": 0.0, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 2000, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 0.0, |
| "ce_loss": 1.0670130407810212, |
| "distil_loss": 0.0, |
| "epoch": 0.41, |
| "learning_rate": 0.009000000000000001, |
| "loss": 1.067, |
| "nnz_perc": 1.0, |
| "progress": 0.0, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 2250, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 0.0, |
| "ce_loss": 1.0639437032938004, |
| "distil_loss": 0.0, |
| "epoch": 0.45, |
| "learning_rate": 0.01, |
| "loss": 1.0639, |
| "nnz_perc": 1.0, |
| "progress": 0.0, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 2500, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 1.6912145472259645, |
| "ce_loss": 1.0629408322572709, |
| "distil_loss": 0.0, |
| "epoch": 0.5, |
| "learning_rate": 0.00970862470862471, |
| "loss": 1.0629, |
| "nnz_perc": 1.0, |
| "progress": 0.029020979020979, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 2750, |
| "threshold": 1.0 |
| }, |
| { |
| "ce_loss": 1.0985989689826965, |
| "distil_loss": 0.0, |
| "epoch": 0.5, |
| "eval_ampere_temperature": 1.7570655286803998, |
| "eval_exact_match": 75.37369914853359, |
| "eval_f1": 85.4846023509551, |
| "eval_progress": 0.03018648018648018, |
| "eval_regu_lambda": 0.0, |
| "eval_threshold": 1.0, |
| "nnz_perc": 1.0, |
| "regu_loss": 0.0, |
| "step": 2760 |
| }, |
| { |
| "ampere_temperature": 3.2905000860378912, |
| "ce_loss": 1.0230497049788634, |
| "distil_loss": 0.0, |
| "epoch": 0.54, |
| "learning_rate": 0.009417249417249416, |
| "loss": 1.0261, |
| "nnz_perc": 1.0, |
| "progress": 0.058158508158508204, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 3000, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 4.793831310474058, |
| "ce_loss": 0.9981409941911698, |
| "distil_loss": 0.0, |
| "epoch": 0.59, |
| "learning_rate": 0.009125874125874126, |
| "loss": 0.9981, |
| "nnz_perc": 1.0, |
| "progress": 0.0872960372960373, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 3250, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 6.204176736633212, |
| "ce_loss": 1.0074045011997224, |
| "distil_loss": 0.0, |
| "epoch": 0.63, |
| "learning_rate": 0.008834498834498834, |
| "loss": 1.0074, |
| "nnz_perc": 1.0, |
| "progress": 0.1164335664335664, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 3500, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 7.524504880614105, |
| "ce_loss": 0.9891920503377915, |
| "distil_loss": 0.0, |
| "epoch": 0.68, |
| "learning_rate": 0.008543123543123544, |
| "loss": 0.9892, |
| "nnz_perc": 1.0, |
| "progress": 0.1455710955710956, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 3750, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 8.757784258515466, |
| "ce_loss": 1.0083434996008873, |
| "distil_loss": 0.0, |
| "epoch": 0.72, |
| "learning_rate": 0.008251748251748252, |
| "loss": 1.0083, |
| "nnz_perc": 1.0, |
| "progress": 0.1747086247086247, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 4000, |
| "threshold": 1.0 |
| }, |
| { |
| "ce_loss": 0.9699458577803203, |
| "distil_loss": 0.0, |
| "epoch": 0.75, |
| "eval_ampere_temperature": 9.411504281933276, |
| "eval_exact_match": 79.94323557237465, |
| "eval_f1": 88.17033886272301, |
| "eval_progress": 0.191025641025641, |
| "eval_regu_lambda": 0.0, |
| "eval_threshold": 1.0, |
| "nnz_perc": 1.0, |
| "regu_loss": 0.0, |
| "step": 4140 |
| }, |
| { |
| "ampere_temperature": 9.906983386436048, |
| "ce_loss": 0.9698418254202062, |
| "distil_loss": 0.0, |
| "epoch": 0.77, |
| "learning_rate": 0.00796037296037296, |
| "loss": 0.9699, |
| "nnz_perc": 1.0, |
| "progress": 0.2038461538461538, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 4250, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 10.97507078047459, |
| "ce_loss": 0.9425091907978058, |
| "distil_loss": 0.0, |
| "epoch": 0.81, |
| "learning_rate": 0.007668997668997669, |
| "loss": 0.9425, |
| "nnz_perc": 1.0, |
| "progress": 0.232983682983683, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 4500, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 11.965014956729835, |
| "ce_loss": 0.9731772248744964, |
| "distil_loss": 0.0, |
| "epoch": 0.86, |
| "learning_rate": 0.007377622377622378, |
| "loss": 0.9732, |
| "nnz_perc": 1.0, |
| "progress": 0.2621212121212122, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 4750, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 12.879784431300521, |
| "ce_loss": 0.9197172073125839, |
| "distil_loss": 0.0, |
| "epoch": 0.9, |
| "learning_rate": 0.007086247086247086, |
| "loss": 0.9197, |
| "nnz_perc": 1.0, |
| "progress": 0.2912587412587413, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 5000, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 13.722347720285395, |
| "ce_loss": 0.9390108388662338, |
| "distil_loss": 0.0, |
| "epoch": 0.95, |
| "learning_rate": 0.006794871794871795, |
| "loss": 0.939, |
| "nnz_perc": 1.0, |
| "progress": 0.3203962703962704, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 5250, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 14.495673339783197, |
| "ce_loss": 0.9188237161636352, |
| "distil_loss": 0.0, |
| "epoch": 0.99, |
| "learning_rate": 0.006503496503496503, |
| "loss": 0.9188, |
| "nnz_perc": 1.0, |
| "progress": 0.3495337995337995, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 5500, |
| "threshold": 1.0 |
| }, |
| { |
| "ce_loss": 0.9402093678712845, |
| "distil_loss": 0.0, |
| "epoch": 1.0, |
| "eval_ampere_temperature": 14.55463723501537, |
| "eval_exact_match": 81.63670766319773, |
| "eval_f1": 89.21446798933258, |
| "eval_progress": 0.35186480186480185, |
| "eval_regu_lambda": 0.0, |
| "eval_threshold": 1.0, |
| "nnz_perc": 1.0, |
| "regu_loss": 0.0, |
| "step": 5520 |
| }, |
| { |
| "ampere_temperature": 15.202729805892675, |
| "ce_loss": 0.7292252867118172, |
| "distil_loss": 0.0, |
| "epoch": 1.04, |
| "learning_rate": 0.006212121212121212, |
| "loss": 0.7461, |
| "nnz_perc": 1.0, |
| "progress": 0.3786713286713287, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 5750, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 15.846485634712565, |
| "ce_loss": 0.7380791381597519, |
| "distil_loss": 0.0, |
| "epoch": 1.08, |
| "learning_rate": 0.005920745920745921, |
| "loss": 0.7381, |
| "nnz_perc": 1.0, |
| "progress": 0.4078088578088578, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 6000, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 16.429909342341613, |
| "ce_loss": 0.7548821606636047, |
| "distil_loss": 0.0, |
| "epoch": 1.13, |
| "learning_rate": 0.005629370629370629, |
| "loss": 0.7549, |
| "nnz_perc": 1.0, |
| "progress": 0.436946386946387, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 6250, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 16.955969444878562, |
| "ce_loss": 0.7157313173413277, |
| "distil_loss": 0.0, |
| "epoch": 1.17, |
| "learning_rate": 0.005337995337995338, |
| "loss": 0.7157, |
| "nnz_perc": 1.0, |
| "progress": 0.4660839160839161, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 6500, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 17.427634458422148, |
| "ce_loss": 0.7611533465385437, |
| "distil_loss": 0.0, |
| "epoch": 1.22, |
| "learning_rate": 0.005046620046620046, |
| "loss": 0.7612, |
| "nnz_perc": 1.0, |
| "progress": 0.4952214452214452, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 6750, |
| "threshold": 1.0 |
| }, |
| { |
| "ce_loss": 0.7508984424670537, |
| "distil_loss": 0.0, |
| "epoch": 1.25, |
| "eval_ampere_temperature": 17.68575872652857, |
| "eval_exact_match": 81.51371807000946, |
| "eval_f1": 88.80037767793473, |
| "eval_progress": 0.5127039627039627, |
| "eval_regu_lambda": 0.0, |
| "eval_threshold": 1.0, |
| "nnz_perc": 1.0, |
| "regu_loss": 0.0, |
| "step": 6900 |
| }, |
| { |
| "ampere_temperature": 17.847872899071124, |
| "ce_loss": 0.6947148644924164, |
| "distil_loss": 0.0, |
| "epoch": 1.26, |
| "learning_rate": 0.004755244755244755, |
| "loss": 0.7284, |
| "nnz_perc": 1.0, |
| "progress": 0.5243589743589744, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 7000, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 18.219653282924224, |
| "ce_loss": 0.7663285417556762, |
| "distil_loss": 0.0, |
| "epoch": 1.31, |
| "learning_rate": 0.004463869463869464, |
| "loss": 0.7663, |
| "nnz_perc": 1.0, |
| "progress": 0.5534965034965035, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 7250, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 18.545944126080197, |
| "ce_loss": 0.691897637873888, |
| "distil_loss": 0.0, |
| "epoch": 1.35, |
| "learning_rate": 0.004172494172494173, |
| "loss": 0.6919, |
| "nnz_perc": 1.0, |
| "progress": 0.5826340326340327, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 7500, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 18.82971394463778, |
| "ce_loss": 0.7088325002193451, |
| "distil_loss": 0.0, |
| "epoch": 1.4, |
| "learning_rate": 0.0038811188811188812, |
| "loss": 0.7088, |
| "nnz_perc": 1.0, |
| "progress": 0.6117715617715618, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 7750, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 19.07393125469572, |
| "ce_loss": 0.7107383124232293, |
| "distil_loss": 0.0, |
| "epoch": 1.44, |
| "learning_rate": 0.0035897435897435897, |
| "loss": 0.7107, |
| "nnz_perc": 1.0, |
| "progress": 0.6409090909090909, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 8000, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 19.281564572352753, |
| "ce_loss": 0.7073436776399612, |
| "distil_loss": 0.0, |
| "epoch": 1.49, |
| "learning_rate": 0.0032983682983682983, |
| "loss": 0.7073, |
| "nnz_perc": 1.0, |
| "progress": 0.6700466200466201, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 8250, |
| "threshold": 1.0 |
| }, |
| { |
| "ce_loss": 0.7176821072896321, |
| "distil_loss": 0.0, |
| "epoch": 1.49, |
| "eval_ampere_temperature": 19.304163095074752, |
| "eval_exact_match": 82.71523178807946, |
| "eval_f1": 89.82467226075393, |
| "eval_progress": 0.6735431235431235, |
| "eval_regu_lambda": 0.0, |
| "eval_threshold": 1.0, |
| "nnz_perc": 1.0, |
| "regu_loss": 0.0, |
| "step": 8280 |
| }, |
| { |
| "ampere_temperature": 19.455582413707628, |
| "ce_loss": 0.7027889224615964, |
| "distil_loss": 0.0, |
| "epoch": 1.53, |
| "learning_rate": 0.0030069930069930068, |
| "loss": 0.7046, |
| "nnz_perc": 1.0, |
| "progress": 0.6991841491841492, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 8500, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 19.598953294859086, |
| "ce_loss": 0.6954642720222474, |
| "distil_loss": 0.0, |
| "epoch": 1.58, |
| "learning_rate": 0.0027156177156177157, |
| "loss": 0.6955, |
| "nnz_perc": 1.0, |
| "progress": 0.7283216783216783, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 8750, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 19.71464573190587, |
| "ce_loss": 0.7050508892536164, |
| "distil_loss": 0.0, |
| "epoch": 1.62, |
| "learning_rate": 0.0024242424242424242, |
| "loss": 0.7051, |
| "nnz_perc": 1.0, |
| "progress": 0.7574592074592075, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 9000, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 19.805628240946717, |
| "ce_loss": 0.6534205512404442, |
| "distil_loss": 0.0, |
| "epoch": 1.67, |
| "learning_rate": 0.0021328671328671328, |
| "loss": 0.6534, |
| "nnz_perc": 1.0, |
| "progress": 0.7865967365967366, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 9250, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 19.874869338080376, |
| "ce_loss": 0.6931327093839645, |
| "distil_loss": 0.0, |
| "epoch": 1.71, |
| "learning_rate": 0.0018414918414918417, |
| "loss": 0.6931, |
| "nnz_perc": 1.0, |
| "progress": 0.8157342657342658, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 9500, |
| "threshold": 1.0 |
| }, |
| { |
| "ce_loss": 0.6803905916400254, |
| "distil_loss": 0.0, |
| "epoch": 1.74, |
| "eval_ampere_temperature": 19.90914467925581, |
| "eval_exact_match": 83.3112582781457, |
| "eval_f1": 90.48253679391624, |
| "eval_progress": 0.8343822843822843, |
| "eval_regu_lambda": 0.0, |
| "eval_threshold": 1.0, |
| "nnz_perc": 1.0, |
| "regu_loss": 0.0, |
| "step": 9660 |
| }, |
| { |
| "ampere_temperature": 19.925337539405586, |
| "ce_loss": 0.6604658047358195, |
| "distil_loss": 0.0, |
| "epoch": 1.76, |
| "learning_rate": 0.0015501165501165502, |
| "loss": 0.6732, |
| "nnz_perc": 1.0, |
| "progress": 0.8448717948717949, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 9750, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 19.960001361021092, |
| "ce_loss": 0.6589477426409721, |
| "distil_loss": 0.0, |
| "epoch": 1.81, |
| "learning_rate": 0.001258741258741259, |
| "loss": 0.6589, |
| "nnz_perc": 1.0, |
| "progress": 0.874009324009324, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 10000, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 19.981829319025636, |
| "ce_loss": 0.6645486508607864, |
| "distil_loss": 0.0, |
| "epoch": 1.85, |
| "learning_rate": 0.0009673659673659674, |
| "loss": 0.6645, |
| "nnz_perc": 1.0, |
| "progress": 0.9031468531468532, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 10250, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 19.99378992951796, |
| "ce_loss": 0.6627120378017426, |
| "distil_loss": 0.0, |
| "epoch": 1.9, |
| "learning_rate": 0.000675990675990676, |
| "loss": 0.6627, |
| "nnz_perc": 1.0, |
| "progress": 0.9322843822843823, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 10500, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 19.998851708596806, |
| "ce_loss": 0.6525639802217483, |
| "distil_loss": 0.0, |
| "epoch": 1.94, |
| "learning_rate": 0.00038461538461538467, |
| "loss": 0.6526, |
| "nnz_perc": 1.0, |
| "progress": 0.9614219114219115, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 10750, |
| "threshold": 1.0 |
| }, |
| { |
| "ampere_temperature": 19.999983172360917, |
| "ce_loss": 0.630506355702877, |
| "distil_loss": 0.0, |
| "epoch": 1.99, |
| "learning_rate": 9.324009324009324e-05, |
| "loss": 0.6305, |
| "nnz_perc": 1.0, |
| "progress": 0.9905594405594406, |
| "regu_lambda": 0.0, |
| "regu_loss": 0.0, |
| "step": 11000, |
| "threshold": 1.0 |
| }, |
| { |
| "ce_loss": 0.6976410485804081, |
| "distil_loss": 0.0, |
| "epoch": 1.99, |
| "eval_ampere_temperature": 19.99999781767362, |
| "eval_exact_match": 83.74645222327341, |
| "eval_f1": 90.78776054621733, |
| "eval_progress": 0.9952214452214452, |
| "eval_regu_lambda": 0.0, |
| "eval_threshold": 1.0, |
| "nnz_perc": 1.0, |
| "regu_loss": 0.0, |
| "step": 11040 |
| } |
| ], |
| "max_steps": 11080, |
| "num_train_epochs": 2, |
| "total_flos": 0, |
| "trial_name": "hp_mnop-albert-base-v2_tn-albert-base-v2_od-__data_2to__devel_data__nn_pruning__output_sequence__squad_test_teacher___es-steps_pdebs128_nte2_ws2500_ls250_ss1380_stl50_est1380_rn-__da--3c944a736efd9cf3", |
| "trial_params": {} |
| } |
|
|