| { | |
| "best_global_step": 15500, | |
| "best_metric": 0.9434096975688787, | |
| "best_model_checkpoint": "./arabert_author_model_full/checkpoint-15500", | |
| "epoch": 3.374700631395602, | |
| "eval_steps": 500, | |
| "global_step": 15500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.10886131069018071, | |
| "grad_norm": 745471.1875, | |
| "learning_rate": 2.171926006528836e-05, | |
| "loss": 2.2995, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.10886131069018071, | |
| "eval_accuracy": 0.6985195154777928, | |
| "eval_f1_macro": 0.601246564201863, | |
| "eval_f1_micro": 0.6985195154777928, | |
| "eval_loss": 1.2079353332519531, | |
| "eval_precision_macro": 0.6412966664769482, | |
| "eval_precision_micro": 0.6985195154777928, | |
| "eval_recall_macro": 0.6419387939365965, | |
| "eval_recall_micro": 0.6985195154777928, | |
| "eval_runtime": 14.7462, | |
| "eval_samples_per_second": 50.386, | |
| "eval_steps_per_second": 3.187, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.21772262138036141, | |
| "grad_norm": 956772.5625, | |
| "learning_rate": 4.348204570184984e-05, | |
| "loss": 0.849, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.21772262138036141, | |
| "eval_accuracy": 0.819650067294751, | |
| "eval_f1_macro": 0.7996069224582287, | |
| "eval_f1_micro": 0.819650067294751, | |
| "eval_loss": 0.5631475448608398, | |
| "eval_precision_macro": 0.80345079706281, | |
| "eval_precision_micro": 0.819650067294751, | |
| "eval_recall_macro": 0.82483257704162, | |
| "eval_recall_micro": 0.819650067294751, | |
| "eval_runtime": 14.7707, | |
| "eval_samples_per_second": 50.302, | |
| "eval_steps_per_second": 3.182, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.32658393207054215, | |
| "grad_norm": 326904.5, | |
| "learning_rate": 6.524483133841132e-05, | |
| "loss": 0.5868, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.32658393207054215, | |
| "eval_accuracy": 0.8021534320323015, | |
| "eval_f1_macro": 0.793396840762137, | |
| "eval_f1_micro": 0.8021534320323015, | |
| "eval_loss": 0.6846649646759033, | |
| "eval_precision_macro": 0.8435106749075885, | |
| "eval_precision_micro": 0.8021534320323015, | |
| "eval_recall_macro": 0.7916833340258262, | |
| "eval_recall_micro": 0.8021534320323015, | |
| "eval_runtime": 14.8117, | |
| "eval_samples_per_second": 50.163, | |
| "eval_steps_per_second": 3.173, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.43544524276072283, | |
| "grad_norm": 218412.828125, | |
| "learning_rate": 7.998128491699842e-05, | |
| "loss": 0.5612, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.43544524276072283, | |
| "eval_accuracy": 0.8519515477792732, | |
| "eval_f1_macro": 0.8247069527158585, | |
| "eval_f1_micro": 0.8519515477792732, | |
| "eval_loss": 0.4477691948413849, | |
| "eval_precision_macro": 0.8896978331250329, | |
| "eval_precision_micro": 0.8519515477792732, | |
| "eval_recall_macro": 0.8379996548860711, | |
| "eval_recall_micro": 0.8519515477792732, | |
| "eval_runtime": 14.7186, | |
| "eval_samples_per_second": 50.48, | |
| "eval_steps_per_second": 3.193, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5443065534509035, | |
| "grad_norm": 1928294.625, | |
| "learning_rate": 7.968493088594472e-05, | |
| "loss": 0.4929, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.5443065534509035, | |
| "eval_accuracy": 0.873485868102288, | |
| "eval_f1_macro": 0.8688756385026712, | |
| "eval_f1_micro": 0.873485868102288, | |
| "eval_loss": 0.3026486039161682, | |
| "eval_precision_macro": 0.878210989714668, | |
| "eval_precision_micro": 0.873485868102288, | |
| "eval_recall_macro": 0.8859921080399548, | |
| "eval_recall_micro": 0.873485868102288, | |
| "eval_runtime": 14.7131, | |
| "eval_samples_per_second": 50.499, | |
| "eval_steps_per_second": 3.194, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.6531678641410843, | |
| "grad_norm": 3023410.5, | |
| "learning_rate": 7.903065943344406e-05, | |
| "loss": 0.4618, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.6531678641410843, | |
| "eval_accuracy": 0.8613728129205922, | |
| "eval_f1_macro": 0.8296445269102163, | |
| "eval_f1_micro": 0.8613728129205922, | |
| "eval_loss": 0.43775779008865356, | |
| "eval_precision_macro": 0.8710561256381226, | |
| "eval_precision_micro": 0.8613728129205922, | |
| "eval_recall_macro": 0.8541696546910839, | |
| "eval_recall_micro": 0.8613728129205922, | |
| "eval_runtime": 14.7062, | |
| "eval_samples_per_second": 50.523, | |
| "eval_steps_per_second": 3.196, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.762029174831265, | |
| "grad_norm": 211605.15625, | |
| "learning_rate": 7.802437141773096e-05, | |
| "loss": 0.4028, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.762029174831265, | |
| "eval_accuracy": 0.8950201884253028, | |
| "eval_f1_macro": 0.8917785158702655, | |
| "eval_f1_micro": 0.8950201884253028, | |
| "eval_loss": 0.25510504841804504, | |
| "eval_precision_macro": 0.9107123575695487, | |
| "eval_precision_micro": 0.8950201884253028, | |
| "eval_recall_macro": 0.9057010565367906, | |
| "eval_recall_micro": 0.8950201884253028, | |
| "eval_runtime": 14.7188, | |
| "eval_samples_per_second": 50.48, | |
| "eval_steps_per_second": 3.193, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.8708904855214457, | |
| "grad_norm": 37626.74609375, | |
| "learning_rate": 7.667514252581752e-05, | |
| "loss": 0.3747, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.8708904855214457, | |
| "eval_accuracy": 0.892328398384926, | |
| "eval_f1_macro": 0.8948877387080549, | |
| "eval_f1_micro": 0.892328398384926, | |
| "eval_loss": 0.2622196674346924, | |
| "eval_precision_macro": 0.9437605053976897, | |
| "eval_precision_micro": 0.892328398384926, | |
| "eval_recall_macro": 0.9063603025064753, | |
| "eval_recall_micro": 0.892328398384926, | |
| "eval_runtime": 14.7613, | |
| "eval_samples_per_second": 50.334, | |
| "eval_steps_per_second": 3.184, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.9797517962116263, | |
| "grad_norm": 341548.65625, | |
| "learning_rate": 7.499514142009407e-05, | |
| "loss": 0.3686, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.9797517962116263, | |
| "eval_accuracy": 0.901749663526245, | |
| "eval_f1_macro": 0.9071958475193036, | |
| "eval_f1_micro": 0.9017496635262451, | |
| "eval_loss": 0.21770605444908142, | |
| "eval_precision_macro": 0.9392339212137314, | |
| "eval_precision_micro": 0.901749663526245, | |
| "eval_recall_macro": 0.9187280722751042, | |
| "eval_recall_micro": 0.901749663526245, | |
| "eval_runtime": 14.7411, | |
| "eval_samples_per_second": 50.403, | |
| "eval_steps_per_second": 3.188, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.088613106901807, | |
| "grad_norm": 51656.32421875, | |
| "learning_rate": 7.299951998946065e-05, | |
| "loss": 0.2762, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.088613106901807, | |
| "eval_accuracy": 0.8896366083445492, | |
| "eval_f1_macro": 0.8803954267807832, | |
| "eval_f1_micro": 0.8896366083445492, | |
| "eval_loss": 0.37781140208244324, | |
| "eval_precision_macro": 0.8980066417509999, | |
| "eval_precision_micro": 0.8896366083445492, | |
| "eval_recall_macro": 0.8882222866157216, | |
| "eval_recall_micro": 0.8896366083445492, | |
| "eval_runtime": 15.0879, | |
| "eval_samples_per_second": 49.245, | |
| "eval_steps_per_second": 3.115, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.1974744175919878, | |
| "grad_norm": 1009913.0625, | |
| "learning_rate": 7.070627669481137e-05, | |
| "loss": 0.2851, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.1974744175919878, | |
| "eval_accuracy": 0.882907133243607, | |
| "eval_f1_macro": 0.8672894626796113, | |
| "eval_f1_micro": 0.882907133243607, | |
| "eval_loss": 0.38583362102508545, | |
| "eval_precision_macro": 0.9049625152940963, | |
| "eval_precision_micro": 0.882907133243607, | |
| "eval_recall_macro": 0.8813935878782198, | |
| "eval_recall_micro": 0.882907133243607, | |
| "eval_runtime": 14.7029, | |
| "eval_samples_per_second": 50.534, | |
| "eval_steps_per_second": 3.197, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.3063357282821686, | |
| "grad_norm": 26227.69140625, | |
| "learning_rate": 6.813609424135567e-05, | |
| "loss": 0.2818, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.3063357282821686, | |
| "eval_accuracy": 0.9138627187079408, | |
| "eval_f1_macro": 0.9250807107212078, | |
| "eval_f1_micro": 0.9138627187079408, | |
| "eval_loss": 0.1822730302810669, | |
| "eval_precision_macro": 0.9436200764635643, | |
| "eval_precision_micro": 0.9138627187079408, | |
| "eval_recall_macro": 0.9322277636580386, | |
| "eval_recall_micro": 0.9138627187079408, | |
| "eval_runtime": 14.7441, | |
| "eval_samples_per_second": 50.393, | |
| "eval_steps_per_second": 3.188, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.4151970389723492, | |
| "grad_norm": 87145.015625, | |
| "learning_rate": 6.531215304180572e-05, | |
| "loss": 0.2539, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.4151970389723492, | |
| "eval_accuracy": 0.9044414535666218, | |
| "eval_f1_macro": 0.9159118265135213, | |
| "eval_f1_micro": 0.9044414535666218, | |
| "eval_loss": 0.19744105637073517, | |
| "eval_precision_macro": 0.9248731430404993, | |
| "eval_precision_micro": 0.9044414535666218, | |
| "eval_recall_macro": 0.9361879615931227, | |
| "eval_recall_micro": 0.9044414535666218, | |
| "eval_runtime": 14.7205, | |
| "eval_samples_per_second": 50.474, | |
| "eval_steps_per_second": 3.193, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.52405834966253, | |
| "grad_norm": 4197689.5, | |
| "learning_rate": 6.22599221528008e-05, | |
| "loss": 0.2342, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.52405834966253, | |
| "eval_accuracy": 0.9152086137281292, | |
| "eval_f1_macro": 0.9209521774588028, | |
| "eval_f1_micro": 0.9152086137281292, | |
| "eval_loss": 0.16721387207508087, | |
| "eval_precision_macro": 0.9316385374819118, | |
| "eval_precision_micro": 0.9152086137281292, | |
| "eval_recall_macro": 0.9305594066426393, | |
| "eval_recall_micro": 0.9152086137281292, | |
| "eval_runtime": 14.7185, | |
| "eval_samples_per_second": 50.481, | |
| "eval_steps_per_second": 3.193, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.6329196603527105, | |
| "grad_norm": 29691.1875, | |
| "learning_rate": 5.900692957010821e-05, | |
| "loss": 0.2658, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.6329196603527105, | |
| "eval_accuracy": 0.9205921938088829, | |
| "eval_f1_macro": 0.9292673927082579, | |
| "eval_f1_micro": 0.9205921938088829, | |
| "eval_loss": 0.16926071047782898, | |
| "eval_precision_macro": 0.9467601029387086, | |
| "eval_precision_micro": 0.9205921938088829, | |
| "eval_recall_macro": 0.9353857192023052, | |
| "eval_recall_micro": 0.9205921938088829, | |
| "eval_runtime": 14.7038, | |
| "eval_samples_per_second": 50.531, | |
| "eval_steps_per_second": 3.196, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.7417809710428913, | |
| "grad_norm": 82702.546875, | |
| "learning_rate": 5.5582513954302386e-05, | |
| "loss": 0.2703, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.7417809710428913, | |
| "eval_accuracy": 0.917900403768506, | |
| "eval_f1_macro": 0.9205592899943698, | |
| "eval_f1_micro": 0.917900403768506, | |
| "eval_loss": 0.22037993371486664, | |
| "eval_precision_macro": 0.9459349396324186, | |
| "eval_precision_micro": 0.917900403768506, | |
| "eval_recall_macro": 0.9278516945604416, | |
| "eval_recall_micro": 0.917900403768506, | |
| "eval_runtime": 14.7085, | |
| "eval_samples_per_second": 50.515, | |
| "eval_steps_per_second": 3.195, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.850642281733072, | |
| "grad_norm": 450699.1875, | |
| "learning_rate": 5.201756002610252e-05, | |
| "loss": 0.2566, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.850642281733072, | |
| "eval_accuracy": 0.9098250336473755, | |
| "eval_f1_macro": 0.9126391472355347, | |
| "eval_f1_micro": 0.9098250336473755, | |
| "eval_loss": 0.26449093222618103, | |
| "eval_precision_macro": 0.9352643525302922, | |
| "eval_precision_micro": 0.9098250336473755, | |
| "eval_recall_macro": 0.931955435163728, | |
| "eval_recall_micro": 0.9098250336473755, | |
| "eval_runtime": 14.6939, | |
| "eval_samples_per_second": 50.565, | |
| "eval_steps_per_second": 3.199, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.959503592423253, | |
| "grad_norm": 37148.73046875, | |
| "learning_rate": 4.834422001783138e-05, | |
| "loss": 0.2242, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.959503592423253, | |
| "eval_accuracy": 0.9246298788694481, | |
| "eval_f1_macro": 0.9278695233625198, | |
| "eval_f1_micro": 0.9246298788694481, | |
| "eval_loss": 0.20524874329566956, | |
| "eval_precision_macro": 0.9473174570200222, | |
| "eval_precision_micro": 0.9246298788694481, | |
| "eval_recall_macro": 0.9317137486146517, | |
| "eval_recall_micro": 0.9246298788694481, | |
| "eval_runtime": 14.65, | |
| "eval_samples_per_second": 50.717, | |
| "eval_steps_per_second": 3.208, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.0683649031134337, | |
| "grad_norm": 65893.8984375, | |
| "learning_rate": 4.45956236932181e-05, | |
| "loss": 0.1672, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.0683649031134337, | |
| "eval_accuracy": 0.9165545087483177, | |
| "eval_f1_macro": 0.9239702133396492, | |
| "eval_f1_micro": 0.9165545087483177, | |
| "eval_loss": 0.3571414351463318, | |
| "eval_precision_macro": 0.9412785975210729, | |
| "eval_precision_micro": 0.9165545087483177, | |
| "eval_recall_macro": 0.9173054563259597, | |
| "eval_recall_micro": 0.9165545087483177, | |
| "eval_runtime": 14.749, | |
| "eval_samples_per_second": 50.376, | |
| "eval_steps_per_second": 3.187, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.177226213803614, | |
| "grad_norm": 20243.5546875, | |
| "learning_rate": 4.0805579550869046e-05, | |
| "loss": 0.1593, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.177226213803614, | |
| "eval_accuracy": 0.9125168236877523, | |
| "eval_f1_macro": 0.9238184226911409, | |
| "eval_f1_micro": 0.9125168236877523, | |
| "eval_loss": 0.30988800525665283, | |
| "eval_precision_macro": 0.9555289484815556, | |
| "eval_precision_micro": 0.9125168236877523, | |
| "eval_recall_macro": 0.9275764985418137, | |
| "eval_recall_micro": 0.9125168236877523, | |
| "eval_runtime": 15.0155, | |
| "eval_samples_per_second": 49.482, | |
| "eval_steps_per_second": 3.13, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.286087524493795, | |
| "grad_norm": 33157.19140625, | |
| "learning_rate": 3.7008269906245454e-05, | |
| "loss": 0.1799, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.286087524493795, | |
| "eval_accuracy": 0.9246298788694481, | |
| "eval_f1_macro": 0.9287251727049811, | |
| "eval_f1_micro": 0.9246298788694481, | |
| "eval_loss": 0.23414301872253418, | |
| "eval_precision_macro": 0.959944603131214, | |
| "eval_precision_micro": 0.9246298788694481, | |
| "eval_recall_macro": 0.9306134629626335, | |
| "eval_recall_micro": 0.9246298788694481, | |
| "eval_runtime": 14.6983, | |
| "eval_samples_per_second": 50.55, | |
| "eval_steps_per_second": 3.198, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.3949488351839756, | |
| "grad_norm": 48777.84375, | |
| "learning_rate": 3.323794260219589e-05, | |
| "loss": 0.166, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.3949488351839756, | |
| "eval_accuracy": 0.9057873485868102, | |
| "eval_f1_macro": 0.9123153410480982, | |
| "eval_f1_micro": 0.9057873485868102, | |
| "eval_loss": 0.3453662395477295, | |
| "eval_precision_macro": 0.9446104426733389, | |
| "eval_precision_micro": 0.9057873485868102, | |
| "eval_recall_macro": 0.91935239522038, | |
| "eval_recall_micro": 0.9057873485868102, | |
| "eval_runtime": 14.7404, | |
| "eval_samples_per_second": 50.406, | |
| "eval_steps_per_second": 3.189, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.5038101458741564, | |
| "grad_norm": 33563.56640625, | |
| "learning_rate": 2.9528602128499004e-05, | |
| "loss": 0.162, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.5038101458741564, | |
| "eval_accuracy": 0.9098250336473755, | |
| "eval_f1_macro": 0.9212878627631594, | |
| "eval_f1_micro": 0.9098250336473755, | |
| "eval_loss": 0.22809743881225586, | |
| "eval_precision_macro": 0.9389309808956737, | |
| "eval_precision_micro": 0.9098250336473755, | |
| "eval_recall_macro": 0.9311247877025975, | |
| "eval_recall_micro": 0.9098250336473755, | |
| "eval_runtime": 14.666, | |
| "eval_samples_per_second": 50.661, | |
| "eval_steps_per_second": 3.205, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.612671456564337, | |
| "grad_norm": 58977.125, | |
| "learning_rate": 2.591370293620146e-05, | |
| "loss": 0.1452, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.612671456564337, | |
| "eval_accuracy": 0.9219380888290714, | |
| "eval_f1_macro": 0.9232635700162879, | |
| "eval_f1_micro": 0.9219380888290714, | |
| "eval_loss": 0.2860707640647888, | |
| "eval_precision_macro": 0.9426347574998575, | |
| "eval_precision_micro": 0.9219380888290714, | |
| "eval_recall_macro": 0.9262974863930373, | |
| "eval_recall_micro": 0.9219380888290714, | |
| "eval_runtime": 14.8095, | |
| "eval_samples_per_second": 50.171, | |
| "eval_steps_per_second": 3.174, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.7215327672545175, | |
| "grad_norm": 46900.25390625, | |
| "learning_rate": 2.2425847712741887e-05, | |
| "loss": 0.1418, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.7215327672545175, | |
| "eval_accuracy": 0.9286675639300135, | |
| "eval_f1_macro": 0.9357990563843356, | |
| "eval_f1_micro": 0.9286675639300135, | |
| "eval_loss": 0.15669873356819153, | |
| "eval_precision_macro": 0.9529768865317036, | |
| "eval_precision_micro": 0.9286675639300135, | |
| "eval_recall_macro": 0.9417303559122717, | |
| "eval_recall_micro": 0.9286675639300135, | |
| "eval_runtime": 14.7072, | |
| "eval_samples_per_second": 50.52, | |
| "eval_steps_per_second": 3.196, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.8303940779446983, | |
| "grad_norm": 37592.3515625, | |
| "learning_rate": 1.9096493339109878e-05, | |
| "loss": 0.1429, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.8303940779446983, | |
| "eval_accuracy": 0.9165545087483177, | |
| "eval_f1_macro": 0.9295728643158702, | |
| "eval_f1_micro": 0.9165545087483177, | |
| "eval_loss": 0.22479559481143951, | |
| "eval_precision_macro": 0.9605098350591709, | |
| "eval_precision_micro": 0.9165545087483177, | |
| "eval_recall_macro": 0.9328126952515738, | |
| "eval_recall_micro": 0.9165545087483177, | |
| "eval_runtime": 14.6901, | |
| "eval_samples_per_second": 50.578, | |
| "eval_steps_per_second": 3.199, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.939255388634879, | |
| "grad_norm": 79597.40625, | |
| "learning_rate": 1.5955667181005554e-05, | |
| "loss": 0.1293, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.939255388634879, | |
| "eval_accuracy": 0.9246298788694481, | |
| "eval_f1_macro": 0.9319848397676713, | |
| "eval_f1_micro": 0.9246298788694481, | |
| "eval_loss": 0.27543124556541443, | |
| "eval_precision_macro": 0.9589344708678029, | |
| "eval_precision_micro": 0.9246298788694481, | |
| "eval_recall_macro": 0.932925082879603, | |
| "eval_recall_micro": 0.9246298788694481, | |
| "eval_runtime": 14.726, | |
| "eval_samples_per_second": 50.455, | |
| "eval_steps_per_second": 3.192, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 3.04811669932506, | |
| "grad_norm": 25773.66796875, | |
| "learning_rate": 1.3031696272762192e-05, | |
| "loss": 0.1137, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 3.04811669932506, | |
| "eval_accuracy": 0.9246298788694481, | |
| "eval_f1_macro": 0.937910042741771, | |
| "eval_f1_micro": 0.9246298788694481, | |
| "eval_loss": 0.20125848054885864, | |
| "eval_precision_macro": 0.9546735463378956, | |
| "eval_precision_micro": 0.9246298788694481, | |
| "eval_recall_macro": 0.9429177293988182, | |
| "eval_recall_micro": 0.9246298788694481, | |
| "eval_runtime": 15.0054, | |
| "eval_samples_per_second": 49.515, | |
| "eval_steps_per_second": 3.132, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 3.1569780100152407, | |
| "grad_norm": 17888.46484375, | |
| "learning_rate": 1.0350951836516297e-05, | |
| "loss": 0.0987, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 3.1569780100152407, | |
| "eval_accuracy": 0.9232839838492598, | |
| "eval_f1_macro": 0.9266276405829272, | |
| "eval_f1_micro": 0.9232839838492598, | |
| "eval_loss": 0.29369959235191345, | |
| "eval_precision_macro": 0.9436536313571009, | |
| "eval_precision_micro": 0.9232839838492598, | |
| "eval_recall_macro": 0.9283196203410136, | |
| "eval_recall_micro": 0.9232839838492598, | |
| "eval_runtime": 14.7764, | |
| "eval_samples_per_second": 50.283, | |
| "eval_steps_per_second": 3.181, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 3.265839320705421, | |
| "grad_norm": 85828.9375, | |
| "learning_rate": 7.9376114407998e-06, | |
| "loss": 0.0859, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 3.265839320705421, | |
| "eval_accuracy": 0.9246298788694481, | |
| "eval_f1_macro": 0.9402166974265765, | |
| "eval_f1_micro": 0.9246298788694481, | |
| "eval_loss": 0.17889092862606049, | |
| "eval_precision_macro": 0.9685045177945787, | |
| "eval_precision_micro": 0.9246298788694481, | |
| "eval_recall_macro": 0.9463450172046672, | |
| "eval_recall_micro": 0.9246298788694481, | |
| "eval_runtime": 14.7495, | |
| "eval_samples_per_second": 50.375, | |
| "eval_steps_per_second": 3.187, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 3.374700631395602, | |
| "grad_norm": 146288.75, | |
| "learning_rate": 5.813440943640527e-06, | |
| "loss": 0.0857, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 3.374700631395602, | |
| "eval_accuracy": 0.927321668909825, | |
| "eval_f1_macro": 0.9434096975688787, | |
| "eval_f1_micro": 0.927321668909825, | |
| "eval_loss": 0.16961060464382172, | |
| "eval_precision_macro": 0.9641802881027017, | |
| "eval_precision_micro": 0.927321668909825, | |
| "eval_recall_macro": 0.9472331991452233, | |
| "eval_recall_micro": 0.927321668909825, | |
| "eval_runtime": 14.7305, | |
| "eval_samples_per_second": 50.44, | |
| "eval_steps_per_second": 3.191, | |
| "step": 15500 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 18372, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.52555679969065e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |