{ "best_global_step": 15500, "best_metric": 0.9434096975688787, "best_model_checkpoint": "./arabert_author_model_full/checkpoint-15500", "epoch": 3.374700631395602, "eval_steps": 500, "global_step": 15500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10886131069018071, "grad_norm": 745471.1875, "learning_rate": 2.171926006528836e-05, "loss": 2.2995, "step": 500 }, { "epoch": 0.10886131069018071, "eval_accuracy": 0.6985195154777928, "eval_f1_macro": 0.601246564201863, "eval_f1_micro": 0.6985195154777928, "eval_loss": 1.2079353332519531, "eval_precision_macro": 0.6412966664769482, "eval_precision_micro": 0.6985195154777928, "eval_recall_macro": 0.6419387939365965, "eval_recall_micro": 0.6985195154777928, "eval_runtime": 14.7462, "eval_samples_per_second": 50.386, "eval_steps_per_second": 3.187, "step": 500 }, { "epoch": 0.21772262138036141, "grad_norm": 956772.5625, "learning_rate": 4.348204570184984e-05, "loss": 0.849, "step": 1000 }, { "epoch": 0.21772262138036141, "eval_accuracy": 0.819650067294751, "eval_f1_macro": 0.7996069224582287, "eval_f1_micro": 0.819650067294751, "eval_loss": 0.5631475448608398, "eval_precision_macro": 0.80345079706281, "eval_precision_micro": 0.819650067294751, "eval_recall_macro": 0.82483257704162, "eval_recall_micro": 0.819650067294751, "eval_runtime": 14.7707, "eval_samples_per_second": 50.302, "eval_steps_per_second": 3.182, "step": 1000 }, { "epoch": 0.32658393207054215, "grad_norm": 326904.5, "learning_rate": 6.524483133841132e-05, "loss": 0.5868, "step": 1500 }, { "epoch": 0.32658393207054215, "eval_accuracy": 0.8021534320323015, "eval_f1_macro": 0.793396840762137, "eval_f1_micro": 0.8021534320323015, "eval_loss": 0.6846649646759033, "eval_precision_macro": 0.8435106749075885, "eval_precision_micro": 0.8021534320323015, "eval_recall_macro": 0.7916833340258262, "eval_recall_micro": 0.8021534320323015, "eval_runtime": 14.8117, "eval_samples_per_second": 50.163, "eval_steps_per_second": 3.173, "step": 1500 }, { "epoch": 0.43544524276072283, "grad_norm": 218412.828125, "learning_rate": 7.998128491699842e-05, "loss": 0.5612, "step": 2000 }, { "epoch": 0.43544524276072283, "eval_accuracy": 0.8519515477792732, "eval_f1_macro": 0.8247069527158585, "eval_f1_micro": 0.8519515477792732, "eval_loss": 0.4477691948413849, "eval_precision_macro": 0.8896978331250329, "eval_precision_micro": 0.8519515477792732, "eval_recall_macro": 0.8379996548860711, "eval_recall_micro": 0.8519515477792732, "eval_runtime": 14.7186, "eval_samples_per_second": 50.48, "eval_steps_per_second": 3.193, "step": 2000 }, { "epoch": 0.5443065534509035, "grad_norm": 1928294.625, "learning_rate": 7.968493088594472e-05, "loss": 0.4929, "step": 2500 }, { "epoch": 0.5443065534509035, "eval_accuracy": 0.873485868102288, "eval_f1_macro": 0.8688756385026712, "eval_f1_micro": 0.873485868102288, "eval_loss": 0.3026486039161682, "eval_precision_macro": 0.878210989714668, "eval_precision_micro": 0.873485868102288, "eval_recall_macro": 0.8859921080399548, "eval_recall_micro": 0.873485868102288, "eval_runtime": 14.7131, "eval_samples_per_second": 50.499, "eval_steps_per_second": 3.194, "step": 2500 }, { "epoch": 0.6531678641410843, "grad_norm": 3023410.5, "learning_rate": 7.903065943344406e-05, "loss": 0.4618, "step": 3000 }, { "epoch": 0.6531678641410843, "eval_accuracy": 0.8613728129205922, "eval_f1_macro": 0.8296445269102163, "eval_f1_micro": 0.8613728129205922, "eval_loss": 0.43775779008865356, "eval_precision_macro": 0.8710561256381226, "eval_precision_micro": 0.8613728129205922, "eval_recall_macro": 0.8541696546910839, "eval_recall_micro": 0.8613728129205922, "eval_runtime": 14.7062, "eval_samples_per_second": 50.523, "eval_steps_per_second": 3.196, "step": 3000 }, { "epoch": 0.762029174831265, "grad_norm": 211605.15625, "learning_rate": 7.802437141773096e-05, "loss": 0.4028, "step": 3500 }, { "epoch": 0.762029174831265, "eval_accuracy": 0.8950201884253028, "eval_f1_macro": 0.8917785158702655, "eval_f1_micro": 0.8950201884253028, "eval_loss": 0.25510504841804504, "eval_precision_macro": 0.9107123575695487, "eval_precision_micro": 0.8950201884253028, "eval_recall_macro": 0.9057010565367906, "eval_recall_micro": 0.8950201884253028, "eval_runtime": 14.7188, "eval_samples_per_second": 50.48, "eval_steps_per_second": 3.193, "step": 3500 }, { "epoch": 0.8708904855214457, "grad_norm": 37626.74609375, "learning_rate": 7.667514252581752e-05, "loss": 0.3747, "step": 4000 }, { "epoch": 0.8708904855214457, "eval_accuracy": 0.892328398384926, "eval_f1_macro": 0.8948877387080549, "eval_f1_micro": 0.892328398384926, "eval_loss": 0.2622196674346924, "eval_precision_macro": 0.9437605053976897, "eval_precision_micro": 0.892328398384926, "eval_recall_macro": 0.9063603025064753, "eval_recall_micro": 0.892328398384926, "eval_runtime": 14.7613, "eval_samples_per_second": 50.334, "eval_steps_per_second": 3.184, "step": 4000 }, { "epoch": 0.9797517962116263, "grad_norm": 341548.65625, "learning_rate": 7.499514142009407e-05, "loss": 0.3686, "step": 4500 }, { "epoch": 0.9797517962116263, "eval_accuracy": 0.901749663526245, "eval_f1_macro": 0.9071958475193036, "eval_f1_micro": 0.9017496635262451, "eval_loss": 0.21770605444908142, "eval_precision_macro": 0.9392339212137314, "eval_precision_micro": 0.901749663526245, "eval_recall_macro": 0.9187280722751042, "eval_recall_micro": 0.901749663526245, "eval_runtime": 14.7411, "eval_samples_per_second": 50.403, "eval_steps_per_second": 3.188, "step": 4500 }, { "epoch": 1.088613106901807, "grad_norm": 51656.32421875, "learning_rate": 7.299951998946065e-05, "loss": 0.2762, "step": 5000 }, { "epoch": 1.088613106901807, "eval_accuracy": 0.8896366083445492, "eval_f1_macro": 0.8803954267807832, "eval_f1_micro": 0.8896366083445492, "eval_loss": 0.37781140208244324, "eval_precision_macro": 0.8980066417509999, "eval_precision_micro": 0.8896366083445492, "eval_recall_macro": 0.8882222866157216, "eval_recall_micro": 0.8896366083445492, "eval_runtime": 15.0879, "eval_samples_per_second": 49.245, "eval_steps_per_second": 3.115, "step": 5000 }, { "epoch": 1.1974744175919878, "grad_norm": 1009913.0625, "learning_rate": 7.070627669481137e-05, "loss": 0.2851, "step": 5500 }, { "epoch": 1.1974744175919878, "eval_accuracy": 0.882907133243607, "eval_f1_macro": 0.8672894626796113, "eval_f1_micro": 0.882907133243607, "eval_loss": 0.38583362102508545, "eval_precision_macro": 0.9049625152940963, "eval_precision_micro": 0.882907133243607, "eval_recall_macro": 0.8813935878782198, "eval_recall_micro": 0.882907133243607, "eval_runtime": 14.7029, "eval_samples_per_second": 50.534, "eval_steps_per_second": 3.197, "step": 5500 }, { "epoch": 1.3063357282821686, "grad_norm": 26227.69140625, "learning_rate": 6.813609424135567e-05, "loss": 0.2818, "step": 6000 }, { "epoch": 1.3063357282821686, "eval_accuracy": 0.9138627187079408, "eval_f1_macro": 0.9250807107212078, "eval_f1_micro": 0.9138627187079408, "eval_loss": 0.1822730302810669, "eval_precision_macro": 0.9436200764635643, "eval_precision_micro": 0.9138627187079408, "eval_recall_macro": 0.9322277636580386, "eval_recall_micro": 0.9138627187079408, "eval_runtime": 14.7441, "eval_samples_per_second": 50.393, "eval_steps_per_second": 3.188, "step": 6000 }, { "epoch": 1.4151970389723492, "grad_norm": 87145.015625, "learning_rate": 6.531215304180572e-05, "loss": 0.2539, "step": 6500 }, { "epoch": 1.4151970389723492, "eval_accuracy": 0.9044414535666218, "eval_f1_macro": 0.9159118265135213, "eval_f1_micro": 0.9044414535666218, "eval_loss": 0.19744105637073517, "eval_precision_macro": 0.9248731430404993, "eval_precision_micro": 0.9044414535666218, "eval_recall_macro": 0.9361879615931227, "eval_recall_micro": 0.9044414535666218, "eval_runtime": 14.7205, "eval_samples_per_second": 50.474, "eval_steps_per_second": 3.193, "step": 6500 }, { "epoch": 1.52405834966253, "grad_norm": 4197689.5, "learning_rate": 6.22599221528008e-05, "loss": 0.2342, "step": 7000 }, { "epoch": 1.52405834966253, "eval_accuracy": 0.9152086137281292, "eval_f1_macro": 0.9209521774588028, "eval_f1_micro": 0.9152086137281292, "eval_loss": 0.16721387207508087, "eval_precision_macro": 0.9316385374819118, "eval_precision_micro": 0.9152086137281292, "eval_recall_macro": 0.9305594066426393, "eval_recall_micro": 0.9152086137281292, "eval_runtime": 14.7185, "eval_samples_per_second": 50.481, "eval_steps_per_second": 3.193, "step": 7000 }, { "epoch": 1.6329196603527105, "grad_norm": 29691.1875, "learning_rate": 5.900692957010821e-05, "loss": 0.2658, "step": 7500 }, { "epoch": 1.6329196603527105, "eval_accuracy": 0.9205921938088829, "eval_f1_macro": 0.9292673927082579, "eval_f1_micro": 0.9205921938088829, "eval_loss": 0.16926071047782898, "eval_precision_macro": 0.9467601029387086, "eval_precision_micro": 0.9205921938088829, "eval_recall_macro": 0.9353857192023052, "eval_recall_micro": 0.9205921938088829, "eval_runtime": 14.7038, "eval_samples_per_second": 50.531, "eval_steps_per_second": 3.196, "step": 7500 }, { "epoch": 1.7417809710428913, "grad_norm": 82702.546875, "learning_rate": 5.5582513954302386e-05, "loss": 0.2703, "step": 8000 }, { "epoch": 1.7417809710428913, "eval_accuracy": 0.917900403768506, "eval_f1_macro": 0.9205592899943698, "eval_f1_micro": 0.917900403768506, "eval_loss": 0.22037993371486664, "eval_precision_macro": 0.9459349396324186, "eval_precision_micro": 0.917900403768506, "eval_recall_macro": 0.9278516945604416, "eval_recall_micro": 0.917900403768506, "eval_runtime": 14.7085, "eval_samples_per_second": 50.515, "eval_steps_per_second": 3.195, "step": 8000 }, { "epoch": 1.850642281733072, "grad_norm": 450699.1875, "learning_rate": 5.201756002610252e-05, "loss": 0.2566, "step": 8500 }, { "epoch": 1.850642281733072, "eval_accuracy": 0.9098250336473755, "eval_f1_macro": 0.9126391472355347, "eval_f1_micro": 0.9098250336473755, "eval_loss": 0.26449093222618103, "eval_precision_macro": 0.9352643525302922, "eval_precision_micro": 0.9098250336473755, "eval_recall_macro": 0.931955435163728, "eval_recall_micro": 0.9098250336473755, "eval_runtime": 14.6939, "eval_samples_per_second": 50.565, "eval_steps_per_second": 3.199, "step": 8500 }, { "epoch": 1.959503592423253, "grad_norm": 37148.73046875, "learning_rate": 4.834422001783138e-05, "loss": 0.2242, "step": 9000 }, { "epoch": 1.959503592423253, "eval_accuracy": 0.9246298788694481, "eval_f1_macro": 0.9278695233625198, "eval_f1_micro": 0.9246298788694481, "eval_loss": 0.20524874329566956, "eval_precision_macro": 0.9473174570200222, "eval_precision_micro": 0.9246298788694481, "eval_recall_macro": 0.9317137486146517, "eval_recall_micro": 0.9246298788694481, "eval_runtime": 14.65, "eval_samples_per_second": 50.717, "eval_steps_per_second": 3.208, "step": 9000 }, { "epoch": 2.0683649031134337, "grad_norm": 65893.8984375, "learning_rate": 4.45956236932181e-05, "loss": 0.1672, "step": 9500 }, { "epoch": 2.0683649031134337, "eval_accuracy": 0.9165545087483177, "eval_f1_macro": 0.9239702133396492, "eval_f1_micro": 0.9165545087483177, "eval_loss": 0.3571414351463318, "eval_precision_macro": 0.9412785975210729, "eval_precision_micro": 0.9165545087483177, "eval_recall_macro": 0.9173054563259597, "eval_recall_micro": 0.9165545087483177, "eval_runtime": 14.749, "eval_samples_per_second": 50.376, "eval_steps_per_second": 3.187, "step": 9500 }, { "epoch": 2.177226213803614, "grad_norm": 20243.5546875, "learning_rate": 4.0805579550869046e-05, "loss": 0.1593, "step": 10000 }, { "epoch": 2.177226213803614, "eval_accuracy": 0.9125168236877523, "eval_f1_macro": 0.9238184226911409, "eval_f1_micro": 0.9125168236877523, "eval_loss": 0.30988800525665283, "eval_precision_macro": 0.9555289484815556, "eval_precision_micro": 0.9125168236877523, "eval_recall_macro": 0.9275764985418137, "eval_recall_micro": 0.9125168236877523, "eval_runtime": 15.0155, "eval_samples_per_second": 49.482, "eval_steps_per_second": 3.13, "step": 10000 }, { "epoch": 2.286087524493795, "grad_norm": 33157.19140625, "learning_rate": 3.7008269906245454e-05, "loss": 0.1799, "step": 10500 }, { "epoch": 2.286087524493795, "eval_accuracy": 0.9246298788694481, "eval_f1_macro": 0.9287251727049811, "eval_f1_micro": 0.9246298788694481, "eval_loss": 0.23414301872253418, "eval_precision_macro": 0.959944603131214, "eval_precision_micro": 0.9246298788694481, "eval_recall_macro": 0.9306134629626335, "eval_recall_micro": 0.9246298788694481, "eval_runtime": 14.6983, "eval_samples_per_second": 50.55, "eval_steps_per_second": 3.198, "step": 10500 }, { "epoch": 2.3949488351839756, "grad_norm": 48777.84375, "learning_rate": 3.323794260219589e-05, "loss": 0.166, "step": 11000 }, { "epoch": 2.3949488351839756, "eval_accuracy": 0.9057873485868102, "eval_f1_macro": 0.9123153410480982, "eval_f1_micro": 0.9057873485868102, "eval_loss": 0.3453662395477295, "eval_precision_macro": 0.9446104426733389, "eval_precision_micro": 0.9057873485868102, "eval_recall_macro": 0.91935239522038, "eval_recall_micro": 0.9057873485868102, "eval_runtime": 14.7404, "eval_samples_per_second": 50.406, "eval_steps_per_second": 3.189, "step": 11000 }, { "epoch": 2.5038101458741564, "grad_norm": 33563.56640625, "learning_rate": 2.9528602128499004e-05, "loss": 0.162, "step": 11500 }, { "epoch": 2.5038101458741564, "eval_accuracy": 0.9098250336473755, "eval_f1_macro": 0.9212878627631594, "eval_f1_micro": 0.9098250336473755, "eval_loss": 0.22809743881225586, "eval_precision_macro": 0.9389309808956737, "eval_precision_micro": 0.9098250336473755, "eval_recall_macro": 0.9311247877025975, "eval_recall_micro": 0.9098250336473755, "eval_runtime": 14.666, "eval_samples_per_second": 50.661, "eval_steps_per_second": 3.205, "step": 11500 }, { "epoch": 2.612671456564337, "grad_norm": 58977.125, "learning_rate": 2.591370293620146e-05, "loss": 0.1452, "step": 12000 }, { "epoch": 2.612671456564337, "eval_accuracy": 0.9219380888290714, "eval_f1_macro": 0.9232635700162879, "eval_f1_micro": 0.9219380888290714, "eval_loss": 0.2860707640647888, "eval_precision_macro": 0.9426347574998575, "eval_precision_micro": 0.9219380888290714, "eval_recall_macro": 0.9262974863930373, "eval_recall_micro": 0.9219380888290714, "eval_runtime": 14.8095, "eval_samples_per_second": 50.171, "eval_steps_per_second": 3.174, "step": 12000 }, { "epoch": 2.7215327672545175, "grad_norm": 46900.25390625, "learning_rate": 2.2425847712741887e-05, "loss": 0.1418, "step": 12500 }, { "epoch": 2.7215327672545175, "eval_accuracy": 0.9286675639300135, "eval_f1_macro": 0.9357990563843356, "eval_f1_micro": 0.9286675639300135, "eval_loss": 0.15669873356819153, "eval_precision_macro": 0.9529768865317036, "eval_precision_micro": 0.9286675639300135, "eval_recall_macro": 0.9417303559122717, "eval_recall_micro": 0.9286675639300135, "eval_runtime": 14.7072, "eval_samples_per_second": 50.52, "eval_steps_per_second": 3.196, "step": 12500 }, { "epoch": 2.8303940779446983, "grad_norm": 37592.3515625, "learning_rate": 1.9096493339109878e-05, "loss": 0.1429, "step": 13000 }, { "epoch": 2.8303940779446983, "eval_accuracy": 0.9165545087483177, "eval_f1_macro": 0.9295728643158702, "eval_f1_micro": 0.9165545087483177, "eval_loss": 0.22479559481143951, "eval_precision_macro": 0.9605098350591709, "eval_precision_micro": 0.9165545087483177, "eval_recall_macro": 0.9328126952515738, "eval_recall_micro": 0.9165545087483177, "eval_runtime": 14.6901, "eval_samples_per_second": 50.578, "eval_steps_per_second": 3.199, "step": 13000 }, { "epoch": 2.939255388634879, "grad_norm": 79597.40625, "learning_rate": 1.5955667181005554e-05, "loss": 0.1293, "step": 13500 }, { "epoch": 2.939255388634879, "eval_accuracy": 0.9246298788694481, "eval_f1_macro": 0.9319848397676713, "eval_f1_micro": 0.9246298788694481, "eval_loss": 0.27543124556541443, "eval_precision_macro": 0.9589344708678029, "eval_precision_micro": 0.9246298788694481, "eval_recall_macro": 0.932925082879603, "eval_recall_micro": 0.9246298788694481, "eval_runtime": 14.726, "eval_samples_per_second": 50.455, "eval_steps_per_second": 3.192, "step": 13500 }, { "epoch": 3.04811669932506, "grad_norm": 25773.66796875, "learning_rate": 1.3031696272762192e-05, "loss": 0.1137, "step": 14000 }, { "epoch": 3.04811669932506, "eval_accuracy": 0.9246298788694481, "eval_f1_macro": 0.937910042741771, "eval_f1_micro": 0.9246298788694481, "eval_loss": 0.20125848054885864, "eval_precision_macro": 0.9546735463378956, "eval_precision_micro": 0.9246298788694481, "eval_recall_macro": 0.9429177293988182, "eval_recall_micro": 0.9246298788694481, "eval_runtime": 15.0054, "eval_samples_per_second": 49.515, "eval_steps_per_second": 3.132, "step": 14000 }, { "epoch": 3.1569780100152407, "grad_norm": 17888.46484375, "learning_rate": 1.0350951836516297e-05, "loss": 0.0987, "step": 14500 }, { "epoch": 3.1569780100152407, "eval_accuracy": 0.9232839838492598, "eval_f1_macro": 0.9266276405829272, "eval_f1_micro": 0.9232839838492598, "eval_loss": 0.29369959235191345, "eval_precision_macro": 0.9436536313571009, "eval_precision_micro": 0.9232839838492598, "eval_recall_macro": 0.9283196203410136, "eval_recall_micro": 0.9232839838492598, "eval_runtime": 14.7764, "eval_samples_per_second": 50.283, "eval_steps_per_second": 3.181, "step": 14500 }, { "epoch": 3.265839320705421, "grad_norm": 85828.9375, "learning_rate": 7.9376114407998e-06, "loss": 0.0859, "step": 15000 }, { "epoch": 3.265839320705421, "eval_accuracy": 0.9246298788694481, "eval_f1_macro": 0.9402166974265765, "eval_f1_micro": 0.9246298788694481, "eval_loss": 0.17889092862606049, "eval_precision_macro": 0.9685045177945787, "eval_precision_micro": 0.9246298788694481, "eval_recall_macro": 0.9463450172046672, "eval_recall_micro": 0.9246298788694481, "eval_runtime": 14.7495, "eval_samples_per_second": 50.375, "eval_steps_per_second": 3.187, "step": 15000 }, { "epoch": 3.374700631395602, "grad_norm": 146288.75, "learning_rate": 5.813440943640527e-06, "loss": 0.0857, "step": 15500 }, { "epoch": 3.374700631395602, "eval_accuracy": 0.927321668909825, "eval_f1_macro": 0.9434096975688787, "eval_f1_micro": 0.927321668909825, "eval_loss": 0.16961060464382172, "eval_precision_macro": 0.9641802881027017, "eval_precision_micro": 0.927321668909825, "eval_recall_macro": 0.9472331991452233, "eval_recall_micro": 0.927321668909825, "eval_runtime": 14.7305, "eval_samples_per_second": 50.44, "eval_steps_per_second": 3.191, "step": 15500 } ], "logging_steps": 500, "max_steps": 18372, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.52555679969065e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }