| { | |
| "best_metric": 0.8982632597992836, | |
| "best_model_checkpoint": "./arabert_author_model/checkpoint-34500", | |
| "epoch": 3.9863325740318905, | |
| "eval_steps": 500, | |
| "global_step": 35000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05694760820045558, | |
| "grad_norm": 15.19107437133789, | |
| "learning_rate": 1.1343963553530753e-05, | |
| "loss": 2.7205, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.05694760820045558, | |
| "eval_accuracy": 0.3750300697618475, | |
| "eval_f1_macro": 0.28596660166175775, | |
| "eval_f1_micro": 0.3750300697618475, | |
| "eval_loss": 2.32218337059021, | |
| "eval_precision_macro": 0.3865217733215992, | |
| "eval_precision_micro": 0.3750300697618475, | |
| "eval_recall_macro": 0.3672924033460141, | |
| "eval_recall_micro": 0.3750300697618475, | |
| "eval_runtime": 9.1613, | |
| "eval_samples_per_second": 453.755, | |
| "eval_steps_per_second": 56.76, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.11389521640091116, | |
| "grad_norm": 21.725738525390625, | |
| "learning_rate": 2.2710706150341686e-05, | |
| "loss": 1.6025, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.11389521640091116, | |
| "eval_accuracy": 0.4892951647822949, | |
| "eval_f1_macro": 0.4406771878616986, | |
| "eval_f1_micro": 0.4892951647822949, | |
| "eval_loss": 1.6631975173950195, | |
| "eval_precision_macro": 0.56733686366756, | |
| "eval_precision_micro": 0.4892951647822949, | |
| "eval_recall_macro": 0.48704653807602066, | |
| "eval_recall_micro": 0.4892951647822949, | |
| "eval_runtime": 8.477, | |
| "eval_samples_per_second": 490.385, | |
| "eval_steps_per_second": 61.342, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.17084282460136674, | |
| "grad_norm": 13.716214179992676, | |
| "learning_rate": 3.4100227790432806e-05, | |
| "loss": 1.0328, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.17084282460136674, | |
| "eval_accuracy": 0.6615347606446957, | |
| "eval_f1_macro": 0.5830637518928246, | |
| "eval_f1_micro": 0.6615347606446957, | |
| "eval_loss": 1.1024270057678223, | |
| "eval_precision_macro": 0.697472543022928, | |
| "eval_precision_micro": 0.6615347606446957, | |
| "eval_recall_macro": 0.6033302465966366, | |
| "eval_recall_micro": 0.6615347606446957, | |
| "eval_runtime": 8.4612, | |
| "eval_samples_per_second": 491.299, | |
| "eval_steps_per_second": 61.457, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.22779043280182232, | |
| "grad_norm": 17.078737258911133, | |
| "learning_rate": 4.548974943052392e-05, | |
| "loss": 0.8504, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.22779043280182232, | |
| "eval_accuracy": 0.639162857830166, | |
| "eval_f1_macro": 0.5476482518700577, | |
| "eval_f1_micro": 0.639162857830166, | |
| "eval_loss": 1.2488256692886353, | |
| "eval_precision_macro": 0.668531963924838, | |
| "eval_precision_micro": 0.639162857830166, | |
| "eval_recall_macro": 0.5729925283587635, | |
| "eval_recall_micro": 0.639162857830166, | |
| "eval_runtime": 8.6485, | |
| "eval_samples_per_second": 480.664, | |
| "eval_steps_per_second": 60.126, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.2847380410022779, | |
| "grad_norm": 45.41240692138672, | |
| "learning_rate": 5.6856492027334856e-05, | |
| "loss": 0.7882, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.2847380410022779, | |
| "eval_accuracy": 0.6523935530430599, | |
| "eval_f1_macro": 0.5994264876841328, | |
| "eval_f1_micro": 0.6523935530430599, | |
| "eval_loss": 1.2463157176971436, | |
| "eval_precision_macro": 0.6529647074997418, | |
| "eval_precision_micro": 0.6523935530430599, | |
| "eval_recall_macro": 0.6436371671271747, | |
| "eval_recall_micro": 0.6523935530430599, | |
| "eval_runtime": 8.4613, | |
| "eval_samples_per_second": 491.293, | |
| "eval_steps_per_second": 61.456, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.3416856492027335, | |
| "grad_norm": 17.272947311401367, | |
| "learning_rate": 6.824601366742597e-05, | |
| "loss": 0.8087, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.3416856492027335, | |
| "eval_accuracy": 0.7055568919894154, | |
| "eval_f1_macro": 0.6265017927625092, | |
| "eval_f1_micro": 0.7055568919894154, | |
| "eval_loss": 1.2766073942184448, | |
| "eval_precision_macro": 0.6878658131219406, | |
| "eval_precision_micro": 0.7055568919894154, | |
| "eval_recall_macro": 0.6658140911489481, | |
| "eval_recall_micro": 0.7055568919894154, | |
| "eval_runtime": 8.9536, | |
| "eval_samples_per_second": 464.281, | |
| "eval_steps_per_second": 58.077, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.39863325740318906, | |
| "grad_norm": 36.27668380737305, | |
| "learning_rate": 7.96355353075171e-05, | |
| "loss": 0.7887, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.39863325740318906, | |
| "eval_accuracy": 0.7149386576858311, | |
| "eval_f1_macro": 0.6616726950770662, | |
| "eval_f1_micro": 0.7149386576858311, | |
| "eval_loss": 1.1627144813537598, | |
| "eval_precision_macro": 0.7514554135519156, | |
| "eval_precision_micro": 0.7149386576858311, | |
| "eval_recall_macro": 0.6834968200252539, | |
| "eval_recall_micro": 0.7149386576858311, | |
| "eval_runtime": 8.4816, | |
| "eval_samples_per_second": 490.118, | |
| "eval_steps_per_second": 61.309, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.45558086560364464, | |
| "grad_norm": 3.0386502742767334, | |
| "learning_rate": 7.995372539966228e-05, | |
| "loss": 0.7357, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.45558086560364464, | |
| "eval_accuracy": 0.7611258118835699, | |
| "eval_f1_macro": 0.6745353046335729, | |
| "eval_f1_micro": 0.7611258118835699, | |
| "eval_loss": 0.9807717800140381, | |
| "eval_precision_macro": 0.7007756446431578, | |
| "eval_precision_micro": 0.7611258118835699, | |
| "eval_recall_macro": 0.7425048520300992, | |
| "eval_recall_micro": 0.7611258118835699, | |
| "eval_runtime": 8.4828, | |
| "eval_samples_per_second": 490.05, | |
| "eval_steps_per_second": 61.3, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.5125284738041003, | |
| "grad_norm": 2.7381744384765625, | |
| "learning_rate": 7.980884762455173e-05, | |
| "loss": 0.7022, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.5125284738041003, | |
| "eval_accuracy": 0.76088525378879, | |
| "eval_f1_macro": 0.6871238458955197, | |
| "eval_f1_micro": 0.76088525378879, | |
| "eval_loss": 1.1158560514450073, | |
| "eval_precision_macro": 0.7591168248124284, | |
| "eval_precision_micro": 0.76088525378879, | |
| "eval_recall_macro": 0.7099914791461782, | |
| "eval_recall_micro": 0.76088525378879, | |
| "eval_runtime": 9.0538, | |
| "eval_samples_per_second": 459.145, | |
| "eval_steps_per_second": 57.435, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.5694760820045558, | |
| "grad_norm": 32.207054138183594, | |
| "learning_rate": 7.956625791551662e-05, | |
| "loss": 0.6587, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.5694760820045558, | |
| "eval_accuracy": 0.8008178975222516, | |
| "eval_f1_macro": 0.76243086871798, | |
| "eval_f1_micro": 0.8008178975222516, | |
| "eval_loss": 0.8932181000709534, | |
| "eval_precision_macro": 0.7876270109291943, | |
| "eval_precision_micro": 0.8008178975222516, | |
| "eval_recall_macro": 0.7876260076814952, | |
| "eval_recall_micro": 0.8008178975222516, | |
| "eval_runtime": 8.5148, | |
| "eval_samples_per_second": 488.207, | |
| "eval_steps_per_second": 61.07, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.6264236902050114, | |
| "grad_norm": 0.29054978489875793, | |
| "learning_rate": 7.922558317223566e-05, | |
| "loss": 0.5719, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.6264236902050114, | |
| "eval_accuracy": 0.773875390906904, | |
| "eval_f1_macro": 0.7249768597960354, | |
| "eval_f1_micro": 0.773875390906904, | |
| "eval_loss": 0.9966481328010559, | |
| "eval_precision_macro": 0.7358501532705917, | |
| "eval_precision_micro": 0.773875390906904, | |
| "eval_recall_macro": 0.7614420126439011, | |
| "eval_recall_micro": 0.773875390906904, | |
| "eval_runtime": 8.4793, | |
| "eval_samples_per_second": 490.251, | |
| "eval_steps_per_second": 61.326, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.683371298405467, | |
| "grad_norm": 1.0872896909713745, | |
| "learning_rate": 7.878805260363261e-05, | |
| "loss": 0.6425, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.683371298405467, | |
| "eval_accuracy": 0.771710368053885, | |
| "eval_f1_macro": 0.7254677224325083, | |
| "eval_f1_micro": 0.771710368053885, | |
| "eval_loss": 0.9605371952056885, | |
| "eval_precision_macro": 0.8005336996598393, | |
| "eval_precision_micro": 0.771710368053885, | |
| "eval_recall_macro": 0.7547512504988463, | |
| "eval_recall_micro": 0.771710368053885, | |
| "eval_runtime": 8.4743, | |
| "eval_samples_per_second": 490.54, | |
| "eval_steps_per_second": 61.362, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.7403189066059226, | |
| "grad_norm": 189.1195068359375, | |
| "learning_rate": 7.82547465603587e-05, | |
| "loss": 0.5433, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.7403189066059226, | |
| "eval_accuracy": 0.7700264613904257, | |
| "eval_f1_macro": 0.740451647791416, | |
| "eval_f1_micro": 0.7700264613904257, | |
| "eval_loss": 1.0783036947250366, | |
| "eval_precision_macro": 0.769666104173451, | |
| "eval_precision_micro": 0.7700264613904257, | |
| "eval_recall_macro": 0.7659213860833466, | |
| "eval_recall_micro": 0.7700264613904257, | |
| "eval_runtime": 8.4608, | |
| "eval_samples_per_second": 491.328, | |
| "eval_steps_per_second": 61.46, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.7972665148063781, | |
| "grad_norm": 0.5625237822532654, | |
| "learning_rate": 7.762833068916386e-05, | |
| "loss": 0.5736, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.7972665148063781, | |
| "eval_accuracy": 0.7782054366129421, | |
| "eval_f1_macro": 0.6779030343365783, | |
| "eval_f1_micro": 0.7782054366129421, | |
| "eval_loss": 1.0509027242660522, | |
| "eval_precision_macro": 0.7339555194909612, | |
| "eval_precision_micro": 0.7782054366129421, | |
| "eval_recall_macro": 0.6828886626680506, | |
| "eval_recall_micro": 0.7782054366129421, | |
| "eval_runtime": 8.481, | |
| "eval_samples_per_second": 490.156, | |
| "eval_steps_per_second": 61.314, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.8542141230068337, | |
| "grad_norm": 64.8875503540039, | |
| "learning_rate": 7.690784156928418e-05, | |
| "loss": 0.5273, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.8542141230068337, | |
| "eval_accuracy": 0.7774837623286024, | |
| "eval_f1_macro": 0.7006759117702724, | |
| "eval_f1_micro": 0.7774837623286024, | |
| "eval_loss": 1.083090901374817, | |
| "eval_precision_macro": 0.7615638623524419, | |
| "eval_precision_micro": 0.7774837623286024, | |
| "eval_recall_macro": 0.7385809772355115, | |
| "eval_recall_micro": 0.7774837623286024, | |
| "eval_runtime": 8.4704, | |
| "eval_samples_per_second": 490.769, | |
| "eval_steps_per_second": 61.39, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.9111617312072893, | |
| "grad_norm": 0.9078112840652466, | |
| "learning_rate": 7.609621959255558e-05, | |
| "loss": 0.5268, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.9111617312072893, | |
| "eval_accuracy": 0.7445273033437575, | |
| "eval_f1_macro": 0.699580887402278, | |
| "eval_f1_micro": 0.7445273033437575, | |
| "eval_loss": 1.3979923725128174, | |
| "eval_precision_macro": 0.7001048011829232, | |
| "eval_precision_micro": 0.7445273033437575, | |
| "eval_recall_macro": 0.7726266308290096, | |
| "eval_recall_micro": 0.7445273033437575, | |
| "eval_runtime": 8.4384, | |
| "eval_samples_per_second": 492.631, | |
| "eval_steps_per_second": 61.623, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.9681093394077449, | |
| "grad_norm": 42.65549850463867, | |
| "learning_rate": 7.519735782617663e-05, | |
| "loss": 0.5462, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.9681093394077449, | |
| "eval_accuracy": 0.8287226365167187, | |
| "eval_f1_macro": 0.7785313462283256, | |
| "eval_f1_micro": 0.8287226365167187, | |
| "eval_loss": 0.9026873111724854, | |
| "eval_precision_macro": 0.7868255645583773, | |
| "eval_precision_micro": 0.8287226365167187, | |
| "eval_recall_macro": 0.8024053906273642, | |
| "eval_recall_micro": 0.8287226365167187, | |
| "eval_runtime": 8.4335, | |
| "eval_samples_per_second": 492.916, | |
| "eval_steps_per_second": 61.659, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.0250569476082005, | |
| "grad_norm": 0.9462873935699463, | |
| "learning_rate": 7.420987383057407e-05, | |
| "loss": 0.486, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.0250569476082005, | |
| "eval_accuracy": 0.7211931681501083, | |
| "eval_f1_macro": 0.6932948143152837, | |
| "eval_f1_micro": 0.7211931681501083, | |
| "eval_loss": 1.7263842821121216, | |
| "eval_precision_macro": 0.7360552900892146, | |
| "eval_precision_micro": 0.7211931681501083, | |
| "eval_recall_macro": 0.7409941847523269, | |
| "eval_recall_micro": 0.7211931681501083, | |
| "eval_runtime": 8.473, | |
| "eval_samples_per_second": 490.615, | |
| "eval_steps_per_second": 61.371, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.082004555808656, | |
| "grad_norm": 0.8366897106170654, | |
| "learning_rate": 7.314014528807089e-05, | |
| "loss": 0.4458, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.082004555808656, | |
| "eval_accuracy": 0.8186191965359635, | |
| "eval_f1_macro": 0.7675006381723768, | |
| "eval_f1_micro": 0.8186191965359635, | |
| "eval_loss": 0.9773014783859253, | |
| "eval_precision_macro": 0.7844621076644617, | |
| "eval_precision_micro": 0.8186191965359635, | |
| "eval_recall_macro": 0.8041851318048103, | |
| "eval_recall_micro": 0.8186191965359635, | |
| "eval_runtime": 8.4706, | |
| "eval_samples_per_second": 490.754, | |
| "eval_steps_per_second": 61.389, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.1389521640091116, | |
| "grad_norm": 2.115098237991333, | |
| "learning_rate": 7.198652696785955e-05, | |
| "loss": 0.4102, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.1389521640091116, | |
| "eval_accuracy": 0.8316093336540774, | |
| "eval_f1_macro": 0.7933624073358755, | |
| "eval_f1_micro": 0.8316093336540774, | |
| "eval_loss": 0.967036247253418, | |
| "eval_precision_macro": 0.7949251475033909, | |
| "eval_precision_micro": 0.8316093336540774, | |
| "eval_recall_macro": 0.8274655786829063, | |
| "eval_recall_micro": 0.8316093336540774, | |
| "eval_runtime": 8.4509, | |
| "eval_samples_per_second": 491.9, | |
| "eval_steps_per_second": 61.532, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.1958997722095672, | |
| "grad_norm": 1.0413740873336792, | |
| "learning_rate": 7.075392750273938e-05, | |
| "loss": 0.3773, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.1958997722095672, | |
| "eval_accuracy": 0.8171758479672842, | |
| "eval_f1_macro": 0.7981622962245145, | |
| "eval_f1_micro": 0.8171758479672842, | |
| "eval_loss": 0.9198176860809326, | |
| "eval_precision_macro": 0.819291401144176, | |
| "eval_precision_micro": 0.8171758479672842, | |
| "eval_recall_macro": 0.8245482183363974, | |
| "eval_recall_micro": 0.8171758479672842, | |
| "eval_runtime": 8.4577, | |
| "eval_samples_per_second": 491.505, | |
| "eval_steps_per_second": 61.482, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.2528473804100229, | |
| "grad_norm": 7.1126203536987305, | |
| "learning_rate": 6.94453904277921e-05, | |
| "loss": 0.3796, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.2528473804100229, | |
| "eval_accuracy": 0.7741159490016839, | |
| "eval_f1_macro": 0.7334267874337288, | |
| "eval_f1_micro": 0.7741159490016839, | |
| "eval_loss": 1.324471116065979, | |
| "eval_precision_macro": 0.7554977548108798, | |
| "eval_precision_micro": 0.7741159490016839, | |
| "eval_recall_macro": 0.7865189643712818, | |
| "eval_recall_micro": 0.7741159490016839, | |
| "eval_runtime": 8.4546, | |
| "eval_samples_per_second": 491.688, | |
| "eval_steps_per_second": 61.505, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.3097949886104785, | |
| "grad_norm": 85.53689575195312, | |
| "learning_rate": 6.806414678327537e-05, | |
| "loss": 0.4432, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.3097949886104785, | |
| "eval_accuracy": 0.8092374308395478, | |
| "eval_f1_macro": 0.7438442519057924, | |
| "eval_f1_micro": 0.8092374308395478, | |
| "eval_loss": 1.1105079650878906, | |
| "eval_precision_macro": 0.7748012629794462, | |
| "eval_precision_micro": 0.8092374308395478, | |
| "eval_recall_macro": 0.7783932685192183, | |
| "eval_recall_micro": 0.8092374308395478, | |
| "eval_runtime": 8.4968, | |
| "eval_samples_per_second": 489.244, | |
| "eval_steps_per_second": 61.2, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.366742596810934, | |
| "grad_norm": 1.0569897890090942, | |
| "learning_rate": 6.661360713653681e-05, | |
| "loss": 0.389, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.366742596810934, | |
| "eval_accuracy": 0.8128458022612461, | |
| "eval_f1_macro": 0.7595338289100665, | |
| "eval_f1_micro": 0.8128458022612461, | |
| "eval_loss": 1.0267034769058228, | |
| "eval_precision_macro": 0.7590761169934961, | |
| "eval_precision_micro": 0.8128458022612461, | |
| "eval_recall_macro": 0.8036606747506161, | |
| "eval_recall_micro": 0.8128458022612461, | |
| "eval_runtime": 8.4648, | |
| "eval_samples_per_second": 491.09, | |
| "eval_steps_per_second": 61.431, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.4236902050113895, | |
| "grad_norm": 14.686357498168945, | |
| "learning_rate": 6.509735316063996e-05, | |
| "loss": 0.6087, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.4236902050113895, | |
| "eval_accuracy": 0.8443589126774116, | |
| "eval_f1_macro": 0.7997974993396904, | |
| "eval_f1_micro": 0.8443589126774116, | |
| "eval_loss": 0.9193519353866577, | |
| "eval_precision_macro": 0.7894219117254144, | |
| "eval_precision_micro": 0.8443589126774116, | |
| "eval_recall_macro": 0.833108777347993, | |
| "eval_recall_micro": 0.8443589126774116, | |
| "eval_runtime": 8.4379, | |
| "eval_samples_per_second": 492.657, | |
| "eval_steps_per_second": 61.627, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.4806378132118452, | |
| "grad_norm": 0.05014890432357788, | |
| "learning_rate": 6.352234452003862e-05, | |
| "loss": 0.4803, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.4806378132118452, | |
| "eval_accuracy": 0.8535001202790474, | |
| "eval_f1_macro": 0.8265458036830905, | |
| "eval_f1_micro": 0.8535001202790474, | |
| "eval_loss": 0.879317581653595, | |
| "eval_precision_macro": 0.8329036145166532, | |
| "eval_precision_micro": 0.8535001202790474, | |
| "eval_recall_macro": 0.8340628674797995, | |
| "eval_recall_micro": 0.8535001202790474, | |
| "eval_runtime": 8.4157, | |
| "eval_samples_per_second": 493.96, | |
| "eval_steps_per_second": 61.79, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.5375854214123006, | |
| "grad_norm": 40.28041076660156, | |
| "learning_rate": 6.188948654276723e-05, | |
| "loss": 0.5513, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.5375854214123006, | |
| "eval_accuracy": 0.7782054366129421, | |
| "eval_f1_macro": 0.7066835334217871, | |
| "eval_f1_micro": 0.7782054366129421, | |
| "eval_loss": 1.1909141540527344, | |
| "eval_precision_macro": 0.7255202837659691, | |
| "eval_precision_micro": 0.7782054366129421, | |
| "eval_recall_macro": 0.7402758652568006, | |
| "eval_recall_micro": 0.7782054366129421, | |
| "eval_runtime": 8.4835, | |
| "eval_samples_per_second": 490.012, | |
| "eval_steps_per_second": 61.296, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.5945330296127562, | |
| "grad_norm": 8.929847717285156, | |
| "learning_rate": 6.019936353958699e-05, | |
| "loss": 0.693, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.5945330296127562, | |
| "eval_accuracy": 0.2460909309598268, | |
| "eval_f1_macro": 0.20336556842910194, | |
| "eval_f1_micro": 0.2460909309598268, | |
| "eval_loss": 2.8277578353881836, | |
| "eval_precision_macro": 0.5226671539146224, | |
| "eval_precision_micro": 0.2460909309598268, | |
| "eval_recall_macro": 0.1936798727163823, | |
| "eval_recall_micro": 0.2460909309598268, | |
| "eval_runtime": 8.4782, | |
| "eval_samples_per_second": 490.315, | |
| "eval_steps_per_second": 61.334, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.6514806378132119, | |
| "grad_norm": 28.1862735748291, | |
| "learning_rate": 5.8459364260048594e-05, | |
| "loss": 0.9646, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.6514806378132119, | |
| "eval_accuracy": 0.8246331489054607, | |
| "eval_f1_macro": 0.7913907456133474, | |
| "eval_f1_micro": 0.8246331489054607, | |
| "eval_loss": 0.948131799697876, | |
| "eval_precision_macro": 0.799701208597097, | |
| "eval_precision_micro": 0.8246331489054607, | |
| "eval_recall_macro": 0.8123299807423895, | |
| "eval_recall_micro": 0.8246331489054607, | |
| "eval_runtime": 8.4643, | |
| "eval_samples_per_second": 491.124, | |
| "eval_steps_per_second": 61.435, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.7084282460136673, | |
| "grad_norm": 7.083284854888916, | |
| "learning_rate": 5.6673785111054136e-05, | |
| "loss": 0.462, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.7084282460136673, | |
| "eval_accuracy": 0.8472456098147703, | |
| "eval_f1_macro": 0.8091993297668394, | |
| "eval_f1_micro": 0.8472456098147703, | |
| "eval_loss": 0.8667464256286621, | |
| "eval_precision_macro": 0.8198322314440802, | |
| "eval_precision_micro": 0.8472456098147703, | |
| "eval_recall_macro": 0.8266325847969769, | |
| "eval_recall_micro": 0.8472456098147703, | |
| "eval_runtime": 8.4589, | |
| "eval_samples_per_second": 491.435, | |
| "eval_steps_per_second": 61.474, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.7653758542141231, | |
| "grad_norm": 65.16443634033203, | |
| "learning_rate": 5.484703504533721e-05, | |
| "loss": 0.4093, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.7653758542141231, | |
| "eval_accuracy": 0.8116430117873467, | |
| "eval_f1_macro": 0.7700885656504642, | |
| "eval_f1_micro": 0.8116430117873467, | |
| "eval_loss": 1.1932649612426758, | |
| "eval_precision_macro": 0.8232095109547221, | |
| "eval_precision_micro": 0.8116430117873467, | |
| "eval_recall_macro": 0.775800293436143, | |
| "eval_recall_micro": 0.8116430117873467, | |
| "eval_runtime": 8.4735, | |
| "eval_samples_per_second": 490.589, | |
| "eval_steps_per_second": 61.368, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.8223234624145785, | |
| "grad_norm": 15.238636016845703, | |
| "learning_rate": 5.2983624674875084e-05, | |
| "loss": 0.349, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.8223234624145785, | |
| "eval_accuracy": 0.8799615107048352, | |
| "eval_f1_macro": 0.8377281287378512, | |
| "eval_f1_micro": 0.8799615107048352, | |
| "eval_loss": 0.7749123573303223, | |
| "eval_precision_macro": 0.8358272736538074, | |
| "eval_precision_micro": 0.8799615107048352, | |
| "eval_recall_macro": 0.8723899950094214, | |
| "eval_recall_micro": 0.8799615107048352, | |
| "eval_runtime": 8.4617, | |
| "eval_samples_per_second": 491.272, | |
| "eval_steps_per_second": 61.453, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.8792710706150342, | |
| "grad_norm": 1.931815505027771, | |
| "learning_rate": 5.108815513328386e-05, | |
| "loss": 0.3333, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.8792710706150342, | |
| "eval_accuracy": 0.8799615107048352, | |
| "eval_f1_macro": 0.851689807067872, | |
| "eval_f1_micro": 0.8799615107048352, | |
| "eval_loss": 0.6482954025268555, | |
| "eval_precision_macro": 0.8439980638748368, | |
| "eval_precision_micro": 0.8799615107048352, | |
| "eval_recall_macro": 0.8709009740944947, | |
| "eval_recall_micro": 0.8799615107048352, | |
| "eval_runtime": 8.4707, | |
| "eval_samples_per_second": 490.751, | |
| "eval_steps_per_second": 61.388, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.9362186788154898, | |
| "grad_norm": 14.734030723571777, | |
| "learning_rate": 4.916530671469754e-05, | |
| "loss": 0.3449, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.9362186788154898, | |
| "eval_accuracy": 0.7890305508780371, | |
| "eval_f1_macro": 0.7375164837600725, | |
| "eval_f1_micro": 0.7890305508780371, | |
| "eval_loss": 1.2130000591278076, | |
| "eval_precision_macro": 0.7696829158091395, | |
| "eval_precision_micro": 0.7890305508780371, | |
| "eval_recall_macro": 0.7545432686041814, | |
| "eval_recall_micro": 0.7890305508780371, | |
| "eval_runtime": 8.448, | |
| "eval_samples_per_second": 492.07, | |
| "eval_steps_per_second": 61.553, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.9931662870159452, | |
| "grad_norm": 4.0362091064453125, | |
| "learning_rate": 4.7219827317183907e-05, | |
| "loss": 0.2982, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.9931662870159452, | |
| "eval_accuracy": 0.8599951888381044, | |
| "eval_f1_macro": 0.8354955249556116, | |
| "eval_f1_micro": 0.8599951888381044, | |
| "eval_loss": 0.8803524374961853, | |
| "eval_precision_macro": 0.8342493543222153, | |
| "eval_precision_micro": 0.8599951888381044, | |
| "eval_recall_macro": 0.8556466165427711, | |
| "eval_recall_micro": 0.8599951888381044, | |
| "eval_runtime": 8.4723, | |
| "eval_samples_per_second": 490.66, | |
| "eval_steps_per_second": 61.377, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 2.050113895216401, | |
| "grad_norm": 0.7534123659133911, | |
| "learning_rate": 4.525652071923279e-05, | |
| "loss": 0.2348, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.050113895216401, | |
| "eval_accuracy": 0.8448400288669714, | |
| "eval_f1_macro": 0.8190720986060728, | |
| "eval_f1_micro": 0.8448400288669714, | |
| "eval_loss": 1.1245763301849365, | |
| "eval_precision_macro": 0.8337767355676724, | |
| "eval_precision_micro": 0.8448400288669714, | |
| "eval_recall_macro": 0.8557743090961959, | |
| "eval_recall_micro": 0.8448400288669714, | |
| "eval_runtime": 8.4721, | |
| "eval_samples_per_second": 490.667, | |
| "eval_steps_per_second": 61.378, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.1070615034168565, | |
| "grad_norm": 1.4927374124526978, | |
| "learning_rate": 4.328023471826429e-05, | |
| "loss": 0.2299, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.1070615034168565, | |
| "eval_accuracy": 0.8708203031031995, | |
| "eval_f1_macro": 0.8303241771039817, | |
| "eval_f1_micro": 0.8708203031031995, | |
| "eval_loss": 0.8329204320907593, | |
| "eval_precision_macro": 0.83267401976066, | |
| "eval_precision_micro": 0.8708203031031995, | |
| "eval_recall_macro": 0.8610797711013763, | |
| "eval_recall_micro": 0.8708203031031995, | |
| "eval_runtime": 8.4578, | |
| "eval_samples_per_second": 491.498, | |
| "eval_steps_per_second": 61.482, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.164009111617312, | |
| "grad_norm": 1.7383619546890259, | |
| "learning_rate": 4.129584916044555e-05, | |
| "loss": 0.2468, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.164009111617312, | |
| "eval_accuracy": 0.8773153716622565, | |
| "eval_f1_macro": 0.846177762458016, | |
| "eval_f1_micro": 0.8773153716622565, | |
| "eval_loss": 0.7664415836334229, | |
| "eval_precision_macro": 0.8368864405451267, | |
| "eval_precision_micro": 0.8773153716622565, | |
| "eval_recall_macro": 0.8780489137762446, | |
| "eval_recall_micro": 0.8773153716622565, | |
| "eval_runtime": 8.4381, | |
| "eval_samples_per_second": 492.645, | |
| "eval_steps_per_second": 61.625, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.2209567198177678, | |
| "grad_norm": 400.013916015625, | |
| "learning_rate": 3.930826389137262e-05, | |
| "loss": 0.1899, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.2209567198177678, | |
| "eval_accuracy": 0.8862160211691124, | |
| "eval_f1_macro": 0.8558563382552302, | |
| "eval_f1_micro": 0.8862160211691124, | |
| "eval_loss": 0.7495226263999939, | |
| "eval_precision_macro": 0.8555231307289006, | |
| "eval_precision_micro": 0.8862160211691124, | |
| "eval_recall_macro": 0.8758801511510593, | |
| "eval_recall_micro": 0.8862160211691124, | |
| "eval_runtime": 8.4445, | |
| "eval_samples_per_second": 492.274, | |
| "eval_steps_per_second": 61.579, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.277904328018223, | |
| "grad_norm": 2.0449647903442383, | |
| "learning_rate": 3.732635344608829e-05, | |
| "loss": 0.1977, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.277904328018223, | |
| "eval_accuracy": 0.8551840269425066, | |
| "eval_f1_macro": 0.8198359165864053, | |
| "eval_f1_micro": 0.8551840269425066, | |
| "eval_loss": 1.0149922370910645, | |
| "eval_precision_macro": 0.8328051392132652, | |
| "eval_precision_micro": 0.8551840269425066, | |
| "eval_recall_macro": 0.853902169918103, | |
| "eval_recall_micro": 0.8551840269425066, | |
| "eval_runtime": 8.4355, | |
| "eval_samples_per_second": 492.798, | |
| "eval_steps_per_second": 61.644, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.334851936218679, | |
| "grad_norm": 0.7749654650688171, | |
| "learning_rate": 3.534706966798757e-05, | |
| "loss": 0.2314, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.334851936218679, | |
| "eval_accuracy": 0.8948761125811884, | |
| "eval_f1_macro": 0.8659539030629905, | |
| "eval_f1_micro": 0.8948761125811884, | |
| "eval_loss": 0.6898870468139648, | |
| "eval_precision_macro": 0.8564830162508421, | |
| "eval_precision_micro": 0.8948761125811884, | |
| "eval_recall_macro": 0.8978688795664334, | |
| "eval_recall_micro": 0.8948761125811884, | |
| "eval_runtime": 8.4412, | |
| "eval_samples_per_second": 492.466, | |
| "eval_steps_per_second": 61.603, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.3917995444191344, | |
| "grad_norm": 0.007438243832439184, | |
| "learning_rate": 3.337927490728384e-05, | |
| "loss": 0.203, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.3917995444191344, | |
| "eval_accuracy": 0.8984844840028867, | |
| "eval_f1_macro": 0.8654367599963082, | |
| "eval_f1_micro": 0.8984844840028867, | |
| "eval_loss": 0.6225568652153015, | |
| "eval_precision_macro": 0.8710584795534867, | |
| "eval_precision_micro": 0.8984844840028867, | |
| "eval_recall_macro": 0.8907316535034207, | |
| "eval_recall_micro": 0.8984844840028867, | |
| "eval_runtime": 8.4582, | |
| "eval_samples_per_second": 491.477, | |
| "eval_steps_per_second": 61.479, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.44874715261959, | |
| "grad_norm": 1.0429240465164185, | |
| "learning_rate": 3.142782804357047e-05, | |
| "loss": 0.1784, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.44874715261959, | |
| "eval_accuracy": 0.8806831849891749, | |
| "eval_f1_macro": 0.8516903935082014, | |
| "eval_f1_micro": 0.8806831849891749, | |
| "eval_loss": 0.8446455001831055, | |
| "eval_precision_macro": 0.86002376045135, | |
| "eval_precision_micro": 0.8806831849891749, | |
| "eval_recall_macro": 0.8768325982997476, | |
| "eval_recall_micro": 0.8806831849891749, | |
| "eval_runtime": 8.4659, | |
| "eval_samples_per_second": 491.026, | |
| "eval_steps_per_second": 61.423, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.5056947608200457, | |
| "grad_norm": 89.87359619140625, | |
| "learning_rate": 2.9497547590207118e-05, | |
| "loss": 0.2044, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.5056947608200457, | |
| "eval_accuracy": 0.8556651431320664, | |
| "eval_f1_macro": 0.8466413367690651, | |
| "eval_f1_micro": 0.8556651431320664, | |
| "eval_loss": 0.9901952743530273, | |
| "eval_precision_macro": 0.8512325160929325, | |
| "eval_precision_micro": 0.8556651431320664, | |
| "eval_recall_macro": 0.875509109034969, | |
| "eval_recall_micro": 0.8556651431320664, | |
| "eval_runtime": 8.5077, | |
| "eval_samples_per_second": 488.617, | |
| "eval_steps_per_second": 61.121, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.562642369020501, | |
| "grad_norm": 0.013815644197165966, | |
| "learning_rate": 2.759319979644478e-05, | |
| "loss": 0.1914, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.562642369020501, | |
| "eval_accuracy": 0.8927110897281694, | |
| "eval_f1_macro": 0.8644517343056812, | |
| "eval_f1_micro": 0.8927110897281694, | |
| "eval_loss": 0.7158553004264832, | |
| "eval_precision_macro": 0.8493787111870335, | |
| "eval_precision_micro": 0.8927110897281694, | |
| "eval_recall_macro": 0.898066885887026, | |
| "eval_recall_micro": 0.8927110897281694, | |
| "eval_runtime": 8.4749, | |
| "eval_samples_per_second": 490.509, | |
| "eval_steps_per_second": 61.358, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.619589977220957, | |
| "grad_norm": 4.245390892028809, | |
| "learning_rate": 2.5719486878601176e-05, | |
| "loss": 0.1639, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.619589977220957, | |
| "eval_accuracy": 0.8864565792638922, | |
| "eval_f1_macro": 0.8458899447458572, | |
| "eval_f1_micro": 0.8864565792638922, | |
| "eval_loss": 0.729199230670929, | |
| "eval_precision_macro": 0.8401968974647541, | |
| "eval_precision_micro": 0.8864565792638922, | |
| "eval_recall_macro": 0.8805776786360812, | |
| "eval_recall_micro": 0.8864565792638922, | |
| "eval_runtime": 8.4586, | |
| "eval_samples_per_second": 491.451, | |
| "eval_steps_per_second": 61.476, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.6765375854214124, | |
| "grad_norm": 1.427230715751648, | |
| "learning_rate": 2.3881035409346452e-05, | |
| "loss": 0.218, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.6765375854214124, | |
| "eval_accuracy": 0.8965600192446476, | |
| "eval_f1_macro": 0.8710100150868682, | |
| "eval_f1_micro": 0.8965600192446476, | |
| "eval_loss": 0.6507639288902283, | |
| "eval_precision_macro": 0.8627987859589317, | |
| "eval_precision_micro": 0.8965600192446476, | |
| "eval_recall_macro": 0.8951286903819264, | |
| "eval_recall_micro": 0.8965600192446476, | |
| "eval_runtime": 8.4672, | |
| "eval_samples_per_second": 490.955, | |
| "eval_steps_per_second": 61.414, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.733485193621868, | |
| "grad_norm": 0.07296980172395706, | |
| "learning_rate": 2.208238489376805e-05, | |
| "loss": 0.1723, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.733485193621868, | |
| "eval_accuracy": 0.9023334135193649, | |
| "eval_f1_macro": 0.8816239296246752, | |
| "eval_f1_micro": 0.9023334135193649, | |
| "eval_loss": 0.6424487233161926, | |
| "eval_precision_macro": 0.8835852425011356, | |
| "eval_precision_micro": 0.9023334135193649, | |
| "eval_recall_macro": 0.8920920740693067, | |
| "eval_recall_micro": 0.9023334135193649, | |
| "eval_runtime": 8.4613, | |
| "eval_samples_per_second": 491.296, | |
| "eval_steps_per_second": 61.456, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.7904328018223232, | |
| "grad_norm": 125.23475646972656, | |
| "learning_rate": 2.0331438324793375e-05, | |
| "loss": 0.1682, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.7904328018223232, | |
| "eval_accuracy": 0.8972816935289872, | |
| "eval_f1_macro": 0.8822737330613647, | |
| "eval_f1_micro": 0.8972816935289872, | |
| "eval_loss": 0.6842340230941772, | |
| "eval_precision_macro": 0.8827282408108892, | |
| "eval_precision_micro": 0.8972816935289872, | |
| "eval_recall_macro": 0.8944889961638287, | |
| "eval_recall_micro": 0.8972816935289872, | |
| "eval_runtime": 8.4764, | |
| "eval_samples_per_second": 490.419, | |
| "eval_steps_per_second": 61.347, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.847380410022779, | |
| "grad_norm": 0.5098503828048706, | |
| "learning_rate": 1.8628863347570347e-05, | |
| "loss": 0.1441, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.847380410022779, | |
| "eval_accuracy": 0.8948761125811884, | |
| "eval_f1_macro": 0.8736085811230334, | |
| "eval_f1_micro": 0.8948761125811884, | |
| "eval_loss": 0.7110973596572876, | |
| "eval_precision_macro": 0.8674915707955687, | |
| "eval_precision_micro": 0.8948761125811884, | |
| "eval_recall_macro": 0.901699859992757, | |
| "eval_recall_micro": 0.8948761125811884, | |
| "eval_runtime": 8.4537, | |
| "eval_samples_per_second": 491.739, | |
| "eval_steps_per_second": 61.512, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.9043280182232345, | |
| "grad_norm": 0.8350435495376587, | |
| "learning_rate": 1.6975596030661532e-05, | |
| "loss": 0.1625, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.9043280182232345, | |
| "eval_accuracy": 0.9030550878037046, | |
| "eval_f1_macro": 0.8779275841497265, | |
| "eval_f1_micro": 0.9030550878037046, | |
| "eval_loss": 0.6505866050720215, | |
| "eval_precision_macro": 0.8690908487688577, | |
| "eval_precision_micro": 0.9030550878037046, | |
| "eval_recall_macro": 0.9045801541095179, | |
| "eval_recall_micro": 0.9030550878037046, | |
| "eval_runtime": 8.4694, | |
| "eval_samples_per_second": 490.823, | |
| "eval_steps_per_second": 61.397, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.9612756264236904, | |
| "grad_norm": 0.08747211843729019, | |
| "learning_rate": 1.537918058104578e-05, | |
| "loss": 0.1494, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.9612756264236904, | |
| "eval_accuracy": 0.9090690401732018, | |
| "eval_f1_macro": 0.8830141025055214, | |
| "eval_f1_micro": 0.9090690401732018, | |
| "eval_loss": 0.5693129301071167, | |
| "eval_precision_macro": 0.8816974949312945, | |
| "eval_precision_micro": 0.9090690401732018, | |
| "eval_recall_macro": 0.9015040014659215, | |
| "eval_recall_micro": 0.9090690401732018, | |
| "eval_runtime": 8.5221, | |
| "eval_samples_per_second": 487.793, | |
| "eval_steps_per_second": 61.018, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 3.0182232346241458, | |
| "grad_norm": 2.089958906173706, | |
| "learning_rate": 1.3843558868376073e-05, | |
| "loss": 0.1306, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 3.0182232346241458, | |
| "eval_accuracy": 0.9179696896800578, | |
| "eval_f1_macro": 0.8947539710239703, | |
| "eval_f1_micro": 0.9179696896800578, | |
| "eval_loss": 0.534771203994751, | |
| "eval_precision_macro": 0.8923066730381018, | |
| "eval_precision_micro": 0.9179696896800578, | |
| "eval_recall_macro": 0.9121332598176911, | |
| "eval_recall_micro": 0.9179696896800578, | |
| "eval_runtime": 8.5905, | |
| "eval_samples_per_second": 483.909, | |
| "eval_steps_per_second": 60.532, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 3.075170842824601, | |
| "grad_norm": 0.0017847216222435236, | |
| "learning_rate": 1.2372522650386443e-05, | |
| "loss": 0.0929, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 3.075170842824601, | |
| "eval_accuracy": 0.9162857830165985, | |
| "eval_f1_macro": 0.8899287169854497, | |
| "eval_f1_micro": 0.9162857830165985, | |
| "eval_loss": 0.5843378305435181, | |
| "eval_precision_macro": 0.8796301981250076, | |
| "eval_precision_micro": 0.9162857830165985, | |
| "eval_recall_macro": 0.9152639619070458, | |
| "eval_recall_micro": 0.9162857830165985, | |
| "eval_runtime": 8.4184, | |
| "eval_samples_per_second": 493.798, | |
| "eval_steps_per_second": 61.769, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 3.132118451025057, | |
| "grad_norm": 0.7512030601501465, | |
| "learning_rate": 1.096970421028209e-05, | |
| "loss": 0.1351, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 3.132118451025057, | |
| "eval_accuracy": 0.9023334135193649, | |
| "eval_f1_macro": 0.8733006387364801, | |
| "eval_f1_micro": 0.9023334135193649, | |
| "eval_loss": 0.6913191080093384, | |
| "eval_precision_macro": 0.8611014409270759, | |
| "eval_precision_micro": 0.9023334135193649, | |
| "eval_recall_macro": 0.9038150903313851, | |
| "eval_recall_micro": 0.9023334135193649, | |
| "eval_runtime": 8.4379, | |
| "eval_samples_per_second": 492.656, | |
| "eval_steps_per_second": 61.626, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 3.1890660592255125, | |
| "grad_norm": 0.005041371565312147, | |
| "learning_rate": 9.638567387904402e-06, | |
| "loss": 0.0907, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 3.1890660592255125, | |
| "eval_accuracy": 0.9206158287226365, | |
| "eval_f1_macro": 0.8974407788451421, | |
| "eval_f1_micro": 0.9206158287226365, | |
| "eval_loss": 0.5801523327827454, | |
| "eval_precision_macro": 0.8882950671220116, | |
| "eval_precision_micro": 0.9206158287226365, | |
| "eval_recall_macro": 0.9136820970599899, | |
| "eval_recall_micro": 0.9206158287226365, | |
| "eval_runtime": 8.4725, | |
| "eval_samples_per_second": 490.649, | |
| "eval_steps_per_second": 61.375, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 3.2460136674259683, | |
| "grad_norm": 0.6128404140472412, | |
| "learning_rate": 8.382399026816216e-06, | |
| "loss": 0.117, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 3.2460136674259683, | |
| "eval_accuracy": 0.9105123887418811, | |
| "eval_f1_macro": 0.8877368293575257, | |
| "eval_f1_micro": 0.9105123887418811, | |
| "eval_loss": 0.6602935791015625, | |
| "eval_precision_macro": 0.8839485939859522, | |
| "eval_precision_micro": 0.9105123887418811, | |
| "eval_recall_macro": 0.905732720534913, | |
| "eval_recall_micro": 0.9105123887418811, | |
| "eval_runtime": 8.482, | |
| "eval_samples_per_second": 490.097, | |
| "eval_steps_per_second": 61.306, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 3.3029612756264237, | |
| "grad_norm": 0.0018906695768237114, | |
| "learning_rate": 7.2065771743884275e-06, | |
| "loss": 0.0986, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 3.3029612756264237, | |
| "eval_accuracy": 0.9105123887418811, | |
| "eval_f1_macro": 0.887646044144043, | |
| "eval_f1_micro": 0.9105123887418811, | |
| "eval_loss": 0.6138319969177246, | |
| "eval_precision_macro": 0.887907584612917, | |
| "eval_precision_micro": 0.9105123887418811, | |
| "eval_recall_macro": 0.9029879731406512, | |
| "eval_recall_micro": 0.9105123887418811, | |
| "eval_runtime": 8.4603, | |
| "eval_samples_per_second": 491.355, | |
| "eval_steps_per_second": 61.464, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 3.359908883826879, | |
| "grad_norm": 1.5009195804595947, | |
| "learning_rate": 6.109293429462298e-06, | |
| "loss": 0.1196, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 3.359908883826879, | |
| "eval_accuracy": 0.9131585277844599, | |
| "eval_f1_macro": 0.8922838073892356, | |
| "eval_f1_micro": 0.9131585277844599, | |
| "eval_loss": 0.6528560519218445, | |
| "eval_precision_macro": 0.8910787251464412, | |
| "eval_precision_micro": 0.9131585277844599, | |
| "eval_recall_macro": 0.9099849910959319, | |
| "eval_recall_micro": 0.9131585277844599, | |
| "eval_runtime": 8.4579, | |
| "eval_samples_per_second": 491.492, | |
| "eval_steps_per_second": 61.481, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 3.416856492027335, | |
| "grad_norm": 2.331648349761963, | |
| "learning_rate": 5.0956926304652455e-06, | |
| "loss": 0.1056, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.416856492027335, | |
| "eval_accuracy": 0.9131585277844599, | |
| "eval_f1_macro": 0.8919235957427174, | |
| "eval_f1_micro": 0.9131585277844599, | |
| "eval_loss": 0.6215759515762329, | |
| "eval_precision_macro": 0.884609220404625, | |
| "eval_precision_micro": 0.9131585277844599, | |
| "eval_recall_macro": 0.9135941017288176, | |
| "eval_recall_micro": 0.9131585277844599, | |
| "eval_runtime": 8.4442, | |
| "eval_samples_per_second": 492.29, | |
| "eval_steps_per_second": 61.581, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.4738041002277904, | |
| "grad_norm": 148.2794189453125, | |
| "learning_rate": 4.168277560886878e-06, | |
| "loss": 0.1106, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 3.4738041002277904, | |
| "eval_accuracy": 0.9201347125330768, | |
| "eval_f1_macro": 0.891345440347701, | |
| "eval_f1_micro": 0.9201347125330768, | |
| "eval_loss": 0.5669803023338318, | |
| "eval_precision_macro": 0.8817511779041877, | |
| "eval_precision_micro": 0.9201347125330768, | |
| "eval_recall_macro": 0.9129521474780394, | |
| "eval_recall_micro": 0.9201347125330768, | |
| "eval_runtime": 8.4316, | |
| "eval_samples_per_second": 493.024, | |
| "eval_steps_per_second": 61.672, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 3.5307517084282463, | |
| "grad_norm": 4.748849868774414, | |
| "learning_rate": 3.3293381943799983e-06, | |
| "loss": 0.0953, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.5307517084282463, | |
| "eval_accuracy": 0.9189319220591773, | |
| "eval_f1_macro": 0.8934567988898604, | |
| "eval_f1_micro": 0.9189319220591773, | |
| "eval_loss": 0.569622278213501, | |
| "eval_precision_macro": 0.8825638399763271, | |
| "eval_precision_micro": 0.9189319220591773, | |
| "eval_recall_macro": 0.9187150647269768, | |
| "eval_recall_micro": 0.9189319220591773, | |
| "eval_runtime": 8.4421, | |
| "eval_samples_per_second": 492.411, | |
| "eval_steps_per_second": 61.596, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.5876993166287017, | |
| "grad_norm": 1.284857153892517, | |
| "learning_rate": 2.580946040356764e-06, | |
| "loss": 0.0989, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 3.5876993166287017, | |
| "eval_accuracy": 0.9194130382487371, | |
| "eval_f1_macro": 0.8981523298720768, | |
| "eval_f1_micro": 0.9194130382487371, | |
| "eval_loss": 0.5652771592140198, | |
| "eval_precision_macro": 0.8854580096444836, | |
| "eval_precision_micro": 0.9194130382487371, | |
| "eval_recall_macro": 0.9219999897271275, | |
| "eval_recall_micro": 0.9194130382487371, | |
| "eval_runtime": 8.43, | |
| "eval_samples_per_second": 493.122, | |
| "eval_steps_per_second": 61.685, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 3.644646924829157, | |
| "grad_norm": 1.4339042901992798, | |
| "learning_rate": 1.9249490290167914e-06, | |
| "loss": 0.0989, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 3.644646924829157, | |
| "eval_accuracy": 0.9138802020687996, | |
| "eval_f1_macro": 0.89412218234686, | |
| "eval_f1_micro": 0.9138802020687996, | |
| "eval_loss": 0.6019502282142639, | |
| "eval_precision_macro": 0.882568682117879, | |
| "eval_precision_micro": 0.9138802020687996, | |
| "eval_recall_macro": 0.9175339639156622, | |
| "eval_recall_micro": 0.9138802020687996, | |
| "eval_runtime": 8.4297, | |
| "eval_samples_per_second": 493.138, | |
| "eval_steps_per_second": 61.687, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 3.7015945330296125, | |
| "grad_norm": 0.7662363648414612, | |
| "learning_rate": 1.3629669484372722e-06, | |
| "loss": 0.0876, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 3.7015945330296125, | |
| "eval_accuracy": 0.9167668992061583, | |
| "eval_f1_macro": 0.8960433945398601, | |
| "eval_f1_micro": 0.9167668992061583, | |
| "eval_loss": 0.578229546546936, | |
| "eval_precision_macro": 0.8823539415499122, | |
| "eval_precision_micro": 0.9167668992061583, | |
| "eval_recall_macro": 0.9211442367709737, | |
| "eval_recall_micro": 0.9167668992061583, | |
| "eval_runtime": 8.4419, | |
| "eval_samples_per_second": 492.426, | |
| "eval_steps_per_second": 61.598, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 3.7585421412300684, | |
| "grad_norm": 0.8838233947753906, | |
| "learning_rate": 8.963874449915156e-07, | |
| "loss": 0.1004, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 3.7585421412300684, | |
| "eval_accuracy": 0.9186913639643974, | |
| "eval_f1_macro": 0.8974164477796313, | |
| "eval_f1_micro": 0.9186913639643974, | |
| "eval_loss": 0.5638399720191956, | |
| "eval_precision_macro": 0.8850042534249818, | |
| "eval_precision_micro": 0.9186913639643974, | |
| "eval_recall_macro": 0.9204573653096998, | |
| "eval_recall_micro": 0.9186913639643974, | |
| "eval_runtime": 8.4287, | |
| "eval_samples_per_second": 493.198, | |
| "eval_steps_per_second": 61.694, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 3.8154897494305238, | |
| "grad_norm": 0.8862270712852478, | |
| "learning_rate": 5.263625969720654e-07, | |
| "loss": 0.1163, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 3.8154897494305238, | |
| "eval_accuracy": 0.9189319220591773, | |
| "eval_f1_macro": 0.8978554181419179, | |
| "eval_f1_micro": 0.9189319220591773, | |
| "eval_loss": 0.5575982332229614, | |
| "eval_precision_macro": 0.8842809438001717, | |
| "eval_precision_micro": 0.9189319220591773, | |
| "eval_recall_macro": 0.9207880969765441, | |
| "eval_recall_micro": 0.9189319220591773, | |
| "eval_runtime": 8.4411, | |
| "eval_samples_per_second": 492.47, | |
| "eval_steps_per_second": 61.603, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 3.8724373576309796, | |
| "grad_norm": 0.0018380646361038089, | |
| "learning_rate": 2.5380606987847725e-07, | |
| "loss": 0.1014, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 3.8724373576309796, | |
| "eval_accuracy": 0.9184508058696175, | |
| "eval_f1_macro": 0.8970972466165426, | |
| "eval_f1_micro": 0.9184508058696175, | |
| "eval_loss": 0.5561444759368896, | |
| "eval_precision_macro": 0.8836350789167062, | |
| "eval_precision_micro": 0.9184508058696175, | |
| "eval_recall_macro": 0.9206973516595941, | |
| "eval_recall_micro": 0.9184508058696175, | |
| "eval_runtime": 8.432, | |
| "eval_samples_per_second": 493.002, | |
| "eval_steps_per_second": 61.67, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 3.929384965831435, | |
| "grad_norm": 1.3534623384475708, | |
| "learning_rate": 7.939086039413291e-08, | |
| "loss": 0.0948, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 3.929384965831435, | |
| "eval_accuracy": 0.9191724801539571, | |
| "eval_f1_macro": 0.8982632597992836, | |
| "eval_f1_micro": 0.9191724801539571, | |
| "eval_loss": 0.5567488074302673, | |
| "eval_precision_macro": 0.8854065999210586, | |
| "eval_precision_micro": 0.9191724801539571, | |
| "eval_recall_macro": 0.9212323971384597, | |
| "eval_recall_micro": 0.9191724801539571, | |
| "eval_runtime": 8.4196, | |
| "eval_samples_per_second": 493.73, | |
| "eval_steps_per_second": 61.761, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 3.9863325740318905, | |
| "grad_norm": 0.00499533349648118, | |
| "learning_rate": 3.6007811694149795e-09, | |
| "loss": 0.0915, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.9863325740318905, | |
| "eval_accuracy": 0.9189319220591773, | |
| "eval_f1_macro": 0.8979067522497397, | |
| "eval_f1_micro": 0.9189319220591773, | |
| "eval_loss": 0.5568965673446655, | |
| "eval_precision_macro": 0.8849842519873105, | |
| "eval_precision_micro": 0.9189319220591773, | |
| "eval_recall_macro": 0.9210492469553095, | |
| "eval_recall_micro": 0.9189319220591773, | |
| "eval_runtime": 8.4437, | |
| "eval_samples_per_second": 492.321, | |
| "eval_steps_per_second": 61.585, | |
| "step": 35000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 35120, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.368366329856e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |