| { |
| "best_metric": 0.8982632597992836, |
| "best_model_checkpoint": "./arabert_author_model/checkpoint-34500", |
| "epoch": 3.929384965831435, |
| "eval_steps": 500, |
| "global_step": 34500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05694760820045558, |
| "grad_norm": 15.19107437133789, |
| "learning_rate": 1.1343963553530753e-05, |
| "loss": 2.7205, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05694760820045558, |
| "eval_accuracy": 0.3750300697618475, |
| "eval_f1_macro": 0.28596660166175775, |
| "eval_f1_micro": 0.3750300697618475, |
| "eval_loss": 2.32218337059021, |
| "eval_precision_macro": 0.3865217733215992, |
| "eval_precision_micro": 0.3750300697618475, |
| "eval_recall_macro": 0.3672924033460141, |
| "eval_recall_micro": 0.3750300697618475, |
| "eval_runtime": 9.1613, |
| "eval_samples_per_second": 453.755, |
| "eval_steps_per_second": 56.76, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.11389521640091116, |
| "grad_norm": 21.725738525390625, |
| "learning_rate": 2.2710706150341686e-05, |
| "loss": 1.6025, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.11389521640091116, |
| "eval_accuracy": 0.4892951647822949, |
| "eval_f1_macro": 0.4406771878616986, |
| "eval_f1_micro": 0.4892951647822949, |
| "eval_loss": 1.6631975173950195, |
| "eval_precision_macro": 0.56733686366756, |
| "eval_precision_micro": 0.4892951647822949, |
| "eval_recall_macro": 0.48704653807602066, |
| "eval_recall_micro": 0.4892951647822949, |
| "eval_runtime": 8.477, |
| "eval_samples_per_second": 490.385, |
| "eval_steps_per_second": 61.342, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.17084282460136674, |
| "grad_norm": 13.716214179992676, |
| "learning_rate": 3.4100227790432806e-05, |
| "loss": 1.0328, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.17084282460136674, |
| "eval_accuracy": 0.6615347606446957, |
| "eval_f1_macro": 0.5830637518928246, |
| "eval_f1_micro": 0.6615347606446957, |
| "eval_loss": 1.1024270057678223, |
| "eval_precision_macro": 0.697472543022928, |
| "eval_precision_micro": 0.6615347606446957, |
| "eval_recall_macro": 0.6033302465966366, |
| "eval_recall_micro": 0.6615347606446957, |
| "eval_runtime": 8.4612, |
| "eval_samples_per_second": 491.299, |
| "eval_steps_per_second": 61.457, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.22779043280182232, |
| "grad_norm": 17.078737258911133, |
| "learning_rate": 4.548974943052392e-05, |
| "loss": 0.8504, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.22779043280182232, |
| "eval_accuracy": 0.639162857830166, |
| "eval_f1_macro": 0.5476482518700577, |
| "eval_f1_micro": 0.639162857830166, |
| "eval_loss": 1.2488256692886353, |
| "eval_precision_macro": 0.668531963924838, |
| "eval_precision_micro": 0.639162857830166, |
| "eval_recall_macro": 0.5729925283587635, |
| "eval_recall_micro": 0.639162857830166, |
| "eval_runtime": 8.6485, |
| "eval_samples_per_second": 480.664, |
| "eval_steps_per_second": 60.126, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.2847380410022779, |
| "grad_norm": 45.41240692138672, |
| "learning_rate": 5.6856492027334856e-05, |
| "loss": 0.7882, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.2847380410022779, |
| "eval_accuracy": 0.6523935530430599, |
| "eval_f1_macro": 0.5994264876841328, |
| "eval_f1_micro": 0.6523935530430599, |
| "eval_loss": 1.2463157176971436, |
| "eval_precision_macro": 0.6529647074997418, |
| "eval_precision_micro": 0.6523935530430599, |
| "eval_recall_macro": 0.6436371671271747, |
| "eval_recall_micro": 0.6523935530430599, |
| "eval_runtime": 8.4613, |
| "eval_samples_per_second": 491.293, |
| "eval_steps_per_second": 61.456, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.3416856492027335, |
| "grad_norm": 17.272947311401367, |
| "learning_rate": 6.824601366742597e-05, |
| "loss": 0.8087, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.3416856492027335, |
| "eval_accuracy": 0.7055568919894154, |
| "eval_f1_macro": 0.6265017927625092, |
| "eval_f1_micro": 0.7055568919894154, |
| "eval_loss": 1.2766073942184448, |
| "eval_precision_macro": 0.6878658131219406, |
| "eval_precision_micro": 0.7055568919894154, |
| "eval_recall_macro": 0.6658140911489481, |
| "eval_recall_micro": 0.7055568919894154, |
| "eval_runtime": 8.9536, |
| "eval_samples_per_second": 464.281, |
| "eval_steps_per_second": 58.077, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.39863325740318906, |
| "grad_norm": 36.27668380737305, |
| "learning_rate": 7.96355353075171e-05, |
| "loss": 0.7887, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.39863325740318906, |
| "eval_accuracy": 0.7149386576858311, |
| "eval_f1_macro": 0.6616726950770662, |
| "eval_f1_micro": 0.7149386576858311, |
| "eval_loss": 1.1627144813537598, |
| "eval_precision_macro": 0.7514554135519156, |
| "eval_precision_micro": 0.7149386576858311, |
| "eval_recall_macro": 0.6834968200252539, |
| "eval_recall_micro": 0.7149386576858311, |
| "eval_runtime": 8.4816, |
| "eval_samples_per_second": 490.118, |
| "eval_steps_per_second": 61.309, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.45558086560364464, |
| "grad_norm": 3.0386502742767334, |
| "learning_rate": 7.995372539966228e-05, |
| "loss": 0.7357, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.45558086560364464, |
| "eval_accuracy": 0.7611258118835699, |
| "eval_f1_macro": 0.6745353046335729, |
| "eval_f1_micro": 0.7611258118835699, |
| "eval_loss": 0.9807717800140381, |
| "eval_precision_macro": 0.7007756446431578, |
| "eval_precision_micro": 0.7611258118835699, |
| "eval_recall_macro": 0.7425048520300992, |
| "eval_recall_micro": 0.7611258118835699, |
| "eval_runtime": 8.4828, |
| "eval_samples_per_second": 490.05, |
| "eval_steps_per_second": 61.3, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.5125284738041003, |
| "grad_norm": 2.7381744384765625, |
| "learning_rate": 7.980884762455173e-05, |
| "loss": 0.7022, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.5125284738041003, |
| "eval_accuracy": 0.76088525378879, |
| "eval_f1_macro": 0.6871238458955197, |
| "eval_f1_micro": 0.76088525378879, |
| "eval_loss": 1.1158560514450073, |
| "eval_precision_macro": 0.7591168248124284, |
| "eval_precision_micro": 0.76088525378879, |
| "eval_recall_macro": 0.7099914791461782, |
| "eval_recall_micro": 0.76088525378879, |
| "eval_runtime": 9.0538, |
| "eval_samples_per_second": 459.145, |
| "eval_steps_per_second": 57.435, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.5694760820045558, |
| "grad_norm": 32.207054138183594, |
| "learning_rate": 7.956625791551662e-05, |
| "loss": 0.6587, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.5694760820045558, |
| "eval_accuracy": 0.8008178975222516, |
| "eval_f1_macro": 0.76243086871798, |
| "eval_f1_micro": 0.8008178975222516, |
| "eval_loss": 0.8932181000709534, |
| "eval_precision_macro": 0.7876270109291943, |
| "eval_precision_micro": 0.8008178975222516, |
| "eval_recall_macro": 0.7876260076814952, |
| "eval_recall_micro": 0.8008178975222516, |
| "eval_runtime": 8.5148, |
| "eval_samples_per_second": 488.207, |
| "eval_steps_per_second": 61.07, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.6264236902050114, |
| "grad_norm": 0.29054978489875793, |
| "learning_rate": 7.922558317223566e-05, |
| "loss": 0.5719, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.6264236902050114, |
| "eval_accuracy": 0.773875390906904, |
| "eval_f1_macro": 0.7249768597960354, |
| "eval_f1_micro": 0.773875390906904, |
| "eval_loss": 0.9966481328010559, |
| "eval_precision_macro": 0.7358501532705917, |
| "eval_precision_micro": 0.773875390906904, |
| "eval_recall_macro": 0.7614420126439011, |
| "eval_recall_micro": 0.773875390906904, |
| "eval_runtime": 8.4793, |
| "eval_samples_per_second": 490.251, |
| "eval_steps_per_second": 61.326, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.683371298405467, |
| "grad_norm": 1.0872896909713745, |
| "learning_rate": 7.878805260363261e-05, |
| "loss": 0.6425, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.683371298405467, |
| "eval_accuracy": 0.771710368053885, |
| "eval_f1_macro": 0.7254677224325083, |
| "eval_f1_micro": 0.771710368053885, |
| "eval_loss": 0.9605371952056885, |
| "eval_precision_macro": 0.8005336996598393, |
| "eval_precision_micro": 0.771710368053885, |
| "eval_recall_macro": 0.7547512504988463, |
| "eval_recall_micro": 0.771710368053885, |
| "eval_runtime": 8.4743, |
| "eval_samples_per_second": 490.54, |
| "eval_steps_per_second": 61.362, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.7403189066059226, |
| "grad_norm": 189.1195068359375, |
| "learning_rate": 7.82547465603587e-05, |
| "loss": 0.5433, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.7403189066059226, |
| "eval_accuracy": 0.7700264613904257, |
| "eval_f1_macro": 0.740451647791416, |
| "eval_f1_micro": 0.7700264613904257, |
| "eval_loss": 1.0783036947250366, |
| "eval_precision_macro": 0.769666104173451, |
| "eval_precision_micro": 0.7700264613904257, |
| "eval_recall_macro": 0.7659213860833466, |
| "eval_recall_micro": 0.7700264613904257, |
| "eval_runtime": 8.4608, |
| "eval_samples_per_second": 491.328, |
| "eval_steps_per_second": 61.46, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.7972665148063781, |
| "grad_norm": 0.5625237822532654, |
| "learning_rate": 7.762833068916386e-05, |
| "loss": 0.5736, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.7972665148063781, |
| "eval_accuracy": 0.7782054366129421, |
| "eval_f1_macro": 0.6779030343365783, |
| "eval_f1_micro": 0.7782054366129421, |
| "eval_loss": 1.0509027242660522, |
| "eval_precision_macro": 0.7339555194909612, |
| "eval_precision_micro": 0.7782054366129421, |
| "eval_recall_macro": 0.6828886626680506, |
| "eval_recall_micro": 0.7782054366129421, |
| "eval_runtime": 8.481, |
| "eval_samples_per_second": 490.156, |
| "eval_steps_per_second": 61.314, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.8542141230068337, |
| "grad_norm": 64.8875503540039, |
| "learning_rate": 7.690784156928418e-05, |
| "loss": 0.5273, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.8542141230068337, |
| "eval_accuracy": 0.7774837623286024, |
| "eval_f1_macro": 0.7006759117702724, |
| "eval_f1_micro": 0.7774837623286024, |
| "eval_loss": 1.083090901374817, |
| "eval_precision_macro": 0.7615638623524419, |
| "eval_precision_micro": 0.7774837623286024, |
| "eval_recall_macro": 0.7385809772355115, |
| "eval_recall_micro": 0.7774837623286024, |
| "eval_runtime": 8.4704, |
| "eval_samples_per_second": 490.769, |
| "eval_steps_per_second": 61.39, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.9111617312072893, |
| "grad_norm": 0.9078112840652466, |
| "learning_rate": 7.609621959255558e-05, |
| "loss": 0.5268, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.9111617312072893, |
| "eval_accuracy": 0.7445273033437575, |
| "eval_f1_macro": 0.699580887402278, |
| "eval_f1_micro": 0.7445273033437575, |
| "eval_loss": 1.3979923725128174, |
| "eval_precision_macro": 0.7001048011829232, |
| "eval_precision_micro": 0.7445273033437575, |
| "eval_recall_macro": 0.7726266308290096, |
| "eval_recall_micro": 0.7445273033437575, |
| "eval_runtime": 8.4384, |
| "eval_samples_per_second": 492.631, |
| "eval_steps_per_second": 61.623, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.9681093394077449, |
| "grad_norm": 42.65549850463867, |
| "learning_rate": 7.519735782617663e-05, |
| "loss": 0.5462, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.9681093394077449, |
| "eval_accuracy": 0.8287226365167187, |
| "eval_f1_macro": 0.7785313462283256, |
| "eval_f1_micro": 0.8287226365167187, |
| "eval_loss": 0.9026873111724854, |
| "eval_precision_macro": 0.7868255645583773, |
| "eval_precision_micro": 0.8287226365167187, |
| "eval_recall_macro": 0.8024053906273642, |
| "eval_recall_micro": 0.8287226365167187, |
| "eval_runtime": 8.4335, |
| "eval_samples_per_second": 492.916, |
| "eval_steps_per_second": 61.659, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.0250569476082005, |
| "grad_norm": 0.9462873935699463, |
| "learning_rate": 7.420987383057407e-05, |
| "loss": 0.486, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.0250569476082005, |
| "eval_accuracy": 0.7211931681501083, |
| "eval_f1_macro": 0.6932948143152837, |
| "eval_f1_micro": 0.7211931681501083, |
| "eval_loss": 1.7263842821121216, |
| "eval_precision_macro": 0.7360552900892146, |
| "eval_precision_micro": 0.7211931681501083, |
| "eval_recall_macro": 0.7409941847523269, |
| "eval_recall_micro": 0.7211931681501083, |
| "eval_runtime": 8.473, |
| "eval_samples_per_second": 490.615, |
| "eval_steps_per_second": 61.371, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.082004555808656, |
| "grad_norm": 0.8366897106170654, |
| "learning_rate": 7.314014528807089e-05, |
| "loss": 0.4458, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.082004555808656, |
| "eval_accuracy": 0.8186191965359635, |
| "eval_f1_macro": 0.7675006381723768, |
| "eval_f1_micro": 0.8186191965359635, |
| "eval_loss": 0.9773014783859253, |
| "eval_precision_macro": 0.7844621076644617, |
| "eval_precision_micro": 0.8186191965359635, |
| "eval_recall_macro": 0.8041851318048103, |
| "eval_recall_micro": 0.8186191965359635, |
| "eval_runtime": 8.4706, |
| "eval_samples_per_second": 490.754, |
| "eval_steps_per_second": 61.389, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.1389521640091116, |
| "grad_norm": 2.115098237991333, |
| "learning_rate": 7.198652696785955e-05, |
| "loss": 0.4102, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.1389521640091116, |
| "eval_accuracy": 0.8316093336540774, |
| "eval_f1_macro": 0.7933624073358755, |
| "eval_f1_micro": 0.8316093336540774, |
| "eval_loss": 0.967036247253418, |
| "eval_precision_macro": 0.7949251475033909, |
| "eval_precision_micro": 0.8316093336540774, |
| "eval_recall_macro": 0.8274655786829063, |
| "eval_recall_micro": 0.8316093336540774, |
| "eval_runtime": 8.4509, |
| "eval_samples_per_second": 491.9, |
| "eval_steps_per_second": 61.532, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.1958997722095672, |
| "grad_norm": 1.0413740873336792, |
| "learning_rate": 7.075392750273938e-05, |
| "loss": 0.3773, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.1958997722095672, |
| "eval_accuracy": 0.8171758479672842, |
| "eval_f1_macro": 0.7981622962245145, |
| "eval_f1_micro": 0.8171758479672842, |
| "eval_loss": 0.9198176860809326, |
| "eval_precision_macro": 0.819291401144176, |
| "eval_precision_micro": 0.8171758479672842, |
| "eval_recall_macro": 0.8245482183363974, |
| "eval_recall_micro": 0.8171758479672842, |
| "eval_runtime": 8.4577, |
| "eval_samples_per_second": 491.505, |
| "eval_steps_per_second": 61.482, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.2528473804100229, |
| "grad_norm": 7.1126203536987305, |
| "learning_rate": 6.94453904277921e-05, |
| "loss": 0.3796, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.2528473804100229, |
| "eval_accuracy": 0.7741159490016839, |
| "eval_f1_macro": 0.7334267874337288, |
| "eval_f1_micro": 0.7741159490016839, |
| "eval_loss": 1.324471116065979, |
| "eval_precision_macro": 0.7554977548108798, |
| "eval_precision_micro": 0.7741159490016839, |
| "eval_recall_macro": 0.7865189643712818, |
| "eval_recall_micro": 0.7741159490016839, |
| "eval_runtime": 8.4546, |
| "eval_samples_per_second": 491.688, |
| "eval_steps_per_second": 61.505, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.3097949886104785, |
| "grad_norm": 85.53689575195312, |
| "learning_rate": 6.806414678327537e-05, |
| "loss": 0.4432, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.3097949886104785, |
| "eval_accuracy": 0.8092374308395478, |
| "eval_f1_macro": 0.7438442519057924, |
| "eval_f1_micro": 0.8092374308395478, |
| "eval_loss": 1.1105079650878906, |
| "eval_precision_macro": 0.7748012629794462, |
| "eval_precision_micro": 0.8092374308395478, |
| "eval_recall_macro": 0.7783932685192183, |
| "eval_recall_micro": 0.8092374308395478, |
| "eval_runtime": 8.4968, |
| "eval_samples_per_second": 489.244, |
| "eval_steps_per_second": 61.2, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.366742596810934, |
| "grad_norm": 1.0569897890090942, |
| "learning_rate": 6.661360713653681e-05, |
| "loss": 0.389, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.366742596810934, |
| "eval_accuracy": 0.8128458022612461, |
| "eval_f1_macro": 0.7595338289100665, |
| "eval_f1_micro": 0.8128458022612461, |
| "eval_loss": 1.0267034769058228, |
| "eval_precision_macro": 0.7590761169934961, |
| "eval_precision_micro": 0.8128458022612461, |
| "eval_recall_macro": 0.8036606747506161, |
| "eval_recall_micro": 0.8128458022612461, |
| "eval_runtime": 8.4648, |
| "eval_samples_per_second": 491.09, |
| "eval_steps_per_second": 61.431, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.4236902050113895, |
| "grad_norm": 14.686357498168945, |
| "learning_rate": 6.509735316063996e-05, |
| "loss": 0.6087, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.4236902050113895, |
| "eval_accuracy": 0.8443589126774116, |
| "eval_f1_macro": 0.7997974993396904, |
| "eval_f1_micro": 0.8443589126774116, |
| "eval_loss": 0.9193519353866577, |
| "eval_precision_macro": 0.7894219117254144, |
| "eval_precision_micro": 0.8443589126774116, |
| "eval_recall_macro": 0.833108777347993, |
| "eval_recall_micro": 0.8443589126774116, |
| "eval_runtime": 8.4379, |
| "eval_samples_per_second": 492.657, |
| "eval_steps_per_second": 61.627, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.4806378132118452, |
| "grad_norm": 0.05014890432357788, |
| "learning_rate": 6.352234452003862e-05, |
| "loss": 0.4803, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.4806378132118452, |
| "eval_accuracy": 0.8535001202790474, |
| "eval_f1_macro": 0.8265458036830905, |
| "eval_f1_micro": 0.8535001202790474, |
| "eval_loss": 0.879317581653595, |
| "eval_precision_macro": 0.8329036145166532, |
| "eval_precision_micro": 0.8535001202790474, |
| "eval_recall_macro": 0.8340628674797995, |
| "eval_recall_micro": 0.8535001202790474, |
| "eval_runtime": 8.4157, |
| "eval_samples_per_second": 493.96, |
| "eval_steps_per_second": 61.79, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.5375854214123006, |
| "grad_norm": 40.28041076660156, |
| "learning_rate": 6.188948654276723e-05, |
| "loss": 0.5513, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.5375854214123006, |
| "eval_accuracy": 0.7782054366129421, |
| "eval_f1_macro": 0.7066835334217871, |
| "eval_f1_micro": 0.7782054366129421, |
| "eval_loss": 1.1909141540527344, |
| "eval_precision_macro": 0.7255202837659691, |
| "eval_precision_micro": 0.7782054366129421, |
| "eval_recall_macro": 0.7402758652568006, |
| "eval_recall_micro": 0.7782054366129421, |
| "eval_runtime": 8.4835, |
| "eval_samples_per_second": 490.012, |
| "eval_steps_per_second": 61.296, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.5945330296127562, |
| "grad_norm": 8.929847717285156, |
| "learning_rate": 6.019936353958699e-05, |
| "loss": 0.693, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.5945330296127562, |
| "eval_accuracy": 0.2460909309598268, |
| "eval_f1_macro": 0.20336556842910194, |
| "eval_f1_micro": 0.2460909309598268, |
| "eval_loss": 2.8277578353881836, |
| "eval_precision_macro": 0.5226671539146224, |
| "eval_precision_micro": 0.2460909309598268, |
| "eval_recall_macro": 0.1936798727163823, |
| "eval_recall_micro": 0.2460909309598268, |
| "eval_runtime": 8.4782, |
| "eval_samples_per_second": 490.315, |
| "eval_steps_per_second": 61.334, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.6514806378132119, |
| "grad_norm": 28.1862735748291, |
| "learning_rate": 5.8459364260048594e-05, |
| "loss": 0.9646, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.6514806378132119, |
| "eval_accuracy": 0.8246331489054607, |
| "eval_f1_macro": 0.7913907456133474, |
| "eval_f1_micro": 0.8246331489054607, |
| "eval_loss": 0.948131799697876, |
| "eval_precision_macro": 0.799701208597097, |
| "eval_precision_micro": 0.8246331489054607, |
| "eval_recall_macro": 0.8123299807423895, |
| "eval_recall_micro": 0.8246331489054607, |
| "eval_runtime": 8.4643, |
| "eval_samples_per_second": 491.124, |
| "eval_steps_per_second": 61.435, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.7084282460136673, |
| "grad_norm": 7.083284854888916, |
| "learning_rate": 5.6673785111054136e-05, |
| "loss": 0.462, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.7084282460136673, |
| "eval_accuracy": 0.8472456098147703, |
| "eval_f1_macro": 0.8091993297668394, |
| "eval_f1_micro": 0.8472456098147703, |
| "eval_loss": 0.8667464256286621, |
| "eval_precision_macro": 0.8198322314440802, |
| "eval_precision_micro": 0.8472456098147703, |
| "eval_recall_macro": 0.8266325847969769, |
| "eval_recall_micro": 0.8472456098147703, |
| "eval_runtime": 8.4589, |
| "eval_samples_per_second": 491.435, |
| "eval_steps_per_second": 61.474, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.7653758542141231, |
| "grad_norm": 65.16443634033203, |
| "learning_rate": 5.484703504533721e-05, |
| "loss": 0.4093, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.7653758542141231, |
| "eval_accuracy": 0.8116430117873467, |
| "eval_f1_macro": 0.7700885656504642, |
| "eval_f1_micro": 0.8116430117873467, |
| "eval_loss": 1.1932649612426758, |
| "eval_precision_macro": 0.8232095109547221, |
| "eval_precision_micro": 0.8116430117873467, |
| "eval_recall_macro": 0.775800293436143, |
| "eval_recall_micro": 0.8116430117873467, |
| "eval_runtime": 8.4735, |
| "eval_samples_per_second": 490.589, |
| "eval_steps_per_second": 61.368, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.8223234624145785, |
| "grad_norm": 15.238636016845703, |
| "learning_rate": 5.2983624674875084e-05, |
| "loss": 0.349, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.8223234624145785, |
| "eval_accuracy": 0.8799615107048352, |
| "eval_f1_macro": 0.8377281287378512, |
| "eval_f1_micro": 0.8799615107048352, |
| "eval_loss": 0.7749123573303223, |
| "eval_precision_macro": 0.8358272736538074, |
| "eval_precision_micro": 0.8799615107048352, |
| "eval_recall_macro": 0.8723899950094214, |
| "eval_recall_micro": 0.8799615107048352, |
| "eval_runtime": 8.4617, |
| "eval_samples_per_second": 491.272, |
| "eval_steps_per_second": 61.453, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.8792710706150342, |
| "grad_norm": 1.931815505027771, |
| "learning_rate": 5.108815513328386e-05, |
| "loss": 0.3333, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.8792710706150342, |
| "eval_accuracy": 0.8799615107048352, |
| "eval_f1_macro": 0.851689807067872, |
| "eval_f1_micro": 0.8799615107048352, |
| "eval_loss": 0.6482954025268555, |
| "eval_precision_macro": 0.8439980638748368, |
| "eval_precision_micro": 0.8799615107048352, |
| "eval_recall_macro": 0.8709009740944947, |
| "eval_recall_micro": 0.8799615107048352, |
| "eval_runtime": 8.4707, |
| "eval_samples_per_second": 490.751, |
| "eval_steps_per_second": 61.388, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.9362186788154898, |
| "grad_norm": 14.734030723571777, |
| "learning_rate": 4.916530671469754e-05, |
| "loss": 0.3449, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.9362186788154898, |
| "eval_accuracy": 0.7890305508780371, |
| "eval_f1_macro": 0.7375164837600725, |
| "eval_f1_micro": 0.7890305508780371, |
| "eval_loss": 1.2130000591278076, |
| "eval_precision_macro": 0.7696829158091395, |
| "eval_precision_micro": 0.7890305508780371, |
| "eval_recall_macro": 0.7545432686041814, |
| "eval_recall_micro": 0.7890305508780371, |
| "eval_runtime": 8.448, |
| "eval_samples_per_second": 492.07, |
| "eval_steps_per_second": 61.553, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.9931662870159452, |
| "grad_norm": 4.0362091064453125, |
| "learning_rate": 4.7219827317183907e-05, |
| "loss": 0.2982, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.9931662870159452, |
| "eval_accuracy": 0.8599951888381044, |
| "eval_f1_macro": 0.8354955249556116, |
| "eval_f1_micro": 0.8599951888381044, |
| "eval_loss": 0.8803524374961853, |
| "eval_precision_macro": 0.8342493543222153, |
| "eval_precision_micro": 0.8599951888381044, |
| "eval_recall_macro": 0.8556466165427711, |
| "eval_recall_micro": 0.8599951888381044, |
| "eval_runtime": 8.4723, |
| "eval_samples_per_second": 490.66, |
| "eval_steps_per_second": 61.377, |
| "step": 17500 |
| }, |
| { |
| "epoch": 2.050113895216401, |
| "grad_norm": 0.7534123659133911, |
| "learning_rate": 4.525652071923279e-05, |
| "loss": 0.2348, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.050113895216401, |
| "eval_accuracy": 0.8448400288669714, |
| "eval_f1_macro": 0.8190720986060728, |
| "eval_f1_micro": 0.8448400288669714, |
| "eval_loss": 1.1245763301849365, |
| "eval_precision_macro": 0.8337767355676724, |
| "eval_precision_micro": 0.8448400288669714, |
| "eval_recall_macro": 0.8557743090961959, |
| "eval_recall_micro": 0.8448400288669714, |
| "eval_runtime": 8.4721, |
| "eval_samples_per_second": 490.667, |
| "eval_steps_per_second": 61.378, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.1070615034168565, |
| "grad_norm": 1.4927374124526978, |
| "learning_rate": 4.328023471826429e-05, |
| "loss": 0.2299, |
| "step": 18500 |
| }, |
| { |
| "epoch": 2.1070615034168565, |
| "eval_accuracy": 0.8708203031031995, |
| "eval_f1_macro": 0.8303241771039817, |
| "eval_f1_micro": 0.8708203031031995, |
| "eval_loss": 0.8329204320907593, |
| "eval_precision_macro": 0.83267401976066, |
| "eval_precision_micro": 0.8708203031031995, |
| "eval_recall_macro": 0.8610797711013763, |
| "eval_recall_micro": 0.8708203031031995, |
| "eval_runtime": 8.4578, |
| "eval_samples_per_second": 491.498, |
| "eval_steps_per_second": 61.482, |
| "step": 18500 |
| }, |
| { |
| "epoch": 2.164009111617312, |
| "grad_norm": 1.7383619546890259, |
| "learning_rate": 4.129584916044555e-05, |
| "loss": 0.2468, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.164009111617312, |
| "eval_accuracy": 0.8773153716622565, |
| "eval_f1_macro": 0.846177762458016, |
| "eval_f1_micro": 0.8773153716622565, |
| "eval_loss": 0.7664415836334229, |
| "eval_precision_macro": 0.8368864405451267, |
| "eval_precision_micro": 0.8773153716622565, |
| "eval_recall_macro": 0.8780489137762446, |
| "eval_recall_micro": 0.8773153716622565, |
| "eval_runtime": 8.4381, |
| "eval_samples_per_second": 492.645, |
| "eval_steps_per_second": 61.625, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.2209567198177678, |
| "grad_norm": 400.013916015625, |
| "learning_rate": 3.930826389137262e-05, |
| "loss": 0.1899, |
| "step": 19500 |
| }, |
| { |
| "epoch": 2.2209567198177678, |
| "eval_accuracy": 0.8862160211691124, |
| "eval_f1_macro": 0.8558563382552302, |
| "eval_f1_micro": 0.8862160211691124, |
| "eval_loss": 0.7495226263999939, |
| "eval_precision_macro": 0.8555231307289006, |
| "eval_precision_micro": 0.8862160211691124, |
| "eval_recall_macro": 0.8758801511510593, |
| "eval_recall_micro": 0.8862160211691124, |
| "eval_runtime": 8.4445, |
| "eval_samples_per_second": 492.274, |
| "eval_steps_per_second": 61.579, |
| "step": 19500 |
| }, |
| { |
| "epoch": 2.277904328018223, |
| "grad_norm": 2.0449647903442383, |
| "learning_rate": 3.732635344608829e-05, |
| "loss": 0.1977, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.277904328018223, |
| "eval_accuracy": 0.8551840269425066, |
| "eval_f1_macro": 0.8198359165864053, |
| "eval_f1_micro": 0.8551840269425066, |
| "eval_loss": 1.0149922370910645, |
| "eval_precision_macro": 0.8328051392132652, |
| "eval_precision_micro": 0.8551840269425066, |
| "eval_recall_macro": 0.853902169918103, |
| "eval_recall_micro": 0.8551840269425066, |
| "eval_runtime": 8.4355, |
| "eval_samples_per_second": 492.798, |
| "eval_steps_per_second": 61.644, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.334851936218679, |
| "grad_norm": 0.7749654650688171, |
| "learning_rate": 3.534706966798757e-05, |
| "loss": 0.2314, |
| "step": 20500 |
| }, |
| { |
| "epoch": 2.334851936218679, |
| "eval_accuracy": 0.8948761125811884, |
| "eval_f1_macro": 0.8659539030629905, |
| "eval_f1_micro": 0.8948761125811884, |
| "eval_loss": 0.6898870468139648, |
| "eval_precision_macro": 0.8564830162508421, |
| "eval_precision_micro": 0.8948761125811884, |
| "eval_recall_macro": 0.8978688795664334, |
| "eval_recall_micro": 0.8948761125811884, |
| "eval_runtime": 8.4412, |
| "eval_samples_per_second": 492.466, |
| "eval_steps_per_second": 61.603, |
| "step": 20500 |
| }, |
| { |
| "epoch": 2.3917995444191344, |
| "grad_norm": 0.007438243832439184, |
| "learning_rate": 3.337927490728384e-05, |
| "loss": 0.203, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.3917995444191344, |
| "eval_accuracy": 0.8984844840028867, |
| "eval_f1_macro": 0.8654367599963082, |
| "eval_f1_micro": 0.8984844840028867, |
| "eval_loss": 0.6225568652153015, |
| "eval_precision_macro": 0.8710584795534867, |
| "eval_precision_micro": 0.8984844840028867, |
| "eval_recall_macro": 0.8907316535034207, |
| "eval_recall_micro": 0.8984844840028867, |
| "eval_runtime": 8.4582, |
| "eval_samples_per_second": 491.477, |
| "eval_steps_per_second": 61.479, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.44874715261959, |
| "grad_norm": 1.0429240465164185, |
| "learning_rate": 3.142782804357047e-05, |
| "loss": 0.1784, |
| "step": 21500 |
| }, |
| { |
| "epoch": 2.44874715261959, |
| "eval_accuracy": 0.8806831849891749, |
| "eval_f1_macro": 0.8516903935082014, |
| "eval_f1_micro": 0.8806831849891749, |
| "eval_loss": 0.8446455001831055, |
| "eval_precision_macro": 0.86002376045135, |
| "eval_precision_micro": 0.8806831849891749, |
| "eval_recall_macro": 0.8768325982997476, |
| "eval_recall_micro": 0.8806831849891749, |
| "eval_runtime": 8.4659, |
| "eval_samples_per_second": 491.026, |
| "eval_steps_per_second": 61.423, |
| "step": 21500 |
| }, |
| { |
| "epoch": 2.5056947608200457, |
| "grad_norm": 89.87359619140625, |
| "learning_rate": 2.9497547590207118e-05, |
| "loss": 0.2044, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.5056947608200457, |
| "eval_accuracy": 0.8556651431320664, |
| "eval_f1_macro": 0.8466413367690651, |
| "eval_f1_micro": 0.8556651431320664, |
| "eval_loss": 0.9901952743530273, |
| "eval_precision_macro": 0.8512325160929325, |
| "eval_precision_micro": 0.8556651431320664, |
| "eval_recall_macro": 0.875509109034969, |
| "eval_recall_micro": 0.8556651431320664, |
| "eval_runtime": 8.5077, |
| "eval_samples_per_second": 488.617, |
| "eval_steps_per_second": 61.121, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.562642369020501, |
| "grad_norm": 0.013815644197165966, |
| "learning_rate": 2.759319979644478e-05, |
| "loss": 0.1914, |
| "step": 22500 |
| }, |
| { |
| "epoch": 2.562642369020501, |
| "eval_accuracy": 0.8927110897281694, |
| "eval_f1_macro": 0.8644517343056812, |
| "eval_f1_micro": 0.8927110897281694, |
| "eval_loss": 0.7158553004264832, |
| "eval_precision_macro": 0.8493787111870335, |
| "eval_precision_micro": 0.8927110897281694, |
| "eval_recall_macro": 0.898066885887026, |
| "eval_recall_micro": 0.8927110897281694, |
| "eval_runtime": 8.4749, |
| "eval_samples_per_second": 490.509, |
| "eval_steps_per_second": 61.358, |
| "step": 22500 |
| }, |
| { |
| "epoch": 2.619589977220957, |
| "grad_norm": 4.245390892028809, |
| "learning_rate": 2.5719486878601176e-05, |
| "loss": 0.1639, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.619589977220957, |
| "eval_accuracy": 0.8864565792638922, |
| "eval_f1_macro": 0.8458899447458572, |
| "eval_f1_micro": 0.8864565792638922, |
| "eval_loss": 0.729199230670929, |
| "eval_precision_macro": 0.8401968974647541, |
| "eval_precision_micro": 0.8864565792638922, |
| "eval_recall_macro": 0.8805776786360812, |
| "eval_recall_micro": 0.8864565792638922, |
| "eval_runtime": 8.4586, |
| "eval_samples_per_second": 491.451, |
| "eval_steps_per_second": 61.476, |
| "step": 23000 |
| }, |
| { |
| "epoch": 2.6765375854214124, |
| "grad_norm": 1.427230715751648, |
| "learning_rate": 2.3881035409346452e-05, |
| "loss": 0.218, |
| "step": 23500 |
| }, |
| { |
| "epoch": 2.6765375854214124, |
| "eval_accuracy": 0.8965600192446476, |
| "eval_f1_macro": 0.8710100150868682, |
| "eval_f1_micro": 0.8965600192446476, |
| "eval_loss": 0.6507639288902283, |
| "eval_precision_macro": 0.8627987859589317, |
| "eval_precision_micro": 0.8965600192446476, |
| "eval_recall_macro": 0.8951286903819264, |
| "eval_recall_micro": 0.8965600192446476, |
| "eval_runtime": 8.4672, |
| "eval_samples_per_second": 490.955, |
| "eval_steps_per_second": 61.414, |
| "step": 23500 |
| }, |
| { |
| "epoch": 2.733485193621868, |
| "grad_norm": 0.07296980172395706, |
| "learning_rate": 2.208238489376805e-05, |
| "loss": 0.1723, |
| "step": 24000 |
| }, |
| { |
| "epoch": 2.733485193621868, |
| "eval_accuracy": 0.9023334135193649, |
| "eval_f1_macro": 0.8816239296246752, |
| "eval_f1_micro": 0.9023334135193649, |
| "eval_loss": 0.6424487233161926, |
| "eval_precision_macro": 0.8835852425011356, |
| "eval_precision_micro": 0.9023334135193649, |
| "eval_recall_macro": 0.8920920740693067, |
| "eval_recall_micro": 0.9023334135193649, |
| "eval_runtime": 8.4613, |
| "eval_samples_per_second": 491.296, |
| "eval_steps_per_second": 61.456, |
| "step": 24000 |
| }, |
| { |
| "epoch": 2.7904328018223232, |
| "grad_norm": 125.23475646972656, |
| "learning_rate": 2.0331438324793375e-05, |
| "loss": 0.1682, |
| "step": 24500 |
| }, |
| { |
| "epoch": 2.7904328018223232, |
| "eval_accuracy": 0.8972816935289872, |
| "eval_f1_macro": 0.8822737330613647, |
| "eval_f1_micro": 0.8972816935289872, |
| "eval_loss": 0.6842340230941772, |
| "eval_precision_macro": 0.8827282408108892, |
| "eval_precision_micro": 0.8972816935289872, |
| "eval_recall_macro": 0.8944889961638287, |
| "eval_recall_micro": 0.8972816935289872, |
| "eval_runtime": 8.4764, |
| "eval_samples_per_second": 490.419, |
| "eval_steps_per_second": 61.347, |
| "step": 24500 |
| }, |
| { |
| "epoch": 2.847380410022779, |
| "grad_norm": 0.5098503828048706, |
| "learning_rate": 1.8628863347570347e-05, |
| "loss": 0.1441, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.847380410022779, |
| "eval_accuracy": 0.8948761125811884, |
| "eval_f1_macro": 0.8736085811230334, |
| "eval_f1_micro": 0.8948761125811884, |
| "eval_loss": 0.7110973596572876, |
| "eval_precision_macro": 0.8674915707955687, |
| "eval_precision_micro": 0.8948761125811884, |
| "eval_recall_macro": 0.901699859992757, |
| "eval_recall_micro": 0.8948761125811884, |
| "eval_runtime": 8.4537, |
| "eval_samples_per_second": 491.739, |
| "eval_steps_per_second": 61.512, |
| "step": 25000 |
| }, |
| { |
| "epoch": 2.9043280182232345, |
| "grad_norm": 0.8350435495376587, |
| "learning_rate": 1.6975596030661532e-05, |
| "loss": 0.1625, |
| "step": 25500 |
| }, |
| { |
| "epoch": 2.9043280182232345, |
| "eval_accuracy": 0.9030550878037046, |
| "eval_f1_macro": 0.8779275841497265, |
| "eval_f1_micro": 0.9030550878037046, |
| "eval_loss": 0.6505866050720215, |
| "eval_precision_macro": 0.8690908487688577, |
| "eval_precision_micro": 0.9030550878037046, |
| "eval_recall_macro": 0.9045801541095179, |
| "eval_recall_micro": 0.9030550878037046, |
| "eval_runtime": 8.4694, |
| "eval_samples_per_second": 490.823, |
| "eval_steps_per_second": 61.397, |
| "step": 25500 |
| }, |
| { |
| "epoch": 2.9612756264236904, |
| "grad_norm": 0.08747211843729019, |
| "learning_rate": 1.537918058104578e-05, |
| "loss": 0.1494, |
| "step": 26000 |
| }, |
| { |
| "epoch": 2.9612756264236904, |
| "eval_accuracy": 0.9090690401732018, |
| "eval_f1_macro": 0.8830141025055214, |
| "eval_f1_micro": 0.9090690401732018, |
| "eval_loss": 0.5693129301071167, |
| "eval_precision_macro": 0.8816974949312945, |
| "eval_precision_micro": 0.9090690401732018, |
| "eval_recall_macro": 0.9015040014659215, |
| "eval_recall_micro": 0.9090690401732018, |
| "eval_runtime": 8.5221, |
| "eval_samples_per_second": 487.793, |
| "eval_steps_per_second": 61.018, |
| "step": 26000 |
| }, |
| { |
| "epoch": 3.0182232346241458, |
| "grad_norm": 2.089958906173706, |
| "learning_rate": 1.3843558868376073e-05, |
| "loss": 0.1306, |
| "step": 26500 |
| }, |
| { |
| "epoch": 3.0182232346241458, |
| "eval_accuracy": 0.9179696896800578, |
| "eval_f1_macro": 0.8947539710239703, |
| "eval_f1_micro": 0.9179696896800578, |
| "eval_loss": 0.534771203994751, |
| "eval_precision_macro": 0.8923066730381018, |
| "eval_precision_micro": 0.9179696896800578, |
| "eval_recall_macro": 0.9121332598176911, |
| "eval_recall_micro": 0.9179696896800578, |
| "eval_runtime": 8.5905, |
| "eval_samples_per_second": 483.909, |
| "eval_steps_per_second": 60.532, |
| "step": 26500 |
| }, |
| { |
| "epoch": 3.075170842824601, |
| "grad_norm": 0.0017847216222435236, |
| "learning_rate": 1.2372522650386443e-05, |
| "loss": 0.0929, |
| "step": 27000 |
| }, |
| { |
| "epoch": 3.075170842824601, |
| "eval_accuracy": 0.9162857830165985, |
| "eval_f1_macro": 0.8899287169854497, |
| "eval_f1_micro": 0.9162857830165985, |
| "eval_loss": 0.5843378305435181, |
| "eval_precision_macro": 0.8796301981250076, |
| "eval_precision_micro": 0.9162857830165985, |
| "eval_recall_macro": 0.9152639619070458, |
| "eval_recall_micro": 0.9162857830165985, |
| "eval_runtime": 8.4184, |
| "eval_samples_per_second": 493.798, |
| "eval_steps_per_second": 61.769, |
| "step": 27000 |
| }, |
| { |
| "epoch": 3.132118451025057, |
| "grad_norm": 0.7512030601501465, |
| "learning_rate": 1.096970421028209e-05, |
| "loss": 0.1351, |
| "step": 27500 |
| }, |
| { |
| "epoch": 3.132118451025057, |
| "eval_accuracy": 0.9023334135193649, |
| "eval_f1_macro": 0.8733006387364801, |
| "eval_f1_micro": 0.9023334135193649, |
| "eval_loss": 0.6913191080093384, |
| "eval_precision_macro": 0.8611014409270759, |
| "eval_precision_micro": 0.9023334135193649, |
| "eval_recall_macro": 0.9038150903313851, |
| "eval_recall_micro": 0.9023334135193649, |
| "eval_runtime": 8.4379, |
| "eval_samples_per_second": 492.656, |
| "eval_steps_per_second": 61.626, |
| "step": 27500 |
| }, |
| { |
| "epoch": 3.1890660592255125, |
| "grad_norm": 0.005041371565312147, |
| "learning_rate": 9.638567387904402e-06, |
| "loss": 0.0907, |
| "step": 28000 |
| }, |
| { |
| "epoch": 3.1890660592255125, |
| "eval_accuracy": 0.9206158287226365, |
| "eval_f1_macro": 0.8974407788451421, |
| "eval_f1_micro": 0.9206158287226365, |
| "eval_loss": 0.5801523327827454, |
| "eval_precision_macro": 0.8882950671220116, |
| "eval_precision_micro": 0.9206158287226365, |
| "eval_recall_macro": 0.9136820970599899, |
| "eval_recall_micro": 0.9206158287226365, |
| "eval_runtime": 8.4725, |
| "eval_samples_per_second": 490.649, |
| "eval_steps_per_second": 61.375, |
| "step": 28000 |
| }, |
| { |
| "epoch": 3.2460136674259683, |
| "grad_norm": 0.6128404140472412, |
| "learning_rate": 8.382399026816216e-06, |
| "loss": 0.117, |
| "step": 28500 |
| }, |
| { |
| "epoch": 3.2460136674259683, |
| "eval_accuracy": 0.9105123887418811, |
| "eval_f1_macro": 0.8877368293575257, |
| "eval_f1_micro": 0.9105123887418811, |
| "eval_loss": 0.6602935791015625, |
| "eval_precision_macro": 0.8839485939859522, |
| "eval_precision_micro": 0.9105123887418811, |
| "eval_recall_macro": 0.905732720534913, |
| "eval_recall_micro": 0.9105123887418811, |
| "eval_runtime": 8.482, |
| "eval_samples_per_second": 490.097, |
| "eval_steps_per_second": 61.306, |
| "step": 28500 |
| }, |
| { |
| "epoch": 3.3029612756264237, |
| "grad_norm": 0.0018906695768237114, |
| "learning_rate": 7.2065771743884275e-06, |
| "loss": 0.0986, |
| "step": 29000 |
| }, |
| { |
| "epoch": 3.3029612756264237, |
| "eval_accuracy": 0.9105123887418811, |
| "eval_f1_macro": 0.887646044144043, |
| "eval_f1_micro": 0.9105123887418811, |
| "eval_loss": 0.6138319969177246, |
| "eval_precision_macro": 0.887907584612917, |
| "eval_precision_micro": 0.9105123887418811, |
| "eval_recall_macro": 0.9029879731406512, |
| "eval_recall_micro": 0.9105123887418811, |
| "eval_runtime": 8.4603, |
| "eval_samples_per_second": 491.355, |
| "eval_steps_per_second": 61.464, |
| "step": 29000 |
| }, |
| { |
| "epoch": 3.359908883826879, |
| "grad_norm": 1.5009195804595947, |
| "learning_rate": 6.109293429462298e-06, |
| "loss": 0.1196, |
| "step": 29500 |
| }, |
| { |
| "epoch": 3.359908883826879, |
| "eval_accuracy": 0.9131585277844599, |
| "eval_f1_macro": 0.8922838073892356, |
| "eval_f1_micro": 0.9131585277844599, |
| "eval_loss": 0.6528560519218445, |
| "eval_precision_macro": 0.8910787251464412, |
| "eval_precision_micro": 0.9131585277844599, |
| "eval_recall_macro": 0.9099849910959319, |
| "eval_recall_micro": 0.9131585277844599, |
| "eval_runtime": 8.4579, |
| "eval_samples_per_second": 491.492, |
| "eval_steps_per_second": 61.481, |
| "step": 29500 |
| }, |
| { |
| "epoch": 3.416856492027335, |
| "grad_norm": 2.331648349761963, |
| "learning_rate": 5.0956926304652455e-06, |
| "loss": 0.1056, |
| "step": 30000 |
| }, |
| { |
| "epoch": 3.416856492027335, |
| "eval_accuracy": 0.9131585277844599, |
| "eval_f1_macro": 0.8919235957427174, |
| "eval_f1_micro": 0.9131585277844599, |
| "eval_loss": 0.6215759515762329, |
| "eval_precision_macro": 0.884609220404625, |
| "eval_precision_micro": 0.9131585277844599, |
| "eval_recall_macro": 0.9135941017288176, |
| "eval_recall_micro": 0.9131585277844599, |
| "eval_runtime": 8.4442, |
| "eval_samples_per_second": 492.29, |
| "eval_steps_per_second": 61.581, |
| "step": 30000 |
| }, |
| { |
| "epoch": 3.4738041002277904, |
| "grad_norm": 148.2794189453125, |
| "learning_rate": 4.168277560886878e-06, |
| "loss": 0.1106, |
| "step": 30500 |
| }, |
| { |
| "epoch": 3.4738041002277904, |
| "eval_accuracy": 0.9201347125330768, |
| "eval_f1_macro": 0.891345440347701, |
| "eval_f1_micro": 0.9201347125330768, |
| "eval_loss": 0.5669803023338318, |
| "eval_precision_macro": 0.8817511779041877, |
| "eval_precision_micro": 0.9201347125330768, |
| "eval_recall_macro": 0.9129521474780394, |
| "eval_recall_micro": 0.9201347125330768, |
| "eval_runtime": 8.4316, |
| "eval_samples_per_second": 493.024, |
| "eval_steps_per_second": 61.672, |
| "step": 30500 |
| }, |
| { |
| "epoch": 3.5307517084282463, |
| "grad_norm": 4.748849868774414, |
| "learning_rate": 3.3293381943799983e-06, |
| "loss": 0.0953, |
| "step": 31000 |
| }, |
| { |
| "epoch": 3.5307517084282463, |
| "eval_accuracy": 0.9189319220591773, |
| "eval_f1_macro": 0.8934567988898604, |
| "eval_f1_micro": 0.9189319220591773, |
| "eval_loss": 0.569622278213501, |
| "eval_precision_macro": 0.8825638399763271, |
| "eval_precision_micro": 0.9189319220591773, |
| "eval_recall_macro": 0.9187150647269768, |
| "eval_recall_micro": 0.9189319220591773, |
| "eval_runtime": 8.4421, |
| "eval_samples_per_second": 492.411, |
| "eval_steps_per_second": 61.596, |
| "step": 31000 |
| }, |
| { |
| "epoch": 3.5876993166287017, |
| "grad_norm": 1.284857153892517, |
| "learning_rate": 2.580946040356764e-06, |
| "loss": 0.0989, |
| "step": 31500 |
| }, |
| { |
| "epoch": 3.5876993166287017, |
| "eval_accuracy": 0.9194130382487371, |
| "eval_f1_macro": 0.8981523298720768, |
| "eval_f1_micro": 0.9194130382487371, |
| "eval_loss": 0.5652771592140198, |
| "eval_precision_macro": 0.8854580096444836, |
| "eval_precision_micro": 0.9194130382487371, |
| "eval_recall_macro": 0.9219999897271275, |
| "eval_recall_micro": 0.9194130382487371, |
| "eval_runtime": 8.43, |
| "eval_samples_per_second": 493.122, |
| "eval_steps_per_second": 61.685, |
| "step": 31500 |
| }, |
| { |
| "epoch": 3.644646924829157, |
| "grad_norm": 1.4339042901992798, |
| "learning_rate": 1.9249490290167914e-06, |
| "loss": 0.0989, |
| "step": 32000 |
| }, |
| { |
| "epoch": 3.644646924829157, |
| "eval_accuracy": 0.9138802020687996, |
| "eval_f1_macro": 0.89412218234686, |
| "eval_f1_micro": 0.9138802020687996, |
| "eval_loss": 0.6019502282142639, |
| "eval_precision_macro": 0.882568682117879, |
| "eval_precision_micro": 0.9138802020687996, |
| "eval_recall_macro": 0.9175339639156622, |
| "eval_recall_micro": 0.9138802020687996, |
| "eval_runtime": 8.4297, |
| "eval_samples_per_second": 493.138, |
| "eval_steps_per_second": 61.687, |
| "step": 32000 |
| }, |
| { |
| "epoch": 3.7015945330296125, |
| "grad_norm": 0.7662363648414612, |
| "learning_rate": 1.3629669484372722e-06, |
| "loss": 0.0876, |
| "step": 32500 |
| }, |
| { |
| "epoch": 3.7015945330296125, |
| "eval_accuracy": 0.9167668992061583, |
| "eval_f1_macro": 0.8960433945398601, |
| "eval_f1_micro": 0.9167668992061583, |
| "eval_loss": 0.578229546546936, |
| "eval_precision_macro": 0.8823539415499122, |
| "eval_precision_micro": 0.9167668992061583, |
| "eval_recall_macro": 0.9211442367709737, |
| "eval_recall_micro": 0.9167668992061583, |
| "eval_runtime": 8.4419, |
| "eval_samples_per_second": 492.426, |
| "eval_steps_per_second": 61.598, |
| "step": 32500 |
| }, |
| { |
| "epoch": 3.7585421412300684, |
| "grad_norm": 0.8838233947753906, |
| "learning_rate": 8.963874449915156e-07, |
| "loss": 0.1004, |
| "step": 33000 |
| }, |
| { |
| "epoch": 3.7585421412300684, |
| "eval_accuracy": 0.9186913639643974, |
| "eval_f1_macro": 0.8974164477796313, |
| "eval_f1_micro": 0.9186913639643974, |
| "eval_loss": 0.5638399720191956, |
| "eval_precision_macro": 0.8850042534249818, |
| "eval_precision_micro": 0.9186913639643974, |
| "eval_recall_macro": 0.9204573653096998, |
| "eval_recall_micro": 0.9186913639643974, |
| "eval_runtime": 8.4287, |
| "eval_samples_per_second": 493.198, |
| "eval_steps_per_second": 61.694, |
| "step": 33000 |
| }, |
| { |
| "epoch": 3.8154897494305238, |
| "grad_norm": 0.8862270712852478, |
| "learning_rate": 5.263625969720654e-07, |
| "loss": 0.1163, |
| "step": 33500 |
| }, |
| { |
| "epoch": 3.8154897494305238, |
| "eval_accuracy": 0.9189319220591773, |
| "eval_f1_macro": 0.8978554181419179, |
| "eval_f1_micro": 0.9189319220591773, |
| "eval_loss": 0.5575982332229614, |
| "eval_precision_macro": 0.8842809438001717, |
| "eval_precision_micro": 0.9189319220591773, |
| "eval_recall_macro": 0.9207880969765441, |
| "eval_recall_micro": 0.9189319220591773, |
| "eval_runtime": 8.4411, |
| "eval_samples_per_second": 492.47, |
| "eval_steps_per_second": 61.603, |
| "step": 33500 |
| }, |
| { |
| "epoch": 3.8724373576309796, |
| "grad_norm": 0.0018380646361038089, |
| "learning_rate": 2.5380606987847725e-07, |
| "loss": 0.1014, |
| "step": 34000 |
| }, |
| { |
| "epoch": 3.8724373576309796, |
| "eval_accuracy": 0.9184508058696175, |
| "eval_f1_macro": 0.8970972466165426, |
| "eval_f1_micro": 0.9184508058696175, |
| "eval_loss": 0.5561444759368896, |
| "eval_precision_macro": 0.8836350789167062, |
| "eval_precision_micro": 0.9184508058696175, |
| "eval_recall_macro": 0.9206973516595941, |
| "eval_recall_micro": 0.9184508058696175, |
| "eval_runtime": 8.432, |
| "eval_samples_per_second": 493.002, |
| "eval_steps_per_second": 61.67, |
| "step": 34000 |
| }, |
| { |
| "epoch": 3.929384965831435, |
| "grad_norm": 1.3534623384475708, |
| "learning_rate": 7.939086039413291e-08, |
| "loss": 0.0948, |
| "step": 34500 |
| }, |
| { |
| "epoch": 3.929384965831435, |
| "eval_accuracy": 0.9191724801539571, |
| "eval_f1_macro": 0.8982632597992836, |
| "eval_f1_micro": 0.9191724801539571, |
| "eval_loss": 0.5567488074302673, |
| "eval_precision_macro": 0.8854065999210586, |
| "eval_precision_micro": 0.9191724801539571, |
| "eval_recall_macro": 0.9212323971384597, |
| "eval_recall_micro": 0.9191724801539571, |
| "eval_runtime": 8.4196, |
| "eval_samples_per_second": 493.73, |
| "eval_steps_per_second": 61.761, |
| "step": 34500 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 35120, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.2631039537152e+16, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|