{ "best_metric": 0.8982632597992836, "best_model_checkpoint": "./arabert_author_model/checkpoint-34500", "epoch": 3.9863325740318905, "eval_steps": 500, "global_step": 35000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05694760820045558, "grad_norm": 15.19107437133789, "learning_rate": 1.1343963553530753e-05, "loss": 2.7205, "step": 500 }, { "epoch": 0.05694760820045558, "eval_accuracy": 0.3750300697618475, "eval_f1_macro": 0.28596660166175775, "eval_f1_micro": 0.3750300697618475, "eval_loss": 2.32218337059021, "eval_precision_macro": 0.3865217733215992, "eval_precision_micro": 0.3750300697618475, "eval_recall_macro": 0.3672924033460141, "eval_recall_micro": 0.3750300697618475, "eval_runtime": 9.1613, "eval_samples_per_second": 453.755, "eval_steps_per_second": 56.76, "step": 500 }, { "epoch": 0.11389521640091116, "grad_norm": 21.725738525390625, "learning_rate": 2.2710706150341686e-05, "loss": 1.6025, "step": 1000 }, { "epoch": 0.11389521640091116, "eval_accuracy": 0.4892951647822949, "eval_f1_macro": 0.4406771878616986, "eval_f1_micro": 0.4892951647822949, "eval_loss": 1.6631975173950195, "eval_precision_macro": 0.56733686366756, "eval_precision_micro": 0.4892951647822949, "eval_recall_macro": 0.48704653807602066, "eval_recall_micro": 0.4892951647822949, "eval_runtime": 8.477, "eval_samples_per_second": 490.385, "eval_steps_per_second": 61.342, "step": 1000 }, { "epoch": 0.17084282460136674, "grad_norm": 13.716214179992676, "learning_rate": 3.4100227790432806e-05, "loss": 1.0328, "step": 1500 }, { "epoch": 0.17084282460136674, "eval_accuracy": 0.6615347606446957, "eval_f1_macro": 0.5830637518928246, "eval_f1_micro": 0.6615347606446957, "eval_loss": 1.1024270057678223, "eval_precision_macro": 0.697472543022928, "eval_precision_micro": 0.6615347606446957, "eval_recall_macro": 0.6033302465966366, "eval_recall_micro": 0.6615347606446957, "eval_runtime": 8.4612, "eval_samples_per_second": 491.299, "eval_steps_per_second": 61.457, "step": 1500 }, { "epoch": 0.22779043280182232, "grad_norm": 17.078737258911133, "learning_rate": 4.548974943052392e-05, "loss": 0.8504, "step": 2000 }, { "epoch": 0.22779043280182232, "eval_accuracy": 0.639162857830166, "eval_f1_macro": 0.5476482518700577, "eval_f1_micro": 0.639162857830166, "eval_loss": 1.2488256692886353, "eval_precision_macro": 0.668531963924838, "eval_precision_micro": 0.639162857830166, "eval_recall_macro": 0.5729925283587635, "eval_recall_micro": 0.639162857830166, "eval_runtime": 8.6485, "eval_samples_per_second": 480.664, "eval_steps_per_second": 60.126, "step": 2000 }, { "epoch": 0.2847380410022779, "grad_norm": 45.41240692138672, "learning_rate": 5.6856492027334856e-05, "loss": 0.7882, "step": 2500 }, { "epoch": 0.2847380410022779, "eval_accuracy": 0.6523935530430599, "eval_f1_macro": 0.5994264876841328, "eval_f1_micro": 0.6523935530430599, "eval_loss": 1.2463157176971436, "eval_precision_macro": 0.6529647074997418, "eval_precision_micro": 0.6523935530430599, "eval_recall_macro": 0.6436371671271747, "eval_recall_micro": 0.6523935530430599, "eval_runtime": 8.4613, "eval_samples_per_second": 491.293, "eval_steps_per_second": 61.456, "step": 2500 }, { "epoch": 0.3416856492027335, "grad_norm": 17.272947311401367, "learning_rate": 6.824601366742597e-05, "loss": 0.8087, "step": 3000 }, { "epoch": 0.3416856492027335, "eval_accuracy": 0.7055568919894154, "eval_f1_macro": 0.6265017927625092, "eval_f1_micro": 0.7055568919894154, "eval_loss": 1.2766073942184448, "eval_precision_macro": 0.6878658131219406, "eval_precision_micro": 0.7055568919894154, "eval_recall_macro": 0.6658140911489481, "eval_recall_micro": 0.7055568919894154, "eval_runtime": 8.9536, "eval_samples_per_second": 464.281, "eval_steps_per_second": 58.077, "step": 3000 }, { "epoch": 0.39863325740318906, "grad_norm": 36.27668380737305, "learning_rate": 7.96355353075171e-05, "loss": 0.7887, "step": 3500 }, { "epoch": 0.39863325740318906, "eval_accuracy": 0.7149386576858311, "eval_f1_macro": 0.6616726950770662, "eval_f1_micro": 0.7149386576858311, "eval_loss": 1.1627144813537598, "eval_precision_macro": 0.7514554135519156, "eval_precision_micro": 0.7149386576858311, "eval_recall_macro": 0.6834968200252539, "eval_recall_micro": 0.7149386576858311, "eval_runtime": 8.4816, "eval_samples_per_second": 490.118, "eval_steps_per_second": 61.309, "step": 3500 }, { "epoch": 0.45558086560364464, "grad_norm": 3.0386502742767334, "learning_rate": 7.995372539966228e-05, "loss": 0.7357, "step": 4000 }, { "epoch": 0.45558086560364464, "eval_accuracy": 0.7611258118835699, "eval_f1_macro": 0.6745353046335729, "eval_f1_micro": 0.7611258118835699, "eval_loss": 0.9807717800140381, "eval_precision_macro": 0.7007756446431578, "eval_precision_micro": 0.7611258118835699, "eval_recall_macro": 0.7425048520300992, "eval_recall_micro": 0.7611258118835699, "eval_runtime": 8.4828, "eval_samples_per_second": 490.05, "eval_steps_per_second": 61.3, "step": 4000 }, { "epoch": 0.5125284738041003, "grad_norm": 2.7381744384765625, "learning_rate": 7.980884762455173e-05, "loss": 0.7022, "step": 4500 }, { "epoch": 0.5125284738041003, "eval_accuracy": 0.76088525378879, "eval_f1_macro": 0.6871238458955197, "eval_f1_micro": 0.76088525378879, "eval_loss": 1.1158560514450073, "eval_precision_macro": 0.7591168248124284, "eval_precision_micro": 0.76088525378879, "eval_recall_macro": 0.7099914791461782, "eval_recall_micro": 0.76088525378879, "eval_runtime": 9.0538, "eval_samples_per_second": 459.145, "eval_steps_per_second": 57.435, "step": 4500 }, { "epoch": 0.5694760820045558, "grad_norm": 32.207054138183594, "learning_rate": 7.956625791551662e-05, "loss": 0.6587, "step": 5000 }, { "epoch": 0.5694760820045558, "eval_accuracy": 0.8008178975222516, "eval_f1_macro": 0.76243086871798, "eval_f1_micro": 0.8008178975222516, "eval_loss": 0.8932181000709534, "eval_precision_macro": 0.7876270109291943, "eval_precision_micro": 0.8008178975222516, "eval_recall_macro": 0.7876260076814952, "eval_recall_micro": 0.8008178975222516, "eval_runtime": 8.5148, "eval_samples_per_second": 488.207, "eval_steps_per_second": 61.07, "step": 5000 }, { "epoch": 0.6264236902050114, "grad_norm": 0.29054978489875793, "learning_rate": 7.922558317223566e-05, "loss": 0.5719, "step": 5500 }, { "epoch": 0.6264236902050114, "eval_accuracy": 0.773875390906904, "eval_f1_macro": 0.7249768597960354, "eval_f1_micro": 0.773875390906904, "eval_loss": 0.9966481328010559, "eval_precision_macro": 0.7358501532705917, "eval_precision_micro": 0.773875390906904, "eval_recall_macro": 0.7614420126439011, "eval_recall_micro": 0.773875390906904, "eval_runtime": 8.4793, "eval_samples_per_second": 490.251, "eval_steps_per_second": 61.326, "step": 5500 }, { "epoch": 0.683371298405467, "grad_norm": 1.0872896909713745, "learning_rate": 7.878805260363261e-05, "loss": 0.6425, "step": 6000 }, { "epoch": 0.683371298405467, "eval_accuracy": 0.771710368053885, "eval_f1_macro": 0.7254677224325083, "eval_f1_micro": 0.771710368053885, "eval_loss": 0.9605371952056885, "eval_precision_macro": 0.8005336996598393, "eval_precision_micro": 0.771710368053885, "eval_recall_macro": 0.7547512504988463, "eval_recall_micro": 0.771710368053885, "eval_runtime": 8.4743, "eval_samples_per_second": 490.54, "eval_steps_per_second": 61.362, "step": 6000 }, { "epoch": 0.7403189066059226, "grad_norm": 189.1195068359375, "learning_rate": 7.82547465603587e-05, "loss": 0.5433, "step": 6500 }, { "epoch": 0.7403189066059226, "eval_accuracy": 0.7700264613904257, "eval_f1_macro": 0.740451647791416, "eval_f1_micro": 0.7700264613904257, "eval_loss": 1.0783036947250366, "eval_precision_macro": 0.769666104173451, "eval_precision_micro": 0.7700264613904257, "eval_recall_macro": 0.7659213860833466, "eval_recall_micro": 0.7700264613904257, "eval_runtime": 8.4608, "eval_samples_per_second": 491.328, "eval_steps_per_second": 61.46, "step": 6500 }, { "epoch": 0.7972665148063781, "grad_norm": 0.5625237822532654, "learning_rate": 7.762833068916386e-05, "loss": 0.5736, "step": 7000 }, { "epoch": 0.7972665148063781, "eval_accuracy": 0.7782054366129421, "eval_f1_macro": 0.6779030343365783, "eval_f1_micro": 0.7782054366129421, "eval_loss": 1.0509027242660522, "eval_precision_macro": 0.7339555194909612, "eval_precision_micro": 0.7782054366129421, "eval_recall_macro": 0.6828886626680506, "eval_recall_micro": 0.7782054366129421, "eval_runtime": 8.481, "eval_samples_per_second": 490.156, "eval_steps_per_second": 61.314, "step": 7000 }, { "epoch": 0.8542141230068337, "grad_norm": 64.8875503540039, "learning_rate": 7.690784156928418e-05, "loss": 0.5273, "step": 7500 }, { "epoch": 0.8542141230068337, "eval_accuracy": 0.7774837623286024, "eval_f1_macro": 0.7006759117702724, "eval_f1_micro": 0.7774837623286024, "eval_loss": 1.083090901374817, "eval_precision_macro": 0.7615638623524419, "eval_precision_micro": 0.7774837623286024, "eval_recall_macro": 0.7385809772355115, "eval_recall_micro": 0.7774837623286024, "eval_runtime": 8.4704, "eval_samples_per_second": 490.769, "eval_steps_per_second": 61.39, "step": 7500 }, { "epoch": 0.9111617312072893, "grad_norm": 0.9078112840652466, "learning_rate": 7.609621959255558e-05, "loss": 0.5268, "step": 8000 }, { "epoch": 0.9111617312072893, "eval_accuracy": 0.7445273033437575, "eval_f1_macro": 0.699580887402278, "eval_f1_micro": 0.7445273033437575, "eval_loss": 1.3979923725128174, "eval_precision_macro": 0.7001048011829232, "eval_precision_micro": 0.7445273033437575, "eval_recall_macro": 0.7726266308290096, "eval_recall_micro": 0.7445273033437575, "eval_runtime": 8.4384, "eval_samples_per_second": 492.631, "eval_steps_per_second": 61.623, "step": 8000 }, { "epoch": 0.9681093394077449, "grad_norm": 42.65549850463867, "learning_rate": 7.519735782617663e-05, "loss": 0.5462, "step": 8500 }, { "epoch": 0.9681093394077449, "eval_accuracy": 0.8287226365167187, "eval_f1_macro": 0.7785313462283256, "eval_f1_micro": 0.8287226365167187, "eval_loss": 0.9026873111724854, "eval_precision_macro": 0.7868255645583773, "eval_precision_micro": 0.8287226365167187, "eval_recall_macro": 0.8024053906273642, "eval_recall_micro": 0.8287226365167187, "eval_runtime": 8.4335, "eval_samples_per_second": 492.916, "eval_steps_per_second": 61.659, "step": 8500 }, { "epoch": 1.0250569476082005, "grad_norm": 0.9462873935699463, "learning_rate": 7.420987383057407e-05, "loss": 0.486, "step": 9000 }, { "epoch": 1.0250569476082005, "eval_accuracy": 0.7211931681501083, "eval_f1_macro": 0.6932948143152837, "eval_f1_micro": 0.7211931681501083, "eval_loss": 1.7263842821121216, "eval_precision_macro": 0.7360552900892146, "eval_precision_micro": 0.7211931681501083, "eval_recall_macro": 0.7409941847523269, "eval_recall_micro": 0.7211931681501083, "eval_runtime": 8.473, "eval_samples_per_second": 490.615, "eval_steps_per_second": 61.371, "step": 9000 }, { "epoch": 1.082004555808656, "grad_norm": 0.8366897106170654, "learning_rate": 7.314014528807089e-05, "loss": 0.4458, "step": 9500 }, { "epoch": 1.082004555808656, "eval_accuracy": 0.8186191965359635, "eval_f1_macro": 0.7675006381723768, "eval_f1_micro": 0.8186191965359635, "eval_loss": 0.9773014783859253, "eval_precision_macro": 0.7844621076644617, "eval_precision_micro": 0.8186191965359635, "eval_recall_macro": 0.8041851318048103, "eval_recall_micro": 0.8186191965359635, "eval_runtime": 8.4706, "eval_samples_per_second": 490.754, "eval_steps_per_second": 61.389, "step": 9500 }, { "epoch": 1.1389521640091116, "grad_norm": 2.115098237991333, "learning_rate": 7.198652696785955e-05, "loss": 0.4102, "step": 10000 }, { "epoch": 1.1389521640091116, "eval_accuracy": 0.8316093336540774, "eval_f1_macro": 0.7933624073358755, "eval_f1_micro": 0.8316093336540774, "eval_loss": 0.967036247253418, "eval_precision_macro": 0.7949251475033909, "eval_precision_micro": 0.8316093336540774, "eval_recall_macro": 0.8274655786829063, "eval_recall_micro": 0.8316093336540774, "eval_runtime": 8.4509, "eval_samples_per_second": 491.9, "eval_steps_per_second": 61.532, "step": 10000 }, { "epoch": 1.1958997722095672, "grad_norm": 1.0413740873336792, "learning_rate": 7.075392750273938e-05, "loss": 0.3773, "step": 10500 }, { "epoch": 1.1958997722095672, "eval_accuracy": 0.8171758479672842, "eval_f1_macro": 0.7981622962245145, "eval_f1_micro": 0.8171758479672842, "eval_loss": 0.9198176860809326, "eval_precision_macro": 0.819291401144176, "eval_precision_micro": 0.8171758479672842, "eval_recall_macro": 0.8245482183363974, "eval_recall_micro": 0.8171758479672842, "eval_runtime": 8.4577, "eval_samples_per_second": 491.505, "eval_steps_per_second": 61.482, "step": 10500 }, { "epoch": 1.2528473804100229, "grad_norm": 7.1126203536987305, "learning_rate": 6.94453904277921e-05, "loss": 0.3796, "step": 11000 }, { "epoch": 1.2528473804100229, "eval_accuracy": 0.7741159490016839, "eval_f1_macro": 0.7334267874337288, "eval_f1_micro": 0.7741159490016839, "eval_loss": 1.324471116065979, "eval_precision_macro": 0.7554977548108798, "eval_precision_micro": 0.7741159490016839, "eval_recall_macro": 0.7865189643712818, "eval_recall_micro": 0.7741159490016839, "eval_runtime": 8.4546, "eval_samples_per_second": 491.688, "eval_steps_per_second": 61.505, "step": 11000 }, { "epoch": 1.3097949886104785, "grad_norm": 85.53689575195312, "learning_rate": 6.806414678327537e-05, "loss": 0.4432, "step": 11500 }, { "epoch": 1.3097949886104785, "eval_accuracy": 0.8092374308395478, "eval_f1_macro": 0.7438442519057924, "eval_f1_micro": 0.8092374308395478, "eval_loss": 1.1105079650878906, "eval_precision_macro": 0.7748012629794462, "eval_precision_micro": 0.8092374308395478, "eval_recall_macro": 0.7783932685192183, "eval_recall_micro": 0.8092374308395478, "eval_runtime": 8.4968, "eval_samples_per_second": 489.244, "eval_steps_per_second": 61.2, "step": 11500 }, { "epoch": 1.366742596810934, "grad_norm": 1.0569897890090942, "learning_rate": 6.661360713653681e-05, "loss": 0.389, "step": 12000 }, { "epoch": 1.366742596810934, "eval_accuracy": 0.8128458022612461, "eval_f1_macro": 0.7595338289100665, "eval_f1_micro": 0.8128458022612461, "eval_loss": 1.0267034769058228, "eval_precision_macro": 0.7590761169934961, "eval_precision_micro": 0.8128458022612461, "eval_recall_macro": 0.8036606747506161, "eval_recall_micro": 0.8128458022612461, "eval_runtime": 8.4648, "eval_samples_per_second": 491.09, "eval_steps_per_second": 61.431, "step": 12000 }, { "epoch": 1.4236902050113895, "grad_norm": 14.686357498168945, "learning_rate": 6.509735316063996e-05, "loss": 0.6087, "step": 12500 }, { "epoch": 1.4236902050113895, "eval_accuracy": 0.8443589126774116, "eval_f1_macro": 0.7997974993396904, "eval_f1_micro": 0.8443589126774116, "eval_loss": 0.9193519353866577, "eval_precision_macro": 0.7894219117254144, "eval_precision_micro": 0.8443589126774116, "eval_recall_macro": 0.833108777347993, "eval_recall_micro": 0.8443589126774116, "eval_runtime": 8.4379, "eval_samples_per_second": 492.657, "eval_steps_per_second": 61.627, "step": 12500 }, { "epoch": 1.4806378132118452, "grad_norm": 0.05014890432357788, "learning_rate": 6.352234452003862e-05, "loss": 0.4803, "step": 13000 }, { "epoch": 1.4806378132118452, "eval_accuracy": 0.8535001202790474, "eval_f1_macro": 0.8265458036830905, "eval_f1_micro": 0.8535001202790474, "eval_loss": 0.879317581653595, "eval_precision_macro": 0.8329036145166532, "eval_precision_micro": 0.8535001202790474, "eval_recall_macro": 0.8340628674797995, "eval_recall_micro": 0.8535001202790474, "eval_runtime": 8.4157, "eval_samples_per_second": 493.96, "eval_steps_per_second": 61.79, "step": 13000 }, { "epoch": 1.5375854214123006, "grad_norm": 40.28041076660156, "learning_rate": 6.188948654276723e-05, "loss": 0.5513, "step": 13500 }, { "epoch": 1.5375854214123006, "eval_accuracy": 0.7782054366129421, "eval_f1_macro": 0.7066835334217871, "eval_f1_micro": 0.7782054366129421, "eval_loss": 1.1909141540527344, "eval_precision_macro": 0.7255202837659691, "eval_precision_micro": 0.7782054366129421, "eval_recall_macro": 0.7402758652568006, "eval_recall_micro": 0.7782054366129421, "eval_runtime": 8.4835, "eval_samples_per_second": 490.012, "eval_steps_per_second": 61.296, "step": 13500 }, { "epoch": 1.5945330296127562, "grad_norm": 8.929847717285156, "learning_rate": 6.019936353958699e-05, "loss": 0.693, "step": 14000 }, { "epoch": 1.5945330296127562, "eval_accuracy": 0.2460909309598268, "eval_f1_macro": 0.20336556842910194, "eval_f1_micro": 0.2460909309598268, "eval_loss": 2.8277578353881836, "eval_precision_macro": 0.5226671539146224, "eval_precision_micro": 0.2460909309598268, "eval_recall_macro": 0.1936798727163823, "eval_recall_micro": 0.2460909309598268, "eval_runtime": 8.4782, "eval_samples_per_second": 490.315, "eval_steps_per_second": 61.334, "step": 14000 }, { "epoch": 1.6514806378132119, "grad_norm": 28.1862735748291, "learning_rate": 5.8459364260048594e-05, "loss": 0.9646, "step": 14500 }, { "epoch": 1.6514806378132119, "eval_accuracy": 0.8246331489054607, "eval_f1_macro": 0.7913907456133474, "eval_f1_micro": 0.8246331489054607, "eval_loss": 0.948131799697876, "eval_precision_macro": 0.799701208597097, "eval_precision_micro": 0.8246331489054607, "eval_recall_macro": 0.8123299807423895, "eval_recall_micro": 0.8246331489054607, "eval_runtime": 8.4643, "eval_samples_per_second": 491.124, "eval_steps_per_second": 61.435, "step": 14500 }, { "epoch": 1.7084282460136673, "grad_norm": 7.083284854888916, "learning_rate": 5.6673785111054136e-05, "loss": 0.462, "step": 15000 }, { "epoch": 1.7084282460136673, "eval_accuracy": 0.8472456098147703, "eval_f1_macro": 0.8091993297668394, "eval_f1_micro": 0.8472456098147703, "eval_loss": 0.8667464256286621, "eval_precision_macro": 0.8198322314440802, "eval_precision_micro": 0.8472456098147703, "eval_recall_macro": 0.8266325847969769, "eval_recall_micro": 0.8472456098147703, "eval_runtime": 8.4589, "eval_samples_per_second": 491.435, "eval_steps_per_second": 61.474, "step": 15000 }, { "epoch": 1.7653758542141231, "grad_norm": 65.16443634033203, "learning_rate": 5.484703504533721e-05, "loss": 0.4093, "step": 15500 }, { "epoch": 1.7653758542141231, "eval_accuracy": 0.8116430117873467, "eval_f1_macro": 0.7700885656504642, "eval_f1_micro": 0.8116430117873467, "eval_loss": 1.1932649612426758, "eval_precision_macro": 0.8232095109547221, "eval_precision_micro": 0.8116430117873467, "eval_recall_macro": 0.775800293436143, "eval_recall_micro": 0.8116430117873467, "eval_runtime": 8.4735, "eval_samples_per_second": 490.589, "eval_steps_per_second": 61.368, "step": 15500 }, { "epoch": 1.8223234624145785, "grad_norm": 15.238636016845703, "learning_rate": 5.2983624674875084e-05, "loss": 0.349, "step": 16000 }, { "epoch": 1.8223234624145785, "eval_accuracy": 0.8799615107048352, "eval_f1_macro": 0.8377281287378512, "eval_f1_micro": 0.8799615107048352, "eval_loss": 0.7749123573303223, "eval_precision_macro": 0.8358272736538074, "eval_precision_micro": 0.8799615107048352, "eval_recall_macro": 0.8723899950094214, "eval_recall_micro": 0.8799615107048352, "eval_runtime": 8.4617, "eval_samples_per_second": 491.272, "eval_steps_per_second": 61.453, "step": 16000 }, { "epoch": 1.8792710706150342, "grad_norm": 1.931815505027771, "learning_rate": 5.108815513328386e-05, "loss": 0.3333, "step": 16500 }, { "epoch": 1.8792710706150342, "eval_accuracy": 0.8799615107048352, "eval_f1_macro": 0.851689807067872, "eval_f1_micro": 0.8799615107048352, "eval_loss": 0.6482954025268555, "eval_precision_macro": 0.8439980638748368, "eval_precision_micro": 0.8799615107048352, "eval_recall_macro": 0.8709009740944947, "eval_recall_micro": 0.8799615107048352, "eval_runtime": 8.4707, "eval_samples_per_second": 490.751, "eval_steps_per_second": 61.388, "step": 16500 }, { "epoch": 1.9362186788154898, "grad_norm": 14.734030723571777, "learning_rate": 4.916530671469754e-05, "loss": 0.3449, "step": 17000 }, { "epoch": 1.9362186788154898, "eval_accuracy": 0.7890305508780371, "eval_f1_macro": 0.7375164837600725, "eval_f1_micro": 0.7890305508780371, "eval_loss": 1.2130000591278076, "eval_precision_macro": 0.7696829158091395, "eval_precision_micro": 0.7890305508780371, "eval_recall_macro": 0.7545432686041814, "eval_recall_micro": 0.7890305508780371, "eval_runtime": 8.448, "eval_samples_per_second": 492.07, "eval_steps_per_second": 61.553, "step": 17000 }, { "epoch": 1.9931662870159452, "grad_norm": 4.0362091064453125, "learning_rate": 4.7219827317183907e-05, "loss": 0.2982, "step": 17500 }, { "epoch": 1.9931662870159452, "eval_accuracy": 0.8599951888381044, "eval_f1_macro": 0.8354955249556116, "eval_f1_micro": 0.8599951888381044, "eval_loss": 0.8803524374961853, "eval_precision_macro": 0.8342493543222153, "eval_precision_micro": 0.8599951888381044, "eval_recall_macro": 0.8556466165427711, "eval_recall_micro": 0.8599951888381044, "eval_runtime": 8.4723, "eval_samples_per_second": 490.66, "eval_steps_per_second": 61.377, "step": 17500 }, { "epoch": 2.050113895216401, "grad_norm": 0.7534123659133911, "learning_rate": 4.525652071923279e-05, "loss": 0.2348, "step": 18000 }, { "epoch": 2.050113895216401, "eval_accuracy": 0.8448400288669714, "eval_f1_macro": 0.8190720986060728, "eval_f1_micro": 0.8448400288669714, "eval_loss": 1.1245763301849365, "eval_precision_macro": 0.8337767355676724, "eval_precision_micro": 0.8448400288669714, "eval_recall_macro": 0.8557743090961959, "eval_recall_micro": 0.8448400288669714, "eval_runtime": 8.4721, "eval_samples_per_second": 490.667, "eval_steps_per_second": 61.378, "step": 18000 }, { "epoch": 2.1070615034168565, "grad_norm": 1.4927374124526978, "learning_rate": 4.328023471826429e-05, "loss": 0.2299, "step": 18500 }, { "epoch": 2.1070615034168565, "eval_accuracy": 0.8708203031031995, "eval_f1_macro": 0.8303241771039817, "eval_f1_micro": 0.8708203031031995, "eval_loss": 0.8329204320907593, "eval_precision_macro": 0.83267401976066, "eval_precision_micro": 0.8708203031031995, "eval_recall_macro": 0.8610797711013763, "eval_recall_micro": 0.8708203031031995, "eval_runtime": 8.4578, "eval_samples_per_second": 491.498, "eval_steps_per_second": 61.482, "step": 18500 }, { "epoch": 2.164009111617312, "grad_norm": 1.7383619546890259, "learning_rate": 4.129584916044555e-05, "loss": 0.2468, "step": 19000 }, { "epoch": 2.164009111617312, "eval_accuracy": 0.8773153716622565, "eval_f1_macro": 0.846177762458016, "eval_f1_micro": 0.8773153716622565, "eval_loss": 0.7664415836334229, "eval_precision_macro": 0.8368864405451267, "eval_precision_micro": 0.8773153716622565, "eval_recall_macro": 0.8780489137762446, "eval_recall_micro": 0.8773153716622565, "eval_runtime": 8.4381, "eval_samples_per_second": 492.645, "eval_steps_per_second": 61.625, "step": 19000 }, { "epoch": 2.2209567198177678, "grad_norm": 400.013916015625, "learning_rate": 3.930826389137262e-05, "loss": 0.1899, "step": 19500 }, { "epoch": 2.2209567198177678, "eval_accuracy": 0.8862160211691124, "eval_f1_macro": 0.8558563382552302, "eval_f1_micro": 0.8862160211691124, "eval_loss": 0.7495226263999939, "eval_precision_macro": 0.8555231307289006, "eval_precision_micro": 0.8862160211691124, "eval_recall_macro": 0.8758801511510593, "eval_recall_micro": 0.8862160211691124, "eval_runtime": 8.4445, "eval_samples_per_second": 492.274, "eval_steps_per_second": 61.579, "step": 19500 }, { "epoch": 2.277904328018223, "grad_norm": 2.0449647903442383, "learning_rate": 3.732635344608829e-05, "loss": 0.1977, "step": 20000 }, { "epoch": 2.277904328018223, "eval_accuracy": 0.8551840269425066, "eval_f1_macro": 0.8198359165864053, "eval_f1_micro": 0.8551840269425066, "eval_loss": 1.0149922370910645, "eval_precision_macro": 0.8328051392132652, "eval_precision_micro": 0.8551840269425066, "eval_recall_macro": 0.853902169918103, "eval_recall_micro": 0.8551840269425066, "eval_runtime": 8.4355, "eval_samples_per_second": 492.798, "eval_steps_per_second": 61.644, "step": 20000 }, { "epoch": 2.334851936218679, "grad_norm": 0.7749654650688171, "learning_rate": 3.534706966798757e-05, "loss": 0.2314, "step": 20500 }, { "epoch": 2.334851936218679, "eval_accuracy": 0.8948761125811884, "eval_f1_macro": 0.8659539030629905, "eval_f1_micro": 0.8948761125811884, "eval_loss": 0.6898870468139648, "eval_precision_macro": 0.8564830162508421, "eval_precision_micro": 0.8948761125811884, "eval_recall_macro": 0.8978688795664334, "eval_recall_micro": 0.8948761125811884, "eval_runtime": 8.4412, "eval_samples_per_second": 492.466, "eval_steps_per_second": 61.603, "step": 20500 }, { "epoch": 2.3917995444191344, "grad_norm": 0.007438243832439184, "learning_rate": 3.337927490728384e-05, "loss": 0.203, "step": 21000 }, { "epoch": 2.3917995444191344, "eval_accuracy": 0.8984844840028867, "eval_f1_macro": 0.8654367599963082, "eval_f1_micro": 0.8984844840028867, "eval_loss": 0.6225568652153015, "eval_precision_macro": 0.8710584795534867, "eval_precision_micro": 0.8984844840028867, "eval_recall_macro": 0.8907316535034207, "eval_recall_micro": 0.8984844840028867, "eval_runtime": 8.4582, "eval_samples_per_second": 491.477, "eval_steps_per_second": 61.479, "step": 21000 }, { "epoch": 2.44874715261959, "grad_norm": 1.0429240465164185, "learning_rate": 3.142782804357047e-05, "loss": 0.1784, "step": 21500 }, { "epoch": 2.44874715261959, "eval_accuracy": 0.8806831849891749, "eval_f1_macro": 0.8516903935082014, "eval_f1_micro": 0.8806831849891749, "eval_loss": 0.8446455001831055, "eval_precision_macro": 0.86002376045135, "eval_precision_micro": 0.8806831849891749, "eval_recall_macro": 0.8768325982997476, "eval_recall_micro": 0.8806831849891749, "eval_runtime": 8.4659, "eval_samples_per_second": 491.026, "eval_steps_per_second": 61.423, "step": 21500 }, { "epoch": 2.5056947608200457, "grad_norm": 89.87359619140625, "learning_rate": 2.9497547590207118e-05, "loss": 0.2044, "step": 22000 }, { "epoch": 2.5056947608200457, "eval_accuracy": 0.8556651431320664, "eval_f1_macro": 0.8466413367690651, "eval_f1_micro": 0.8556651431320664, "eval_loss": 0.9901952743530273, "eval_precision_macro": 0.8512325160929325, "eval_precision_micro": 0.8556651431320664, "eval_recall_macro": 0.875509109034969, "eval_recall_micro": 0.8556651431320664, "eval_runtime": 8.5077, "eval_samples_per_second": 488.617, "eval_steps_per_second": 61.121, "step": 22000 }, { "epoch": 2.562642369020501, "grad_norm": 0.013815644197165966, "learning_rate": 2.759319979644478e-05, "loss": 0.1914, "step": 22500 }, { "epoch": 2.562642369020501, "eval_accuracy": 0.8927110897281694, "eval_f1_macro": 0.8644517343056812, "eval_f1_micro": 0.8927110897281694, "eval_loss": 0.7158553004264832, "eval_precision_macro": 0.8493787111870335, "eval_precision_micro": 0.8927110897281694, "eval_recall_macro": 0.898066885887026, "eval_recall_micro": 0.8927110897281694, "eval_runtime": 8.4749, "eval_samples_per_second": 490.509, "eval_steps_per_second": 61.358, "step": 22500 }, { "epoch": 2.619589977220957, "grad_norm": 4.245390892028809, "learning_rate": 2.5719486878601176e-05, "loss": 0.1639, "step": 23000 }, { "epoch": 2.619589977220957, "eval_accuracy": 0.8864565792638922, "eval_f1_macro": 0.8458899447458572, "eval_f1_micro": 0.8864565792638922, "eval_loss": 0.729199230670929, "eval_precision_macro": 0.8401968974647541, "eval_precision_micro": 0.8864565792638922, "eval_recall_macro": 0.8805776786360812, "eval_recall_micro": 0.8864565792638922, "eval_runtime": 8.4586, "eval_samples_per_second": 491.451, "eval_steps_per_second": 61.476, "step": 23000 }, { "epoch": 2.6765375854214124, "grad_norm": 1.427230715751648, "learning_rate": 2.3881035409346452e-05, "loss": 0.218, "step": 23500 }, { "epoch": 2.6765375854214124, "eval_accuracy": 0.8965600192446476, "eval_f1_macro": 0.8710100150868682, "eval_f1_micro": 0.8965600192446476, "eval_loss": 0.6507639288902283, "eval_precision_macro": 0.8627987859589317, "eval_precision_micro": 0.8965600192446476, "eval_recall_macro": 0.8951286903819264, "eval_recall_micro": 0.8965600192446476, "eval_runtime": 8.4672, "eval_samples_per_second": 490.955, "eval_steps_per_second": 61.414, "step": 23500 }, { "epoch": 2.733485193621868, "grad_norm": 0.07296980172395706, "learning_rate": 2.208238489376805e-05, "loss": 0.1723, "step": 24000 }, { "epoch": 2.733485193621868, "eval_accuracy": 0.9023334135193649, "eval_f1_macro": 0.8816239296246752, "eval_f1_micro": 0.9023334135193649, "eval_loss": 0.6424487233161926, "eval_precision_macro": 0.8835852425011356, "eval_precision_micro": 0.9023334135193649, "eval_recall_macro": 0.8920920740693067, "eval_recall_micro": 0.9023334135193649, "eval_runtime": 8.4613, "eval_samples_per_second": 491.296, "eval_steps_per_second": 61.456, "step": 24000 }, { "epoch": 2.7904328018223232, "grad_norm": 125.23475646972656, "learning_rate": 2.0331438324793375e-05, "loss": 0.1682, "step": 24500 }, { "epoch": 2.7904328018223232, "eval_accuracy": 0.8972816935289872, "eval_f1_macro": 0.8822737330613647, "eval_f1_micro": 0.8972816935289872, "eval_loss": 0.6842340230941772, "eval_precision_macro": 0.8827282408108892, "eval_precision_micro": 0.8972816935289872, "eval_recall_macro": 0.8944889961638287, "eval_recall_micro": 0.8972816935289872, "eval_runtime": 8.4764, "eval_samples_per_second": 490.419, "eval_steps_per_second": 61.347, "step": 24500 }, { "epoch": 2.847380410022779, "grad_norm": 0.5098503828048706, "learning_rate": 1.8628863347570347e-05, "loss": 0.1441, "step": 25000 }, { "epoch": 2.847380410022779, "eval_accuracy": 0.8948761125811884, "eval_f1_macro": 0.8736085811230334, "eval_f1_micro": 0.8948761125811884, "eval_loss": 0.7110973596572876, "eval_precision_macro": 0.8674915707955687, "eval_precision_micro": 0.8948761125811884, "eval_recall_macro": 0.901699859992757, "eval_recall_micro": 0.8948761125811884, "eval_runtime": 8.4537, "eval_samples_per_second": 491.739, "eval_steps_per_second": 61.512, "step": 25000 }, { "epoch": 2.9043280182232345, "grad_norm": 0.8350435495376587, "learning_rate": 1.6975596030661532e-05, "loss": 0.1625, "step": 25500 }, { "epoch": 2.9043280182232345, "eval_accuracy": 0.9030550878037046, "eval_f1_macro": 0.8779275841497265, "eval_f1_micro": 0.9030550878037046, "eval_loss": 0.6505866050720215, "eval_precision_macro": 0.8690908487688577, "eval_precision_micro": 0.9030550878037046, "eval_recall_macro": 0.9045801541095179, "eval_recall_micro": 0.9030550878037046, "eval_runtime": 8.4694, "eval_samples_per_second": 490.823, "eval_steps_per_second": 61.397, "step": 25500 }, { "epoch": 2.9612756264236904, "grad_norm": 0.08747211843729019, "learning_rate": 1.537918058104578e-05, "loss": 0.1494, "step": 26000 }, { "epoch": 2.9612756264236904, "eval_accuracy": 0.9090690401732018, "eval_f1_macro": 0.8830141025055214, "eval_f1_micro": 0.9090690401732018, "eval_loss": 0.5693129301071167, "eval_precision_macro": 0.8816974949312945, "eval_precision_micro": 0.9090690401732018, "eval_recall_macro": 0.9015040014659215, "eval_recall_micro": 0.9090690401732018, "eval_runtime": 8.5221, "eval_samples_per_second": 487.793, "eval_steps_per_second": 61.018, "step": 26000 }, { "epoch": 3.0182232346241458, "grad_norm": 2.089958906173706, "learning_rate": 1.3843558868376073e-05, "loss": 0.1306, "step": 26500 }, { "epoch": 3.0182232346241458, "eval_accuracy": 0.9179696896800578, "eval_f1_macro": 0.8947539710239703, "eval_f1_micro": 0.9179696896800578, "eval_loss": 0.534771203994751, "eval_precision_macro": 0.8923066730381018, "eval_precision_micro": 0.9179696896800578, "eval_recall_macro": 0.9121332598176911, "eval_recall_micro": 0.9179696896800578, "eval_runtime": 8.5905, "eval_samples_per_second": 483.909, "eval_steps_per_second": 60.532, "step": 26500 }, { "epoch": 3.075170842824601, "grad_norm": 0.0017847216222435236, "learning_rate": 1.2372522650386443e-05, "loss": 0.0929, "step": 27000 }, { "epoch": 3.075170842824601, "eval_accuracy": 0.9162857830165985, "eval_f1_macro": 0.8899287169854497, "eval_f1_micro": 0.9162857830165985, "eval_loss": 0.5843378305435181, "eval_precision_macro": 0.8796301981250076, "eval_precision_micro": 0.9162857830165985, "eval_recall_macro": 0.9152639619070458, "eval_recall_micro": 0.9162857830165985, "eval_runtime": 8.4184, "eval_samples_per_second": 493.798, "eval_steps_per_second": 61.769, "step": 27000 }, { "epoch": 3.132118451025057, "grad_norm": 0.7512030601501465, "learning_rate": 1.096970421028209e-05, "loss": 0.1351, "step": 27500 }, { "epoch": 3.132118451025057, "eval_accuracy": 0.9023334135193649, "eval_f1_macro": 0.8733006387364801, "eval_f1_micro": 0.9023334135193649, "eval_loss": 0.6913191080093384, "eval_precision_macro": 0.8611014409270759, "eval_precision_micro": 0.9023334135193649, "eval_recall_macro": 0.9038150903313851, "eval_recall_micro": 0.9023334135193649, "eval_runtime": 8.4379, "eval_samples_per_second": 492.656, "eval_steps_per_second": 61.626, "step": 27500 }, { "epoch": 3.1890660592255125, "grad_norm": 0.005041371565312147, "learning_rate": 9.638567387904402e-06, "loss": 0.0907, "step": 28000 }, { "epoch": 3.1890660592255125, "eval_accuracy": 0.9206158287226365, "eval_f1_macro": 0.8974407788451421, "eval_f1_micro": 0.9206158287226365, "eval_loss": 0.5801523327827454, "eval_precision_macro": 0.8882950671220116, "eval_precision_micro": 0.9206158287226365, "eval_recall_macro": 0.9136820970599899, "eval_recall_micro": 0.9206158287226365, "eval_runtime": 8.4725, "eval_samples_per_second": 490.649, "eval_steps_per_second": 61.375, "step": 28000 }, { "epoch": 3.2460136674259683, "grad_norm": 0.6128404140472412, "learning_rate": 8.382399026816216e-06, "loss": 0.117, "step": 28500 }, { "epoch": 3.2460136674259683, "eval_accuracy": 0.9105123887418811, "eval_f1_macro": 0.8877368293575257, "eval_f1_micro": 0.9105123887418811, "eval_loss": 0.6602935791015625, "eval_precision_macro": 0.8839485939859522, "eval_precision_micro": 0.9105123887418811, "eval_recall_macro": 0.905732720534913, "eval_recall_micro": 0.9105123887418811, "eval_runtime": 8.482, "eval_samples_per_second": 490.097, "eval_steps_per_second": 61.306, "step": 28500 }, { "epoch": 3.3029612756264237, "grad_norm": 0.0018906695768237114, "learning_rate": 7.2065771743884275e-06, "loss": 0.0986, "step": 29000 }, { "epoch": 3.3029612756264237, "eval_accuracy": 0.9105123887418811, "eval_f1_macro": 0.887646044144043, "eval_f1_micro": 0.9105123887418811, "eval_loss": 0.6138319969177246, "eval_precision_macro": 0.887907584612917, "eval_precision_micro": 0.9105123887418811, "eval_recall_macro": 0.9029879731406512, "eval_recall_micro": 0.9105123887418811, "eval_runtime": 8.4603, "eval_samples_per_second": 491.355, "eval_steps_per_second": 61.464, "step": 29000 }, { "epoch": 3.359908883826879, "grad_norm": 1.5009195804595947, "learning_rate": 6.109293429462298e-06, "loss": 0.1196, "step": 29500 }, { "epoch": 3.359908883826879, "eval_accuracy": 0.9131585277844599, "eval_f1_macro": 0.8922838073892356, "eval_f1_micro": 0.9131585277844599, "eval_loss": 0.6528560519218445, "eval_precision_macro": 0.8910787251464412, "eval_precision_micro": 0.9131585277844599, "eval_recall_macro": 0.9099849910959319, "eval_recall_micro": 0.9131585277844599, "eval_runtime": 8.4579, "eval_samples_per_second": 491.492, "eval_steps_per_second": 61.481, "step": 29500 }, { "epoch": 3.416856492027335, "grad_norm": 2.331648349761963, "learning_rate": 5.0956926304652455e-06, "loss": 0.1056, "step": 30000 }, { "epoch": 3.416856492027335, "eval_accuracy": 0.9131585277844599, "eval_f1_macro": 0.8919235957427174, "eval_f1_micro": 0.9131585277844599, "eval_loss": 0.6215759515762329, "eval_precision_macro": 0.884609220404625, "eval_precision_micro": 0.9131585277844599, "eval_recall_macro": 0.9135941017288176, "eval_recall_micro": 0.9131585277844599, "eval_runtime": 8.4442, "eval_samples_per_second": 492.29, "eval_steps_per_second": 61.581, "step": 30000 }, { "epoch": 3.4738041002277904, "grad_norm": 148.2794189453125, "learning_rate": 4.168277560886878e-06, "loss": 0.1106, "step": 30500 }, { "epoch": 3.4738041002277904, "eval_accuracy": 0.9201347125330768, "eval_f1_macro": 0.891345440347701, "eval_f1_micro": 0.9201347125330768, "eval_loss": 0.5669803023338318, "eval_precision_macro": 0.8817511779041877, "eval_precision_micro": 0.9201347125330768, "eval_recall_macro": 0.9129521474780394, "eval_recall_micro": 0.9201347125330768, "eval_runtime": 8.4316, "eval_samples_per_second": 493.024, "eval_steps_per_second": 61.672, "step": 30500 }, { "epoch": 3.5307517084282463, "grad_norm": 4.748849868774414, "learning_rate": 3.3293381943799983e-06, "loss": 0.0953, "step": 31000 }, { "epoch": 3.5307517084282463, "eval_accuracy": 0.9189319220591773, "eval_f1_macro": 0.8934567988898604, "eval_f1_micro": 0.9189319220591773, "eval_loss": 0.569622278213501, "eval_precision_macro": 0.8825638399763271, "eval_precision_micro": 0.9189319220591773, "eval_recall_macro": 0.9187150647269768, "eval_recall_micro": 0.9189319220591773, "eval_runtime": 8.4421, "eval_samples_per_second": 492.411, "eval_steps_per_second": 61.596, "step": 31000 }, { "epoch": 3.5876993166287017, "grad_norm": 1.284857153892517, "learning_rate": 2.580946040356764e-06, "loss": 0.0989, "step": 31500 }, { "epoch": 3.5876993166287017, "eval_accuracy": 0.9194130382487371, "eval_f1_macro": 0.8981523298720768, "eval_f1_micro": 0.9194130382487371, "eval_loss": 0.5652771592140198, "eval_precision_macro": 0.8854580096444836, "eval_precision_micro": 0.9194130382487371, "eval_recall_macro": 0.9219999897271275, "eval_recall_micro": 0.9194130382487371, "eval_runtime": 8.43, "eval_samples_per_second": 493.122, "eval_steps_per_second": 61.685, "step": 31500 }, { "epoch": 3.644646924829157, "grad_norm": 1.4339042901992798, "learning_rate": 1.9249490290167914e-06, "loss": 0.0989, "step": 32000 }, { "epoch": 3.644646924829157, "eval_accuracy": 0.9138802020687996, "eval_f1_macro": 0.89412218234686, "eval_f1_micro": 0.9138802020687996, "eval_loss": 0.6019502282142639, "eval_precision_macro": 0.882568682117879, "eval_precision_micro": 0.9138802020687996, "eval_recall_macro": 0.9175339639156622, "eval_recall_micro": 0.9138802020687996, "eval_runtime": 8.4297, "eval_samples_per_second": 493.138, "eval_steps_per_second": 61.687, "step": 32000 }, { "epoch": 3.7015945330296125, "grad_norm": 0.7662363648414612, "learning_rate": 1.3629669484372722e-06, "loss": 0.0876, "step": 32500 }, { "epoch": 3.7015945330296125, "eval_accuracy": 0.9167668992061583, "eval_f1_macro": 0.8960433945398601, "eval_f1_micro": 0.9167668992061583, "eval_loss": 0.578229546546936, "eval_precision_macro": 0.8823539415499122, "eval_precision_micro": 0.9167668992061583, "eval_recall_macro": 0.9211442367709737, "eval_recall_micro": 0.9167668992061583, "eval_runtime": 8.4419, "eval_samples_per_second": 492.426, "eval_steps_per_second": 61.598, "step": 32500 }, { "epoch": 3.7585421412300684, "grad_norm": 0.8838233947753906, "learning_rate": 8.963874449915156e-07, "loss": 0.1004, "step": 33000 }, { "epoch": 3.7585421412300684, "eval_accuracy": 0.9186913639643974, "eval_f1_macro": 0.8974164477796313, "eval_f1_micro": 0.9186913639643974, "eval_loss": 0.5638399720191956, "eval_precision_macro": 0.8850042534249818, "eval_precision_micro": 0.9186913639643974, "eval_recall_macro": 0.9204573653096998, "eval_recall_micro": 0.9186913639643974, "eval_runtime": 8.4287, "eval_samples_per_second": 493.198, "eval_steps_per_second": 61.694, "step": 33000 }, { "epoch": 3.8154897494305238, "grad_norm": 0.8862270712852478, "learning_rate": 5.263625969720654e-07, "loss": 0.1163, "step": 33500 }, { "epoch": 3.8154897494305238, "eval_accuracy": 0.9189319220591773, "eval_f1_macro": 0.8978554181419179, "eval_f1_micro": 0.9189319220591773, "eval_loss": 0.5575982332229614, "eval_precision_macro": 0.8842809438001717, "eval_precision_micro": 0.9189319220591773, "eval_recall_macro": 0.9207880969765441, "eval_recall_micro": 0.9189319220591773, "eval_runtime": 8.4411, "eval_samples_per_second": 492.47, "eval_steps_per_second": 61.603, "step": 33500 }, { "epoch": 3.8724373576309796, "grad_norm": 0.0018380646361038089, "learning_rate": 2.5380606987847725e-07, "loss": 0.1014, "step": 34000 }, { "epoch": 3.8724373576309796, "eval_accuracy": 0.9184508058696175, "eval_f1_macro": 0.8970972466165426, "eval_f1_micro": 0.9184508058696175, "eval_loss": 0.5561444759368896, "eval_precision_macro": 0.8836350789167062, "eval_precision_micro": 0.9184508058696175, "eval_recall_macro": 0.9206973516595941, "eval_recall_micro": 0.9184508058696175, "eval_runtime": 8.432, "eval_samples_per_second": 493.002, "eval_steps_per_second": 61.67, "step": 34000 }, { "epoch": 3.929384965831435, "grad_norm": 1.3534623384475708, "learning_rate": 7.939086039413291e-08, "loss": 0.0948, "step": 34500 }, { "epoch": 3.929384965831435, "eval_accuracy": 0.9191724801539571, "eval_f1_macro": 0.8982632597992836, "eval_f1_micro": 0.9191724801539571, "eval_loss": 0.5567488074302673, "eval_precision_macro": 0.8854065999210586, "eval_precision_micro": 0.9191724801539571, "eval_recall_macro": 0.9212323971384597, "eval_recall_micro": 0.9191724801539571, "eval_runtime": 8.4196, "eval_samples_per_second": 493.73, "eval_steps_per_second": 61.761, "step": 34500 }, { "epoch": 3.9863325740318905, "grad_norm": 0.00499533349648118, "learning_rate": 3.6007811694149795e-09, "loss": 0.0915, "step": 35000 }, { "epoch": 3.9863325740318905, "eval_accuracy": 0.9189319220591773, "eval_f1_macro": 0.8979067522497397, "eval_f1_micro": 0.9189319220591773, "eval_loss": 0.5568965673446655, "eval_precision_macro": 0.8849842519873105, "eval_precision_micro": 0.9189319220591773, "eval_recall_macro": 0.9210492469553095, "eval_recall_micro": 0.9189319220591773, "eval_runtime": 8.4437, "eval_samples_per_second": 492.321, "eval_steps_per_second": 61.585, "step": 35000 } ], "logging_steps": 500, "max_steps": 35120, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.368366329856e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }