{ "best_metric": 0.4363306793570824, "best_model_checkpoint": "./outputs_slid/ajesujoba/AfriHuBERT/checkpoint-1830", "epoch": 29.99591836734694, "eval_steps": 500, "global_step": 5490, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9959183673469387, "grad_norm": 0.7721740007400513, "learning_rate": 1.6666666666666667e-05, "loss": 3.7874, "step": 183 }, { "epoch": 0.9959183673469387, "eval_accuracy": 0.04349865165904561, "eval_f1": 0.004599781789327515, "eval_loss": 3.9047515392303467, "eval_runtime": 28.6696, "eval_samples_per_second": 297.493, "eval_steps_per_second": 0.593, "step": 183 }, { "epoch": 1.9959183673469387, "grad_norm": 0.8705180883407593, "learning_rate": 3.3333333333333335e-05, "loss": 3.0785, "step": 366 }, { "epoch": 1.9959183673469387, "eval_accuracy": 0.16649079610739828, "eval_f1": 0.07176290896776823, "eval_loss": 3.3783769607543945, "eval_runtime": 17.2721, "eval_samples_per_second": 493.804, "eval_steps_per_second": 0.984, "step": 366 }, { "epoch": 2.9959183673469387, "grad_norm": 1.0882235765457153, "learning_rate": 5e-05, "loss": 1.9687, "step": 549 }, { "epoch": 2.9959183673469387, "eval_accuracy": 0.41739946066361827, "eval_f1": 0.23146127598121502, "eval_loss": 2.4746670722961426, "eval_runtime": 18.0262, "eval_samples_per_second": 473.145, "eval_steps_per_second": 0.943, "step": 549 }, { "epoch": 3.9959183673469387, "grad_norm": 0.830756425857544, "learning_rate": 4.983095894354858e-05, "loss": 1.0019, "step": 732 }, { "epoch": 3.9959183673469387, "eval_accuracy": 0.5312463360300153, "eval_f1": 0.33343763170872565, "eval_loss": 2.056602954864502, "eval_runtime": 17.7176, "eval_samples_per_second": 481.386, "eval_steps_per_second": 0.959, "step": 732 }, { "epoch": 4.995918367346938, "grad_norm": 1.341150164604187, "learning_rate": 4.9326121764495596e-05, "loss": 0.4955, "step": 915 }, { "epoch": 4.995918367346938, "eval_accuracy": 0.5872904209168719, "eval_f1": 0.3966908687854425, "eval_loss": 2.070507526397705, "eval_runtime": 17.7204, "eval_samples_per_second": 481.309, "eval_steps_per_second": 0.959, "step": 915 }, { "epoch": 5.995918367346938, "grad_norm": 1.4914641380310059, "learning_rate": 4.849231551964771e-05, "loss": 0.3149, "step": 1098 }, { "epoch": 5.995918367346938, "eval_accuracy": 0.608277640989565, "eval_f1": 0.41221796485256534, "eval_loss": 2.174699544906616, "eval_runtime": 18.7633, "eval_samples_per_second": 454.558, "eval_steps_per_second": 0.906, "step": 1098 }, { "epoch": 6.995918367346938, "grad_norm": 1.016514539718628, "learning_rate": 4.734081600808531e-05, "loss": 0.2324, "step": 1281 }, { "epoch": 6.995918367346938, "eval_accuracy": 0.6051119709227342, "eval_f1": 0.42029644401424293, "eval_loss": 2.536925792694092, "eval_runtime": 19.0396, "eval_samples_per_second": 447.961, "eval_steps_per_second": 0.893, "step": 1281 }, { "epoch": 7.995918367346938, "grad_norm": 0.6603855490684509, "learning_rate": 4.588719528532342e-05, "loss": 0.1825, "step": 1464 }, { "epoch": 7.995918367346938, "eval_accuracy": 0.5930355258529723, "eval_f1": 0.37922494807809526, "eval_loss": 2.6477608680725098, "eval_runtime": 18.8796, "eval_samples_per_second": 451.757, "eval_steps_per_second": 0.9, "step": 1464 }, { "epoch": 8.995918367346938, "grad_norm": 0.9515678286552429, "learning_rate": 4.415111107797445e-05, "loss": 0.1581, "step": 1647 }, { "epoch": 8.995918367346938, "eval_accuracy": 0.5848282330871145, "eval_f1": 0.3902253760074279, "eval_loss": 2.7652101516723633, "eval_runtime": 28.9433, "eval_samples_per_second": 294.68, "eval_steps_per_second": 0.587, "step": 1647 }, { "epoch": 9.995918367346938, "grad_norm": 0.5628945827484131, "learning_rate": 4.215604094671835e-05, "loss": 0.1386, "step": 1830 }, { "epoch": 9.995918367346938, "eval_accuracy": 0.6253957087583538, "eval_f1": 0.4363306793570824, "eval_loss": 2.5493264198303223, "eval_runtime": 17.9843, "eval_samples_per_second": 474.247, "eval_steps_per_second": 0.945, "step": 1830 }, { "epoch": 10.995918367346938, "grad_norm": 0.5759875178337097, "learning_rate": 3.9928964792569655e-05, "loss": 0.13, "step": 2013 }, { "epoch": 10.995918367346938, "eval_accuracy": 0.6325477781686012, "eval_f1": 0.42658322719917263, "eval_loss": 2.668961763381958, "eval_runtime": 17.9422, "eval_samples_per_second": 475.359, "eval_steps_per_second": 0.947, "step": 2013 }, { "epoch": 11.995918367346938, "grad_norm": 0.7909059524536133, "learning_rate": 3.7500000000000003e-05, "loss": 0.1134, "step": 2196 }, { "epoch": 11.995918367346938, "eval_accuracy": 0.5902215969046781, "eval_f1": 0.40717895597633597, "eval_loss": 2.847268581390381, "eval_runtime": 18.1922, "eval_samples_per_second": 468.828, "eval_steps_per_second": 0.934, "step": 2196 }, { "epoch": 12.995918367346938, "grad_norm": 0.6743366718292236, "learning_rate": 3.490199415097892e-05, "loss": 0.1078, "step": 2379 }, { "epoch": 12.995918367346938, "eval_accuracy": 0.6048774768437097, "eval_f1": 0.40486374255791757, "eval_loss": 2.909079074859619, "eval_runtime": 17.3197, "eval_samples_per_second": 492.446, "eval_steps_per_second": 0.982, "step": 2379 }, { "epoch": 13.995918367346938, "grad_norm": 0.6435021758079529, "learning_rate": 3.217008081777726e-05, "loss": 0.0929, "step": 2562 }, { "epoch": 13.995918367346938, "eval_accuracy": 0.6124985344120061, "eval_f1": 0.402051577315403, "eval_loss": 2.901214599609375, "eval_runtime": 18.278, "eval_samples_per_second": 466.625, "eval_steps_per_second": 0.93, "step": 2562 }, { "epoch": 14.995918367346938, "grad_norm": 0.7225833535194397, "learning_rate": 2.9341204441673266e-05, "loss": 0.0879, "step": 2745 }, { "epoch": 14.995918367346938, "eval_accuracy": 0.5815453159807715, "eval_f1": 0.3787146481538575, "eval_loss": 2.927959442138672, "eval_runtime": 19.3124, "eval_samples_per_second": 441.634, "eval_steps_per_second": 0.88, "step": 2745 }, { "epoch": 15.995918367346938, "grad_norm": 0.519130527973175, "learning_rate": 2.6453620722761896e-05, "loss": 0.0875, "step": 2928 }, { "epoch": 15.995918367346938, "eval_accuracy": 0.6116778051354204, "eval_f1": 0.42421911178450894, "eval_loss": 2.8714120388031006, "eval_runtime": 18.6944, "eval_samples_per_second": 456.233, "eval_steps_per_second": 0.909, "step": 2928 }, { "epoch": 16.99591836734694, "grad_norm": 0.5847667455673218, "learning_rate": 2.3546379277238107e-05, "loss": 0.083, "step": 3111 }, { "epoch": 16.99591836734694, "eval_accuracy": 0.604994723883222, "eval_f1": 0.40283444897722465, "eval_loss": 2.9251325130462646, "eval_runtime": 19.0241, "eval_samples_per_second": 448.325, "eval_steps_per_second": 0.894, "step": 3111 }, { "epoch": 17.99591836734694, "grad_norm": 0.5335302948951721, "learning_rate": 2.0658795558326743e-05, "loss": 0.0743, "step": 3294 }, { "epoch": 17.99591836734694, "eval_accuracy": 0.6085121350685895, "eval_f1": 0.3982368535619314, "eval_loss": 2.907853364944458, "eval_runtime": 18.6799, "eval_samples_per_second": 456.587, "eval_steps_per_second": 0.91, "step": 3294 }, { "epoch": 18.99591836734694, "grad_norm": 0.6082349419593811, "learning_rate": 1.7829919182222752e-05, "loss": 0.0743, "step": 3477 }, { "epoch": 18.99591836734694, "eval_accuracy": 0.6140227459256654, "eval_f1": 0.40722488778058297, "eval_loss": 2.9568777084350586, "eval_runtime": 18.2131, "eval_samples_per_second": 468.288, "eval_steps_per_second": 0.933, "step": 3477 }, { "epoch": 19.99591836734694, "grad_norm": 0.5372836589813232, "learning_rate": 1.5112603381728762e-05, "loss": 0.0745, "step": 3660 }, { "epoch": 19.99591836734694, "eval_accuracy": 0.6022980419744401, "eval_f1": 0.3888247133789473, "eval_loss": 3.133009910583496, "eval_runtime": 19.5015, "eval_samples_per_second": 437.351, "eval_steps_per_second": 0.872, "step": 3660 }, { "epoch": 20.99591836734694, "grad_norm": 0.4080846905708313, "learning_rate": 1.2513768458995337e-05, "loss": 0.0641, "step": 3843 }, { "epoch": 20.99591836734694, "eval_accuracy": 0.6041739946066362, "eval_f1": 0.4024604989707059, "eval_loss": 3.086355447769165, "eval_runtime": 18.9488, "eval_samples_per_second": 450.109, "eval_steps_per_second": 0.897, "step": 3843 }, { "epoch": 21.99591836734694, "grad_norm": 0.6301392316818237, "learning_rate": 1.0083788397924998e-05, "loss": 0.0611, "step": 4026 }, { "epoch": 21.99591836734694, "eval_accuracy": 0.611560558095908, "eval_f1": 0.4250797125355288, "eval_loss": 3.1089813709259033, "eval_runtime": 19.3666, "eval_samples_per_second": 440.398, "eval_steps_per_second": 0.878, "step": 4026 }, { "epoch": 22.99591836734694, "grad_norm": 0.7403397560119629, "learning_rate": 7.855524510252082e-06, "loss": 0.0618, "step": 4209 }, { "epoch": 22.99591836734694, "eval_accuracy": 0.6095673584241997, "eval_f1": 0.38478101379896623, "eval_loss": 3.165566921234131, "eval_runtime": 18.268, "eval_samples_per_second": 466.882, "eval_steps_per_second": 0.931, "step": 4209 }, { "epoch": 23.99591836734694, "grad_norm": 0.6018996238708496, "learning_rate": 5.8591102425065766e-06, "loss": 0.0595, "step": 4392 }, { "epoch": 23.99591836734694, "eval_accuracy": 0.6026497830929769, "eval_f1": 0.4033953887201948, "eval_loss": 3.182464122772217, "eval_runtime": 18.8509, "eval_samples_per_second": 452.446, "eval_steps_per_second": 0.902, "step": 4392 }, { "epoch": 24.99591836734694, "grad_norm": 0.7152003049850464, "learning_rate": 4.1215436728432114e-06, "loss": 0.0549, "step": 4575 }, { "epoch": 24.99591836734694, "eval_accuracy": 0.6062844413178567, "eval_f1": 0.3998774411315016, "eval_loss": 3.2211174964904785, "eval_runtime": 18.4161, "eval_samples_per_second": 463.128, "eval_steps_per_second": 0.923, "step": 4575 }, { "epoch": 25.99591836734694, "grad_norm": 0.655457615852356, "learning_rate": 2.6663224083492645e-06, "loss": 0.0578, "step": 4758 }, { "epoch": 25.99591836734694, "eval_accuracy": 0.6093328643451753, "eval_f1": 0.40241682477511076, "eval_loss": 3.154259204864502, "eval_runtime": 19.0328, "eval_samples_per_second": 448.122, "eval_steps_per_second": 0.893, "step": 4758 }, { "epoch": 26.99591836734694, "grad_norm": 0.8799217939376831, "learning_rate": 1.5131258202183586e-06, "loss": 0.0531, "step": 4941 }, { "epoch": 26.99591836734694, "eval_accuracy": 0.611560558095908, "eval_f1": 0.4136571965633068, "eval_loss": 3.1584064960479736, "eval_runtime": 19.4229, "eval_samples_per_second": 439.121, "eval_steps_per_second": 0.875, "step": 4941 }, { "epoch": 27.99591836734694, "grad_norm": 0.5971439480781555, "learning_rate": 6.775489140148194e-07, "loss": 0.0556, "step": 5124 }, { "epoch": 27.99591836734694, "eval_accuracy": 0.6054637120412709, "eval_f1": 0.4107652565512037, "eval_loss": 3.177584171295166, "eval_runtime": 18.7393, "eval_samples_per_second": 455.14, "eval_steps_per_second": 0.907, "step": 5124 }, { "epoch": 28.99591836734694, "grad_norm": 0.5378488898277283, "learning_rate": 1.7089143397631958e-07, "loss": 0.0592, "step": 5307 }, { "epoch": 28.99591836734694, "eval_accuracy": 0.604994723883222, "eval_f1": 0.41074234435939105, "eval_loss": 3.1705150604248047, "eval_runtime": 19.1621, "eval_samples_per_second": 445.096, "eval_steps_per_second": 0.887, "step": 5307 }, { "epoch": 29.99591836734694, "grad_norm": 0.7799643278121948, "learning_rate": 5.053357646223056e-12, "loss": 0.0511, "step": 5490 }, { "epoch": 29.99591836734694, "eval_accuracy": 0.6051119709227342, "eval_f1": 0.41072097568738997, "eval_loss": 3.1688835620880127, "eval_runtime": 18.8369, "eval_samples_per_second": 452.78, "eval_steps_per_second": 0.902, "step": 5490 }, { "epoch": 29.99591836734694, "step": 5490, "total_flos": 5.117922821239409e+20, "train_loss": 0.060013725892225034, "train_runtime": 5236.9953, "train_samples_per_second": 2153.277, "train_steps_per_second": 1.048 } ], "logging_steps": 500, "max_steps": 5490, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.117922821239409e+20, "train_batch_size": 128, "trial_name": null, "trial_params": null }