{
  "best_metric": 0.4363306793570824,
  "best_model_checkpoint": "./outputs_slid/ajesujoba/AfriHuBERT/checkpoint-1830",
  "epoch": 29.99591836734694,
  "eval_steps": 500,
  "global_step": 5490,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9959183673469387,
      "grad_norm": 0.7721740007400513,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 3.7874,
      "step": 183
    },
    {
      "epoch": 0.9959183673469387,
      "eval_accuracy": 0.04349865165904561,
      "eval_f1": 0.004599781789327515,
      "eval_loss": 3.9047515392303467,
      "eval_runtime": 28.6696,
      "eval_samples_per_second": 297.493,
      "eval_steps_per_second": 0.593,
      "step": 183
    },
    {
      "epoch": 1.9959183673469387,
      "grad_norm": 0.8705180883407593,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 3.0785,
      "step": 366
    },
    {
      "epoch": 1.9959183673469387,
      "eval_accuracy": 0.16649079610739828,
      "eval_f1": 0.07176290896776823,
      "eval_loss": 3.3783769607543945,
      "eval_runtime": 17.2721,
      "eval_samples_per_second": 493.804,
      "eval_steps_per_second": 0.984,
      "step": 366
    },
    {
      "epoch": 2.9959183673469387,
      "grad_norm": 1.0882235765457153,
      "learning_rate": 5e-05,
      "loss": 1.9687,
      "step": 549
    },
    {
      "epoch": 2.9959183673469387,
      "eval_accuracy": 0.41739946066361827,
      "eval_f1": 0.23146127598121502,
      "eval_loss": 2.4746670722961426,
      "eval_runtime": 18.0262,
      "eval_samples_per_second": 473.145,
      "eval_steps_per_second": 0.943,
      "step": 549
    },
    {
      "epoch": 3.9959183673469387,
      "grad_norm": 0.830756425857544,
      "learning_rate": 4.983095894354858e-05,
      "loss": 1.0019,
      "step": 732
    },
    {
      "epoch": 3.9959183673469387,
      "eval_accuracy": 0.5312463360300153,
      "eval_f1": 0.33343763170872565,
      "eval_loss": 2.056602954864502,
      "eval_runtime": 17.7176,
      "eval_samples_per_second": 481.386,
      "eval_steps_per_second": 0.959,
      "step": 732
    },
    {
      "epoch": 4.995918367346938,
      "grad_norm": 1.341150164604187,
      "learning_rate": 4.9326121764495596e-05,
      "loss": 0.4955,
      "step": 915
    },
    {
      "epoch": 4.995918367346938,
      "eval_accuracy": 0.5872904209168719,
      "eval_f1": 0.3966908687854425,
      "eval_loss": 2.070507526397705,
      "eval_runtime": 17.7204,
      "eval_samples_per_second": 481.309,
      "eval_steps_per_second": 0.959,
      "step": 915
    },
    {
      "epoch": 5.995918367346938,
      "grad_norm": 1.4914641380310059,
      "learning_rate": 4.849231551964771e-05,
      "loss": 0.3149,
      "step": 1098
    },
    {
      "epoch": 5.995918367346938,
      "eval_accuracy": 0.608277640989565,
      "eval_f1": 0.41221796485256534,
      "eval_loss": 2.174699544906616,
      "eval_runtime": 18.7633,
      "eval_samples_per_second": 454.558,
      "eval_steps_per_second": 0.906,
      "step": 1098
    },
    {
      "epoch": 6.995918367346938,
      "grad_norm": 1.016514539718628,
      "learning_rate": 4.734081600808531e-05,
      "loss": 0.2324,
      "step": 1281
    },
    {
      "epoch": 6.995918367346938,
      "eval_accuracy": 0.6051119709227342,
      "eval_f1": 0.42029644401424293,
      "eval_loss": 2.536925792694092,
      "eval_runtime": 19.0396,
      "eval_samples_per_second": 447.961,
      "eval_steps_per_second": 0.893,
      "step": 1281
    },
    {
      "epoch": 7.995918367346938,
      "grad_norm": 0.6603855490684509,
      "learning_rate": 4.588719528532342e-05,
      "loss": 0.1825,
      "step": 1464
    },
    {
      "epoch": 7.995918367346938,
      "eval_accuracy": 0.5930355258529723,
      "eval_f1": 0.37922494807809526,
      "eval_loss": 2.6477608680725098,
      "eval_runtime": 18.8796,
      "eval_samples_per_second": 451.757,
      "eval_steps_per_second": 0.9,
      "step": 1464
    },
    {
      "epoch": 8.995918367346938,
      "grad_norm": 0.9515678286552429,
      "learning_rate": 4.415111107797445e-05,
      "loss": 0.1581,
      "step": 1647
    },
    {
      "epoch": 8.995918367346938,
      "eval_accuracy": 0.5848282330871145,
      "eval_f1": 0.3902253760074279,
      "eval_loss": 2.7652101516723633,
      "eval_runtime": 28.9433,
      "eval_samples_per_second": 294.68,
      "eval_steps_per_second": 0.587,
      "step": 1647
    },
    {
      "epoch": 9.995918367346938,
      "grad_norm": 0.5628945827484131,
      "learning_rate": 4.215604094671835e-05,
      "loss": 0.1386,
      "step": 1830
    },
    {
      "epoch": 9.995918367346938,
      "eval_accuracy": 0.6253957087583538,
      "eval_f1": 0.4363306793570824,
      "eval_loss": 2.5493264198303223,
      "eval_runtime": 17.9843,
      "eval_samples_per_second": 474.247,
      "eval_steps_per_second": 0.945,
      "step": 1830
    },
    {
      "epoch": 10.995918367346938,
      "grad_norm": 0.5759875178337097,
      "learning_rate": 3.9928964792569655e-05,
      "loss": 0.13,
      "step": 2013
    },
    {
      "epoch": 10.995918367346938,
      "eval_accuracy": 0.6325477781686012,
      "eval_f1": 0.42658322719917263,
      "eval_loss": 2.668961763381958,
      "eval_runtime": 17.9422,
      "eval_samples_per_second": 475.359,
      "eval_steps_per_second": 0.947,
      "step": 2013
    },
    {
      "epoch": 11.995918367346938,
      "grad_norm": 0.7909059524536133,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.1134,
      "step": 2196
    },
    {
      "epoch": 11.995918367346938,
      "eval_accuracy": 0.5902215969046781,
      "eval_f1": 0.40717895597633597,
      "eval_loss": 2.847268581390381,
      "eval_runtime": 18.1922,
      "eval_samples_per_second": 468.828,
      "eval_steps_per_second": 0.934,
      "step": 2196
    },
    {
      "epoch": 12.995918367346938,
      "grad_norm": 0.6743366718292236,
      "learning_rate": 3.490199415097892e-05,
      "loss": 0.1078,
      "step": 2379
    },
    {
      "epoch": 12.995918367346938,
      "eval_accuracy": 0.6048774768437097,
      "eval_f1": 0.40486374255791757,
      "eval_loss": 2.909079074859619,
      "eval_runtime": 17.3197,
      "eval_samples_per_second": 492.446,
      "eval_steps_per_second": 0.982,
      "step": 2379
    },
    {
      "epoch": 13.995918367346938,
      "grad_norm": 0.6435021758079529,
      "learning_rate": 3.217008081777726e-05,
      "loss": 0.0929,
      "step": 2562
    },
    {
      "epoch": 13.995918367346938,
      "eval_accuracy": 0.6124985344120061,
      "eval_f1": 0.402051577315403,
      "eval_loss": 2.901214599609375,
      "eval_runtime": 18.278,
      "eval_samples_per_second": 466.625,
      "eval_steps_per_second": 0.93,
      "step": 2562
    },
    {
      "epoch": 14.995918367346938,
      "grad_norm": 0.7225833535194397,
      "learning_rate": 2.9341204441673266e-05,
      "loss": 0.0879,
      "step": 2745
    },
    {
      "epoch": 14.995918367346938,
      "eval_accuracy": 0.5815453159807715,
      "eval_f1": 0.3787146481538575,
      "eval_loss": 2.927959442138672,
      "eval_runtime": 19.3124,
      "eval_samples_per_second": 441.634,
      "eval_steps_per_second": 0.88,
      "step": 2745
    },
    {
      "epoch": 15.995918367346938,
      "grad_norm": 0.519130527973175,
      "learning_rate": 2.6453620722761896e-05,
      "loss": 0.0875,
      "step": 2928
    },
    {
      "epoch": 15.995918367346938,
      "eval_accuracy": 0.6116778051354204,
      "eval_f1": 0.42421911178450894,
      "eval_loss": 2.8714120388031006,
      "eval_runtime": 18.6944,
      "eval_samples_per_second": 456.233,
      "eval_steps_per_second": 0.909,
      "step": 2928
    },
    {
      "epoch": 16.99591836734694,
      "grad_norm": 0.5847667455673218,
      "learning_rate": 2.3546379277238107e-05,
      "loss": 0.083,
      "step": 3111
    },
    {
      "epoch": 16.99591836734694,
      "eval_accuracy": 0.604994723883222,
      "eval_f1": 0.40283444897722465,
      "eval_loss": 2.9251325130462646,
      "eval_runtime": 19.0241,
      "eval_samples_per_second": 448.325,
      "eval_steps_per_second": 0.894,
      "step": 3111
    },
    {
      "epoch": 17.99591836734694,
      "grad_norm": 0.5335302948951721,
      "learning_rate": 2.0658795558326743e-05,
      "loss": 0.0743,
      "step": 3294
    },
    {
      "epoch": 17.99591836734694,
      "eval_accuracy": 0.6085121350685895,
      "eval_f1": 0.3982368535619314,
      "eval_loss": 2.907853364944458,
      "eval_runtime": 18.6799,
      "eval_samples_per_second": 456.587,
      "eval_steps_per_second": 0.91,
      "step": 3294
    },
    {
      "epoch": 18.99591836734694,
      "grad_norm": 0.6082349419593811,
      "learning_rate": 1.7829919182222752e-05,
      "loss": 0.0743,
      "step": 3477
    },
    {
      "epoch": 18.99591836734694,
      "eval_accuracy": 0.6140227459256654,
      "eval_f1": 0.40722488778058297,
      "eval_loss": 2.9568777084350586,
      "eval_runtime": 18.2131,
      "eval_samples_per_second": 468.288,
      "eval_steps_per_second": 0.933,
      "step": 3477
    },
    {
      "epoch": 19.99591836734694,
      "grad_norm": 0.5372836589813232,
      "learning_rate": 1.5112603381728762e-05,
      "loss": 0.0745,
      "step": 3660
    },
    {
      "epoch": 19.99591836734694,
      "eval_accuracy": 0.6022980419744401,
      "eval_f1": 0.3888247133789473,
      "eval_loss": 3.133009910583496,
      "eval_runtime": 19.5015,
      "eval_samples_per_second": 437.351,
      "eval_steps_per_second": 0.872,
      "step": 3660
    },
    {
      "epoch": 20.99591836734694,
      "grad_norm": 0.4080846905708313,
      "learning_rate": 1.2513768458995337e-05,
      "loss": 0.0641,
      "step": 3843
    },
    {
      "epoch": 20.99591836734694,
      "eval_accuracy": 0.6041739946066362,
      "eval_f1": 0.4024604989707059,
      "eval_loss": 3.086355447769165,
      "eval_runtime": 18.9488,
      "eval_samples_per_second": 450.109,
      "eval_steps_per_second": 0.897,
      "step": 3843
    },
    {
      "epoch": 21.99591836734694,
      "grad_norm": 0.6301392316818237,
      "learning_rate": 1.0083788397924998e-05,
      "loss": 0.0611,
      "step": 4026
    },
    {
      "epoch": 21.99591836734694,
      "eval_accuracy": 0.611560558095908,
      "eval_f1": 0.4250797125355288,
      "eval_loss": 3.1089813709259033,
      "eval_runtime": 19.3666,
      "eval_samples_per_second": 440.398,
      "eval_steps_per_second": 0.878,
      "step": 4026
    },
    {
      "epoch": 22.99591836734694,
      "grad_norm": 0.7403397560119629,
      "learning_rate": 7.855524510252082e-06,
      "loss": 0.0618,
      "step": 4209
    },
    {
      "epoch": 22.99591836734694,
      "eval_accuracy": 0.6095673584241997,
      "eval_f1": 0.38478101379896623,
      "eval_loss": 3.165566921234131,
      "eval_runtime": 18.268,
      "eval_samples_per_second": 466.882,
      "eval_steps_per_second": 0.931,
      "step": 4209
    },
    {
      "epoch": 23.99591836734694,
      "grad_norm": 0.6018996238708496,
      "learning_rate": 5.8591102425065766e-06,
      "loss": 0.0595,
      "step": 4392
    },
    {
      "epoch": 23.99591836734694,
      "eval_accuracy": 0.6026497830929769,
      "eval_f1": 0.4033953887201948,
      "eval_loss": 3.182464122772217,
      "eval_runtime": 18.8509,
      "eval_samples_per_second": 452.446,
      "eval_steps_per_second": 0.902,
      "step": 4392
    },
    {
      "epoch": 24.99591836734694,
      "grad_norm": 0.7152003049850464,
      "learning_rate": 4.1215436728432114e-06,
      "loss": 0.0549,
      "step": 4575
    },
    {
      "epoch": 24.99591836734694,
      "eval_accuracy": 0.6062844413178567,
      "eval_f1": 0.3998774411315016,
      "eval_loss": 3.2211174964904785,
      "eval_runtime": 18.4161,
      "eval_samples_per_second": 463.128,
      "eval_steps_per_second": 0.923,
      "step": 4575
    },
    {
      "epoch": 25.99591836734694,
      "grad_norm": 0.655457615852356,
      "learning_rate": 2.6663224083492645e-06,
      "loss": 0.0578,
      "step": 4758
    },
    {
      "epoch": 25.99591836734694,
      "eval_accuracy": 0.6093328643451753,
      "eval_f1": 0.40241682477511076,
      "eval_loss": 3.154259204864502,
      "eval_runtime": 19.0328,
      "eval_samples_per_second": 448.122,
      "eval_steps_per_second": 0.893,
      "step": 4758
    },
    {
      "epoch": 26.99591836734694,
      "grad_norm": 0.8799217939376831,
      "learning_rate": 1.5131258202183586e-06,
      "loss": 0.0531,
      "step": 4941
    },
    {
      "epoch": 26.99591836734694,
      "eval_accuracy": 0.611560558095908,
      "eval_f1": 0.4136571965633068,
      "eval_loss": 3.1584064960479736,
      "eval_runtime": 19.4229,
      "eval_samples_per_second": 439.121,
      "eval_steps_per_second": 0.875,
      "step": 4941
    },
    {
      "epoch": 27.99591836734694,
      "grad_norm": 0.5971439480781555,
      "learning_rate": 6.775489140148194e-07,
      "loss": 0.0556,
      "step": 5124
    },
    {
      "epoch": 27.99591836734694,
      "eval_accuracy": 0.6054637120412709,
      "eval_f1": 0.4107652565512037,
      "eval_loss": 3.177584171295166,
      "eval_runtime": 18.7393,
      "eval_samples_per_second": 455.14,
      "eval_steps_per_second": 0.907,
      "step": 5124
    },
    {
      "epoch": 28.99591836734694,
      "grad_norm": 0.5378488898277283,
      "learning_rate": 1.7089143397631958e-07,
      "loss": 0.0592,
      "step": 5307
    },
    {
      "epoch": 28.99591836734694,
      "eval_accuracy": 0.604994723883222,
      "eval_f1": 0.41074234435939105,
      "eval_loss": 3.1705150604248047,
      "eval_runtime": 19.1621,
      "eval_samples_per_second": 445.096,
      "eval_steps_per_second": 0.887,
      "step": 5307
    },
    {
      "epoch": 29.99591836734694,
      "grad_norm": 0.7799643278121948,
      "learning_rate": 5.053357646223056e-12,
      "loss": 0.0511,
      "step": 5490
    },
    {
      "epoch": 29.99591836734694,
      "eval_accuracy": 0.6051119709227342,
      "eval_f1": 0.41072097568738997,
      "eval_loss": 3.1688835620880127,
      "eval_runtime": 18.8369,
      "eval_samples_per_second": 452.78,
      "eval_steps_per_second": 0.902,
      "step": 5490
    },
    {
      "epoch": 29.99591836734694,
      "step": 5490,
      "total_flos": 5.117922821239409e+20,
      "train_loss": 0.060013725892225034,
      "train_runtime": 5236.9953,
      "train_samples_per_second": 2153.277,
      "train_steps_per_second": 1.048
    }
  ],
  "logging_steps": 500,
  "max_steps": 5490,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.117922821239409e+20,
  "train_batch_size": 128,
  "trial_name": null,
  "trial_params": null
}