{ "best_global_step": 90, "best_metric": 46.236559139784944, "best_model_checkpoint": "./tiny-naija-checkpoints/checkpoint-90", "epoch": 20.0, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7017543859649122, "grad_norm": 79.26960754394531, "learning_rate": 1.8000000000000001e-06, "loss": 4.3206, "step": 10 }, { "epoch": 1.0, "eval_loss": 3.6388587951660156, "eval_runtime": 14.5539, "eval_samples_per_second": 3.436, "eval_steps_per_second": 0.481, "eval_wer": 81.87403993855607, "step": 15 }, { "epoch": 1.3508771929824561, "grad_norm": 53.15379333496094, "learning_rate": 3.8000000000000005e-06, "loss": 3.4837, "step": 20 }, { "epoch": 2.0, "grad_norm": 58.95879364013672, "learning_rate": 5.8e-06, "loss": 2.5758, "step": 30 }, { "epoch": 2.0, "eval_loss": 2.2313578128814697, "eval_runtime": 13.3093, "eval_samples_per_second": 3.757, "eval_steps_per_second": 0.526, "eval_wer": 74.03993855606758, "step": 30 }, { "epoch": 2.7017543859649122, "grad_norm": 16.26763916015625, "learning_rate": 7.800000000000002e-06, "loss": 1.629, "step": 40 }, { "epoch": 3.0, "eval_loss": 1.3885557651519775, "eval_runtime": 14.6694, "eval_samples_per_second": 3.408, "eval_steps_per_second": 0.477, "eval_wer": 61.136712749615974, "step": 45 }, { "epoch": 3.3508771929824563, "grad_norm": 12.690998077392578, "learning_rate": 9.800000000000001e-06, "loss": 1.0623, "step": 50 }, { "epoch": 4.0, "grad_norm": 34.526302337646484, "learning_rate": 9.640000000000001e-06, "loss": 0.7805, "step": 60 }, { "epoch": 4.0, "eval_loss": 1.251920461654663, "eval_runtime": 12.2314, "eval_samples_per_second": 4.088, "eval_steps_per_second": 0.572, "eval_wer": 52.22734254992319, "step": 60 }, { "epoch": 4.701754385964913, "grad_norm": 8.408075332641602, "learning_rate": 9.240000000000001e-06, "loss": 0.6595, "step": 70 }, { "epoch": 5.0, "eval_loss": 1.205701231956482, "eval_runtime": 11.0287, "eval_samples_per_second": 4.534, "eval_steps_per_second": 0.635, "eval_wer": 47.15821812596006, "step": 75 }, { "epoch": 5.350877192982456, "grad_norm": 7.180197238922119, "learning_rate": 8.84e-06, "loss": 0.5661, "step": 80 }, { "epoch": 6.0, "grad_norm": 24.096446990966797, "learning_rate": 8.44e-06, "loss": 0.4548, "step": 90 }, { "epoch": 6.0, "eval_loss": 1.2086139917373657, "eval_runtime": 11.1174, "eval_samples_per_second": 4.497, "eval_steps_per_second": 0.63, "eval_wer": 46.236559139784944, "step": 90 }, { "epoch": 6.701754385964913, "grad_norm": 6.298120498657227, "learning_rate": 8.040000000000001e-06, "loss": 0.3583, "step": 100 }, { "epoch": 7.0, "eval_loss": 1.2257176637649536, "eval_runtime": 10.9626, "eval_samples_per_second": 4.561, "eval_steps_per_second": 0.639, "eval_wer": 49.76958525345622, "step": 105 }, { "epoch": 7.350877192982456, "grad_norm": 5.97155237197876, "learning_rate": 7.640000000000001e-06, "loss": 0.3552, "step": 110 }, { "epoch": 8.0, "grad_norm": 35.86328125, "learning_rate": 7.24e-06, "loss": 0.2761, "step": 120 }, { "epoch": 8.0, "eval_loss": 1.240835428237915, "eval_runtime": 11.012, "eval_samples_per_second": 4.54, "eval_steps_per_second": 0.636, "eval_wer": 49.46236559139785, "step": 120 }, { "epoch": 8.701754385964913, "grad_norm": 4.273914337158203, "learning_rate": 6.8400000000000014e-06, "loss": 0.2189, "step": 130 }, { "epoch": 9.0, "eval_loss": 1.2799654006958008, "eval_runtime": 10.9803, "eval_samples_per_second": 4.554, "eval_steps_per_second": 0.638, "eval_wer": 49.76958525345622, "step": 135 }, { "epoch": 9.350877192982455, "grad_norm": 4.224545001983643, "learning_rate": 6.440000000000001e-06, "loss": 0.2044, "step": 140 }, { "epoch": 10.0, "grad_norm": 16.503740310668945, "learning_rate": 6.040000000000001e-06, "loss": 0.1708, "step": 150 }, { "epoch": 10.0, "eval_loss": 1.3069055080413818, "eval_runtime": 10.9717, "eval_samples_per_second": 4.557, "eval_steps_per_second": 0.638, "eval_wer": 48.23348694316436, "step": 150 }, { "epoch": 10.701754385964913, "grad_norm": 2.8711295127868652, "learning_rate": 5.64e-06, "loss": 0.1448, "step": 160 }, { "epoch": 11.0, "eval_loss": 1.326417326927185, "eval_runtime": 13.3027, "eval_samples_per_second": 3.759, "eval_steps_per_second": 0.526, "eval_wer": 63.594470046082954, "step": 165 }, { "epoch": 11.350877192982455, "grad_norm": 2.82039737701416, "learning_rate": 5.240000000000001e-06, "loss": 0.125, "step": 170 }, { "epoch": 12.0, "grad_norm": 19.702402114868164, "learning_rate": 4.84e-06, "loss": 0.1169, "step": 180 }, { "epoch": 12.0, "eval_loss": 1.3684873580932617, "eval_runtime": 13.3579, "eval_samples_per_second": 3.743, "eval_steps_per_second": 0.524, "eval_wer": 63.90168970814132, "step": 180 }, { "epoch": 12.701754385964913, "grad_norm": 2.337428092956543, "learning_rate": 4.440000000000001e-06, "loss": 0.0839, "step": 190 }, { "epoch": 13.0, "eval_loss": 1.3854682445526123, "eval_runtime": 12.277, "eval_samples_per_second": 4.073, "eval_steps_per_second": 0.57, "eval_wer": 63.594470046082954, "step": 195 }, { "epoch": 13.350877192982455, "grad_norm": 3.5158214569091797, "learning_rate": 4.04e-06, "loss": 0.0844, "step": 200 }, { "epoch": 14.0, "grad_norm": 2.8117311000823975, "learning_rate": 3.6400000000000003e-06, "loss": 0.0701, "step": 210 }, { "epoch": 14.0, "eval_loss": 1.4106031656265259, "eval_runtime": 11.1252, "eval_samples_per_second": 4.494, "eval_steps_per_second": 0.629, "eval_wer": 50.84485407066052, "step": 210 }, { "epoch": 14.701754385964913, "grad_norm": 1.6476364135742188, "learning_rate": 3.2400000000000003e-06, "loss": 0.06, "step": 220 }, { "epoch": 15.0, "eval_loss": 1.4286068677902222, "eval_runtime": 10.9778, "eval_samples_per_second": 4.555, "eval_steps_per_second": 0.638, "eval_wer": 48.694316436251924, "step": 225 }, { "epoch": 15.350877192982455, "grad_norm": 1.7225351333618164, "learning_rate": 2.84e-06, "loss": 0.0583, "step": 230 }, { "epoch": 16.0, "grad_norm": 24.52082633972168, "learning_rate": 2.4400000000000004e-06, "loss": 0.0796, "step": 240 }, { "epoch": 16.0, "eval_loss": 1.4395389556884766, "eval_runtime": 10.9741, "eval_samples_per_second": 4.556, "eval_steps_per_second": 0.638, "eval_wer": 48.23348694316436, "step": 240 }, { "epoch": 16.70175438596491, "grad_norm": 1.8047239780426025, "learning_rate": 2.04e-06, "loss": 0.0522, "step": 250 }, { "epoch": 17.0, "eval_loss": 1.45657479763031, "eval_runtime": 10.9906, "eval_samples_per_second": 4.549, "eval_steps_per_second": 0.637, "eval_wer": 49.0015360983103, "step": 255 }, { "epoch": 17.350877192982455, "grad_norm": 1.3042881488800049, "learning_rate": 1.6400000000000002e-06, "loss": 0.0381, "step": 260 }, { "epoch": 18.0, "grad_norm": 2.017195701599121, "learning_rate": 1.2400000000000002e-06, "loss": 0.0373, "step": 270 }, { "epoch": 18.0, "eval_loss": 1.4669005870819092, "eval_runtime": 10.9521, "eval_samples_per_second": 4.565, "eval_steps_per_second": 0.639, "eval_wer": 48.54070660522273, "step": 270 }, { "epoch": 18.70175438596491, "grad_norm": 0.8439742922782898, "learning_rate": 8.400000000000001e-07, "loss": 0.0385, "step": 280 }, { "epoch": 19.0, "eval_loss": 1.4718369245529175, "eval_runtime": 10.9473, "eval_samples_per_second": 4.567, "eval_steps_per_second": 0.639, "eval_wer": 48.8479262672811, "step": 285 }, { "epoch": 19.350877192982455, "grad_norm": 1.529391884803772, "learning_rate": 4.4e-07, "loss": 0.0306, "step": 290 }, { "epoch": 20.0, "grad_norm": 2.573256254196167, "learning_rate": 4e-08, "loss": 0.035, "step": 300 }, { "epoch": 20.0, "eval_loss": 1.4741815328598022, "eval_runtime": 10.9806, "eval_samples_per_second": 4.553, "eval_steps_per_second": 0.637, "eval_wer": 49.30875576036866, "step": 300 } ], "logging_steps": 10, "max_steps": 300, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.2156996608e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }