{
  "best_global_step": 90,
  "best_metric": 46.236559139784944,
  "best_model_checkpoint": "./tiny-naija-checkpoints/checkpoint-90",
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.7017543859649122,
      "grad_norm": 79.26960754394531,
      "learning_rate": 1.8000000000000001e-06,
      "loss": 4.3206,
      "step": 10
    },
    {
      "epoch": 1.0,
      "eval_loss": 3.6388587951660156,
      "eval_runtime": 14.5539,
      "eval_samples_per_second": 3.436,
      "eval_steps_per_second": 0.481,
      "eval_wer": 81.87403993855607,
      "step": 15
    },
    {
      "epoch": 1.3508771929824561,
      "grad_norm": 53.15379333496094,
      "learning_rate": 3.8000000000000005e-06,
      "loss": 3.4837,
      "step": 20
    },
    {
      "epoch": 2.0,
      "grad_norm": 58.95879364013672,
      "learning_rate": 5.8e-06,
      "loss": 2.5758,
      "step": 30
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.2313578128814697,
      "eval_runtime": 13.3093,
      "eval_samples_per_second": 3.757,
      "eval_steps_per_second": 0.526,
      "eval_wer": 74.03993855606758,
      "step": 30
    },
    {
      "epoch": 2.7017543859649122,
      "grad_norm": 16.26763916015625,
      "learning_rate": 7.800000000000002e-06,
      "loss": 1.629,
      "step": 40
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.3885557651519775,
      "eval_runtime": 14.6694,
      "eval_samples_per_second": 3.408,
      "eval_steps_per_second": 0.477,
      "eval_wer": 61.136712749615974,
      "step": 45
    },
    {
      "epoch": 3.3508771929824563,
      "grad_norm": 12.690998077392578,
      "learning_rate": 9.800000000000001e-06,
      "loss": 1.0623,
      "step": 50
    },
    {
      "epoch": 4.0,
      "grad_norm": 34.526302337646484,
      "learning_rate": 9.640000000000001e-06,
      "loss": 0.7805,
      "step": 60
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.251920461654663,
      "eval_runtime": 12.2314,
      "eval_samples_per_second": 4.088,
      "eval_steps_per_second": 0.572,
      "eval_wer": 52.22734254992319,
      "step": 60
    },
    {
      "epoch": 4.701754385964913,
      "grad_norm": 8.408075332641602,
      "learning_rate": 9.240000000000001e-06,
      "loss": 0.6595,
      "step": 70
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.205701231956482,
      "eval_runtime": 11.0287,
      "eval_samples_per_second": 4.534,
      "eval_steps_per_second": 0.635,
      "eval_wer": 47.15821812596006,
      "step": 75
    },
    {
      "epoch": 5.350877192982456,
      "grad_norm": 7.180197238922119,
      "learning_rate": 8.84e-06,
      "loss": 0.5661,
      "step": 80
    },
    {
      "epoch": 6.0,
      "grad_norm": 24.096446990966797,
      "learning_rate": 8.44e-06,
      "loss": 0.4548,
      "step": 90
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.2086139917373657,
      "eval_runtime": 11.1174,
      "eval_samples_per_second": 4.497,
      "eval_steps_per_second": 0.63,
      "eval_wer": 46.236559139784944,
      "step": 90
    },
    {
      "epoch": 6.701754385964913,
      "grad_norm": 6.298120498657227,
      "learning_rate": 8.040000000000001e-06,
      "loss": 0.3583,
      "step": 100
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.2257176637649536,
      "eval_runtime": 10.9626,
      "eval_samples_per_second": 4.561,
      "eval_steps_per_second": 0.639,
      "eval_wer": 49.76958525345622,
      "step": 105
    },
    {
      "epoch": 7.350877192982456,
      "grad_norm": 5.97155237197876,
      "learning_rate": 7.640000000000001e-06,
      "loss": 0.3552,
      "step": 110
    },
    {
      "epoch": 8.0,
      "grad_norm": 35.86328125,
      "learning_rate": 7.24e-06,
      "loss": 0.2761,
      "step": 120
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.240835428237915,
      "eval_runtime": 11.012,
      "eval_samples_per_second": 4.54,
      "eval_steps_per_second": 0.636,
      "eval_wer": 49.46236559139785,
      "step": 120
    },
    {
      "epoch": 8.701754385964913,
      "grad_norm": 4.273914337158203,
      "learning_rate": 6.8400000000000014e-06,
      "loss": 0.2189,
      "step": 130
    },
    {
      "epoch": 9.0,
      "eval_loss": 1.2799654006958008,
      "eval_runtime": 10.9803,
      "eval_samples_per_second": 4.554,
      "eval_steps_per_second": 0.638,
      "eval_wer": 49.76958525345622,
      "step": 135
    },
    {
      "epoch": 9.350877192982455,
      "grad_norm": 4.224545001983643,
      "learning_rate": 6.440000000000001e-06,
      "loss": 0.2044,
      "step": 140
    },
    {
      "epoch": 10.0,
      "grad_norm": 16.503740310668945,
      "learning_rate": 6.040000000000001e-06,
      "loss": 0.1708,
      "step": 150
    },
    {
      "epoch": 10.0,
      "eval_loss": 1.3069055080413818,
      "eval_runtime": 10.9717,
      "eval_samples_per_second": 4.557,
      "eval_steps_per_second": 0.638,
      "eval_wer": 48.23348694316436,
      "step": 150
    },
    {
      "epoch": 10.701754385964913,
      "grad_norm": 2.8711295127868652,
      "learning_rate": 5.64e-06,
      "loss": 0.1448,
      "step": 160
    },
    {
      "epoch": 11.0,
      "eval_loss": 1.326417326927185,
      "eval_runtime": 13.3027,
      "eval_samples_per_second": 3.759,
      "eval_steps_per_second": 0.526,
      "eval_wer": 63.594470046082954,
      "step": 165
    },
    {
      "epoch": 11.350877192982455,
      "grad_norm": 2.82039737701416,
      "learning_rate": 5.240000000000001e-06,
      "loss": 0.125,
      "step": 170
    },
    {
      "epoch": 12.0,
      "grad_norm": 19.702402114868164,
      "learning_rate": 4.84e-06,
      "loss": 0.1169,
      "step": 180
    },
    {
      "epoch": 12.0,
      "eval_loss": 1.3684873580932617,
      "eval_runtime": 13.3579,
      "eval_samples_per_second": 3.743,
      "eval_steps_per_second": 0.524,
      "eval_wer": 63.90168970814132,
      "step": 180
    },
    {
      "epoch": 12.701754385964913,
      "grad_norm": 2.337428092956543,
      "learning_rate": 4.440000000000001e-06,
      "loss": 0.0839,
      "step": 190
    },
    {
      "epoch": 13.0,
      "eval_loss": 1.3854682445526123,
      "eval_runtime": 12.277,
      "eval_samples_per_second": 4.073,
      "eval_steps_per_second": 0.57,
      "eval_wer": 63.594470046082954,
      "step": 195
    },
    {
      "epoch": 13.350877192982455,
      "grad_norm": 3.5158214569091797,
      "learning_rate": 4.04e-06,
      "loss": 0.0844,
      "step": 200
    },
    {
      "epoch": 14.0,
      "grad_norm": 2.8117311000823975,
      "learning_rate": 3.6400000000000003e-06,
      "loss": 0.0701,
      "step": 210
    },
    {
      "epoch": 14.0,
      "eval_loss": 1.4106031656265259,
      "eval_runtime": 11.1252,
      "eval_samples_per_second": 4.494,
      "eval_steps_per_second": 0.629,
      "eval_wer": 50.84485407066052,
      "step": 210
    },
    {
      "epoch": 14.701754385964913,
      "grad_norm": 1.6476364135742188,
      "learning_rate": 3.2400000000000003e-06,
      "loss": 0.06,
      "step": 220
    },
    {
      "epoch": 15.0,
      "eval_loss": 1.4286068677902222,
      "eval_runtime": 10.9778,
      "eval_samples_per_second": 4.555,
      "eval_steps_per_second": 0.638,
      "eval_wer": 48.694316436251924,
      "step": 225
    },
    {
      "epoch": 15.350877192982455,
      "grad_norm": 1.7225351333618164,
      "learning_rate": 2.84e-06,
      "loss": 0.0583,
      "step": 230
    },
    {
      "epoch": 16.0,
      "grad_norm": 24.52082633972168,
      "learning_rate": 2.4400000000000004e-06,
      "loss": 0.0796,
      "step": 240
    },
    {
      "epoch": 16.0,
      "eval_loss": 1.4395389556884766,
      "eval_runtime": 10.9741,
      "eval_samples_per_second": 4.556,
      "eval_steps_per_second": 0.638,
      "eval_wer": 48.23348694316436,
      "step": 240
    },
    {
      "epoch": 16.70175438596491,
      "grad_norm": 1.8047239780426025,
      "learning_rate": 2.04e-06,
      "loss": 0.0522,
      "step": 250
    },
    {
      "epoch": 17.0,
      "eval_loss": 1.45657479763031,
      "eval_runtime": 10.9906,
      "eval_samples_per_second": 4.549,
      "eval_steps_per_second": 0.637,
      "eval_wer": 49.0015360983103,
      "step": 255
    },
    {
      "epoch": 17.350877192982455,
      "grad_norm": 1.3042881488800049,
      "learning_rate": 1.6400000000000002e-06,
      "loss": 0.0381,
      "step": 260
    },
    {
      "epoch": 18.0,
      "grad_norm": 2.017195701599121,
      "learning_rate": 1.2400000000000002e-06,
      "loss": 0.0373,
      "step": 270
    },
    {
      "epoch": 18.0,
      "eval_loss": 1.4669005870819092,
      "eval_runtime": 10.9521,
      "eval_samples_per_second": 4.565,
      "eval_steps_per_second": 0.639,
      "eval_wer": 48.54070660522273,
      "step": 270
    },
    {
      "epoch": 18.70175438596491,
      "grad_norm": 0.8439742922782898,
      "learning_rate": 8.400000000000001e-07,
      "loss": 0.0385,
      "step": 280
    },
    {
      "epoch": 19.0,
      "eval_loss": 1.4718369245529175,
      "eval_runtime": 10.9473,
      "eval_samples_per_second": 4.567,
      "eval_steps_per_second": 0.639,
      "eval_wer": 48.8479262672811,
      "step": 285
    },
    {
      "epoch": 19.350877192982455,
      "grad_norm": 1.529391884803772,
      "learning_rate": 4.4e-07,
      "loss": 0.0306,
      "step": 290
    },
    {
      "epoch": 20.0,
      "grad_norm": 2.573256254196167,
      "learning_rate": 4e-08,
      "loss": 0.035,
      "step": 300
    },
    {
      "epoch": 20.0,
      "eval_loss": 1.4741815328598022,
      "eval_runtime": 10.9806,
      "eval_samples_per_second": 4.553,
      "eval_steps_per_second": 0.637,
      "eval_wer": 49.30875576036866,
      "step": 300
    }
  ],
  "logging_steps": 10,
  "max_steps": 300,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.2156996608e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}